diff options
Diffstat (limited to 'net/sched/sch_fq.c')
-rw-r--r-- | net/sched/sch_fq.c | 137 |
1 files changed, 83 insertions, 54 deletions
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 32ad015ee8ce..95d843961907 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -88,7 +88,7 @@ struct fq_sched_data { struct fq_flow internal; /* for non classified or high prio packets */ u32 quantum; u32 initial_quantum; - u32 flow_default_rate;/* rate per flow : bytes per second */ + u32 flow_refill_delay; u32 flow_max_rate; /* optional max rate per flow */ u32 flow_plimit; /* max packets per flow */ struct rb_root *fq_root; @@ -115,6 +115,7 @@ static struct fq_flow detached, throttled; static void fq_flow_set_detached(struct fq_flow *f) { f->next = &detached; + f->age = jiffies; } static bool fq_flow_is_detached(const struct fq_flow *f) @@ -209,21 +210,15 @@ static void fq_gc(struct fq_sched_data *q, } } -static const u8 prio2band[TC_PRIO_MAX + 1] = { - 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 -}; - static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) { struct rb_node **p, *parent; struct sock *sk = skb->sk; struct rb_root *root; struct fq_flow *f; - int band; /* warning: no starvation prevention... */ - band = prio2band[skb->priority & TC_PRIO_MAX]; - if (unlikely(band == 0)) + if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) return &q->internal; if (unlikely(!sk)) { @@ -255,6 +250,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) f->socket_hash != sk->sk_hash)) { f->credit = q->initial_quantum; f->socket_hash = sk->sk_hash; + f->time_next_packet = 0ULL; } return f; } @@ -285,7 +281,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) /* remove one skb from head of flow queue */ -static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) +static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow) { struct sk_buff *skb = flow->head; @@ -293,6 +289,8 @@ static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) flow->head = skb->next; skb->next = NULL; flow->qlen--; + sch->qstats.backlog -= qdisc_pkt_len(skb); + sch->q.qlen--; } return skb; } @@ -370,17 +368,20 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch) } f->qlen++; - flow_queue_add(f, skb); if (skb_is_retransmit(skb)) q->stat_tcp_retrans++; sch->qstats.backlog += qdisc_pkt_len(skb); if (fq_flow_is_detached(f)) { fq_flow_add_tail(&q->new_flows, f); - if (q->quantum > f->credit) - f->credit = q->quantum; + if (time_after(jiffies, f->age + q->flow_refill_delay)) + f->credit = max_t(u32, f->credit, q->quantum); q->inactive_flows--; qdisc_unthrottled(sch); } + + /* Note: this overwrites f->age */ + flow_queue_add(f, skb); + if (unlikely(f == &q->internal)) { q->stat_internal_packets++; qdisc_unthrottled(sch); @@ -418,8 +419,9 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; + u32 rate; - skb = fq_dequeue_head(&q->internal); + skb = fq_dequeue_head(sch, &q->internal); if (skb) goto out; fq_check_throttled(q, now); @@ -449,7 +451,7 @@ begin: goto begin; } - skb = fq_dequeue_head(f); + skb = fq_dequeue_head(sch, f); if (!skb) { head->first = f->next; /* force a pass through old_flows to prevent starvation */ @@ -457,7 +459,6 @@ begin: fq_flow_add_tail(&q->old_flows, f); } else { fq_flow_set_detached(f); - f->age = jiffies; q->inactive_flows++; } goto begin; @@ -466,43 +467,70 @@ begin: f->time_next_packet = now; f->credit -= qdisc_pkt_len(skb); - if (f->credit <= 0 && - q->rate_enable && - skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) { - u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate; + if (f->credit > 0 || !q->rate_enable) + goto out; - rate = min(rate, q->flow_max_rate); - if (rate) { - u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC; + rate = q->flow_max_rate; + if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) + rate = min(skb->sk->sk_pacing_rate, rate); - do_div(len, rate); - /* Since socket rate can change later, - * clamp the delay to 125 ms. - * TODO: maybe segment the too big skb, as in commit - * e43ac79a4bc ("sch_tbf: segment too big GSO packets") - */ - if (unlikely(len > 125 * NSEC_PER_MSEC)) { - len = 125 * NSEC_PER_MSEC; - q->stat_pkts_too_long++; - } + if (rate != ~0U) { + u32 plen = max(qdisc_pkt_len(skb), q->quantum); + u64 len = (u64)plen * NSEC_PER_SEC; - f->time_next_packet = now + len; + if (likely(rate)) + do_div(len, rate); + /* Since socket rate can change later, + * clamp the delay to 125 ms. + * TODO: maybe segment the too big skb, as in commit + * e43ac79a4bc ("sch_tbf: segment too big GSO packets") + */ + if (unlikely(len > 125 * NSEC_PER_MSEC)) { + len = 125 * NSEC_PER_MSEC; + q->stat_pkts_too_long++; } + + f->time_next_packet = now + len; } out: - sch->qstats.backlog -= qdisc_pkt_len(skb); qdisc_bstats_update(sch, skb); - sch->q.qlen--; qdisc_unthrottled(sch); return skb; } static void fq_reset(struct Qdisc *sch) { + struct fq_sched_data *q = qdisc_priv(sch); + struct rb_root *root; struct sk_buff *skb; + struct rb_node *p; + struct fq_flow *f; + unsigned int idx; - while ((skb = fq_dequeue(sch)) != NULL) + while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL) kfree_skb(skb); + + if (!q->fq_root) + return; + + for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { + root = &q->fq_root[idx]; + while ((p = rb_first(root)) != NULL) { + f = container_of(p, struct fq_flow, fq_node); + rb_erase(p, root); + + while ((skb = fq_dequeue_head(sch, f)) != NULL) + kfree_skb(skb); + + kmem_cache_free(fq_flow_cachep, f); + } + } + q->new_flows.first = NULL; + q->old_flows.first = NULL; + q->delayed = RB_ROOT; + q->flows = 0; + q->inactive_flows = 0; + q->throttled_flows = 0; } static void fq_rehash(struct fq_sched_data *q, @@ -584,6 +612,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_FLOW_DEFAULT_RATE] = { .type = NLA_U32 }, [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, + [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, }; static int fq_change(struct Qdisc *sch, struct nlattr *opt) @@ -622,10 +651,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); if (tb[TCA_FQ_INITIAL_QUANTUM]) - q->quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); + q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); if (tb[TCA_FQ_FLOW_DEFAULT_RATE]) - q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]); + pr_warn_ratelimited("sch_fq: defrate %u ignored.\n", + nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE])); if (tb[TCA_FQ_FLOW_MAX_RATE]) q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]); @@ -639,12 +669,20 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) err = -EINVAL; } + if (tb[TCA_FQ_FLOW_REFILL_DELAY]) { + u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ; + + q->flow_refill_delay = usecs_to_jiffies(usecs_delay); + } + if (!err) err = fq_resize(q, fq_log); while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_dequeue(sch); + if (!skb) + break; kfree_skb(skb); drop_count++; } @@ -657,21 +695,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) static void fq_destroy(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); - struct rb_root *root; - struct rb_node *p; - unsigned int idx; - if (q->fq_root) { - for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { - root = &q->fq_root[idx]; - while ((p = rb_first(root)) != NULL) { - rb_erase(p, root); - kmem_cache_free(fq_flow_cachep, - container_of(p, struct fq_flow, fq_node)); - } - } - kfree(q->fq_root); - } + fq_reset(sch); + kfree(q->fq_root); qdisc_watchdog_cancel(&q->watchdog); } @@ -684,7 +710,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->flow_plimit = 100; q->quantum = 2 * psched_mtu(qdisc_dev(sch)); q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)); - q->flow_default_rate = 0; + q->flow_refill_delay = msecs_to_jiffies(40); q->flow_max_rate = ~0U; q->rate_enable = 1; q->new_flows.first = NULL; @@ -711,13 +737,16 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) if (opts == NULL) goto nla_put_failure; + /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */ + if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) || nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) || nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) || nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) || nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) || - nla_put_u32(skb, TCA_FQ_FLOW_DEFAULT_RATE, q->flow_default_rate) || nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || + nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, + jiffies_to_usecs(q->flow_refill_delay)) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; |