From f3483c8e1da62993fe0f57af23b925de7661adaa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Mar 2025 17:13:09 +0000 Subject: [PATCH] net: rfs: hash function change RFS is using two kinds of hash tables. First one is controlled by /proc/sys/net/core/rps_sock_flow_entries = 2^N and using the N low order bits of the l4 hash is good enough. Then each RX queue has its own hash table, controlled by /sys/class/net/eth1/queues/rx-$q/rps_flow_cnt = 2^X Current hash function, using the X low order bits is suboptimal, because RSS is usually using Func(hash) = (hash % power_of_two); For example, with 32 RX queues, 6 low order bits have no entropy for a given queue. Switch this hash function to hash_32(hash, log) to increase chances to use all possible slots and reduce collisions. Signed-off-by: Eric Dumazet Cc: Tom Herbert Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250321171309.634100-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/rps.h | 2 +- net/core/dev.c | 13 +++++++++---- net/core/net-sysfs.c | 4 ++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/net/rps.h b/include/net/rps.h index a93401d23d66..e358e9711f27 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -39,7 +39,7 @@ struct rps_dev_flow { * The rps_dev_flow_table structure contains a table of flow mappings. */ struct rps_dev_flow_table { - unsigned int mask; + u8 log; struct rcu_head rcu; struct rps_dev_flow flows[]; }; diff --git a/net/core/dev.c b/net/core/dev.c index bcf81c3ff6a3..b6b1c7898281 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4751,6 +4751,11 @@ EXPORT_SYMBOL(rps_needed); struct static_key_false rfs_needed __read_mostly; EXPORT_SYMBOL(rfs_needed); +static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table) +{ + return hash_32(hash, flow_table->log); +} + static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *rflow, u16 next_cpu) @@ -4777,7 +4782,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, flow_table = rcu_dereference(rxqueue->rps_flow_table); if (!flow_table) goto out; - flow_id = skb_get_hash(skb) & flow_table->mask; + flow_id = rfs_slot(skb_get_hash(skb), flow_table); rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id); if (rc < 0) @@ -4856,7 +4861,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, /* OK, now we know there is a match, * we can look at the local (per receive queue) flow table */ - rflow = &flow_table->flows[hash & flow_table->mask]; + rflow = &flow_table->flows[rfs_slot(hash, flow_table)]; tcpu = rflow->cpu; /* @@ -4923,13 +4928,13 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, rcu_read_lock(); flow_table = rcu_dereference(rxqueue->rps_flow_table); - if (flow_table && flow_id <= flow_table->mask) { + if (flow_table && flow_id < (1UL << flow_table->log)) { rflow = &flow_table->flows[flow_id]; cpu = READ_ONCE(rflow->cpu); if (READ_ONCE(rflow->filter) == filter_id && cpu < nr_cpu_ids && ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) - READ_ONCE(rflow->last_qtail)) < - (int)(10 * flow_table->mask))) + (int)(10 << flow_table->log))) expire = false; } rcu_read_unlock(); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index abaa1c919b98..b6fbe629ccee 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1056,7 +1056,7 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, rcu_read_lock(); flow_table = rcu_dereference(queue->rps_flow_table); if (flow_table) - val = (unsigned long)flow_table->mask + 1; + val = 1UL << flow_table->log; rcu_read_unlock(); return sysfs_emit(buf, "%lu\n", val); @@ -1109,7 +1109,7 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, if (!table) return -ENOMEM; - table->mask = mask; + table->log = ilog2(mask) + 1; for (count = 0; count <= mask; count++) table->flows[count].cpu = RPS_NO_CPU; } else {