[ovs-dev] [PATCH 2/2] datapath: Hash only the part of struct sw_flow_key populated by flow_extract().

Andrew Evans aevans at nicira.com
Fri Mar 4 19:37:47 PST 2011


Now that struct sw_flow_key's fields have been reordered to put optional
information last, it's possible to limit hashing of flow keys to only the
relevant portion for this flow as a performance optimization. flow_extract() is
modified to return the length of the flow key that contains useful information,
and flow_hash() now requires the length as a parameter.

Hashing flow keys supplied by userspace is not as performance-critical, so the
flow commands in datapath.c still hash the full struct by passing sizeof(key).

Suggested-by: Jesse Gross <jesse at nicira.com>
Signed-off-by: Andrew Evans <aevans at nicira.com>
---
 datapath/datapath.c |   15 +++++++++------
 datapath/flow.c     |   43 ++++++++++++++++++++++++++++++++-----------
 datapath/flow.h     |    4 ++--
 datapath/tunnel.c   |    7 +++++--
 4 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 733acad..482437a 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -276,10 +276,11 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 	if (!OVS_CB(skb)->flow) {
 		struct sw_flow_key key;
 		struct tbl_node *flow_node;
+		int key_len;
 		bool is_frag;
 
 		/* Extract flow from 'skb' into 'key'. */
-		error = flow_extract(skb, p->port_no, &key, &is_frag);
+		error = flow_extract(skb, p->port_no, &key, &key_len, &is_frag);
 		if (unlikely(error)) {
 			kfree_skb(skb);
 			return;
@@ -293,7 +294,7 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 
 		/* Look up flow. */
 		flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
-					flow_hash(&key), flow_cmp);
+				flow_hash(&key, key_len), flow_cmp);
 		if (unlikely(!flow_node)) {
 			struct dp_upcall_info upcall;
 
@@ -675,6 +676,7 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	struct sw_flow_key key;
 	struct datapath *dp;
 	struct ethhdr *eth;
+	int key_len;
 	bool is_frag;
 	int err;
 
@@ -705,7 +707,7 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	else
 		packet->protocol = htons(ETH_P_802_2);
 
-	err = flow_extract(packet, -1, &key, &is_frag);
+	err = flow_extract(packet, -1, &key, &key_len, &is_frag);
 	if (err)
 		goto exit;
 
@@ -963,7 +965,7 @@ static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 	if (!dp)
 		goto error;
 
-	hash = flow_hash(&key);
+	hash = flow_hash(&key, sizeof(key));
 	table = get_table_protected(dp);
 	flow_node = tbl_lookup(table, &key, hash, flow_cmp);
 	if (!flow_node) {
@@ -1087,7 +1089,7 @@ static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 		return -ENODEV;
 
 	table = get_table_protected(dp);
-	flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
+	flow_node = tbl_lookup(table, &key, flow_hash(&key, sizeof(key)), flow_cmp);
 	if (!flow_node)
 		return -ENOENT;
 
@@ -1122,7 +1124,8 @@ static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
  		return -ENODEV;
 
 	table = get_table_protected(dp);
-	flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
+	flow_node = tbl_lookup(table, &key, flow_hash(&key, sizeof(key)),
+			       flow_cmp);
 	if (!flow_node)
 		return -ENOENT;
 	flow = flow_cast(flow_node);
diff --git a/datapath/flow.c b/datapath/flow.c
index e3a1a6d..475e4ee 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -101,7 +101,11 @@ u64 flow_used_time(unsigned long flow_jiffies)
 	return cur_ms - idle_ms;
 }
 
-static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
+#define SW_FLOW_KEY_OFFSET(field)	/* offset of first byte past 'field' */ \
+	(offsetof(struct sw_flow_key, field) +		\
+	 sizeof(((struct sw_flow_key *)0)->field))
+
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key, int *key_len)
 {
 	unsigned int nh_ofs = skb_network_offset(skb);
 	unsigned int nh_len;
@@ -118,10 +122,11 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 	payload_ofs = (u8 *)(nh + 1) - skb->data;
 	payload_len = ntohs(nh->payload_len);
 
+	key->nw_proto = NEXTHDR_NONE;
+	key->nw_tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
 	memcpy(key->ipv6.src, nh->saddr.in6_u.u6_addr8, sizeof(key->ipv6.src));
 	memcpy(key->ipv6.dst, nh->daddr.in6_u.u6_addr8, sizeof(key->ipv6.dst));
-	key->nw_tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
-	key->nw_proto = NEXTHDR_NONE;
+	*key_len = SW_FLOW_KEY_OFFSET(ipv6.dst);
 
 	/* We don't process jumbograms. */
 	if (!payload_len)
@@ -318,7 +323,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
 }
 
 static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
-		int nh_len)
+			int *key_len, int nh_len)
 {
 	struct ipv6hdr *nh = ipv6_hdr(skb);
 	int icmp_len = ntohs(nh->payload_len) + sizeof(*nh) - nh_len;
@@ -328,6 +333,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 	 * fields, so we need to store them in 16-bit network byte order. */
 	key->ipv6.tp.src = htons(icmp->icmp6_type);
 	key->ipv6.tp.dst = htons(icmp->icmp6_code);
+	*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
 
 	if (!icmp->icmp6_code
 			&& ((icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
@@ -344,6 +350,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 
 		nd = (struct nd_msg *)skb_transport_header(skb);
 		memcpy(key->ipv6.nd_target, &nd->target, sizeof(key->ipv6.nd_target));
+		*key_len = SW_FLOW_KEY_OFFSET(ipv6.nd_target);
 
 		icmp_len -= sizeof(*nd);
 		offset = 0;
@@ -363,12 +370,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 					goto invalid;
 				memcpy(key->ipv6.nd_sha,
 						&nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+				*key_len = SW_FLOW_KEY_OFFSET(ipv6.nd_sha);
 			} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
 					&& opt_len == 8) {
 				if (!is_zero_ether_addr(key->ipv6.nd_tha))
 					goto invalid;
 				memcpy(key->ipv6.nd_tha,
 						&nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+				*key_len = SW_FLOW_KEY_OFFSET(ipv6.nd_tha);
 			}
 
 			icmp_len -= opt_len;
@@ -382,6 +391,7 @@ invalid:
 	memset(key->ipv6.nd_target, 0, sizeof(key->ipv6.nd_target));
 	memset(key->ipv6.nd_sha, 0, sizeof(key->ipv6.nd_sha));
 	memset(key->ipv6.nd_tha, 0, sizeof(key->ipv6.nd_tha));
+	*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
 
 	return 0;
 }
@@ -412,11 +422,12 @@ invalid:
  *      For other key->dl_type values it is left untouched.
  */
 int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
-		 bool *is_frag)
+		int *key_len, bool *is_frag)
 {
 	struct ethhdr *eth;
 
 	memset(key, 0, sizeof(*key));
+	*key_len = 0;
 	key->tun_id = OVS_CB(skb)->tun_id;
 	key->in_port = in_port;
 	*is_frag = false;
@@ -458,6 +469,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 		parse_vlan(skb, key);
 
 	key->dl_type = parse_ethertype(skb);
+	*key_len = SW_FLOW_KEY_OFFSET(dl_type);
 	skb_reset_network_header(skb);
 	__skb_push(skb, skb->data - (unsigned char *)eth);
 
@@ -478,8 +490,9 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 		nh = ip_hdr(skb);
 		key->ipv4.src = nh->saddr;
 		key->ipv4.dst = nh->daddr;
-		key->nw_tos = nh->tos & ~INET_ECN_MASK;
 		key->nw_proto = nh->protocol;
+		key->nw_tos = nh->tos & ~INET_ECN_MASK;
+		*key_len = SW_FLOW_KEY_OFFSET(nw_tos);
 
 		/* Transport layer. */
 		if (!(nh->frag_off & htons(IP_MF | IP_OFFSET)) &&
@@ -489,12 +502,14 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 					struct tcphdr *tcp = tcp_hdr(skb);
 					key->ipv4.tp.src = tcp->source;
 					key->ipv4.tp.dst = tcp->dest;
+					*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
 				}
 			} else if (key->nw_proto == IPPROTO_UDP) {
 				if (udphdr_ok(skb)) {
 					struct udphdr *udp = udp_hdr(skb);
 					key->ipv4.tp.src = udp->source;
 					key->ipv4.tp.dst = udp->dest;
+					*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
 				}
 			} else if (key->nw_proto == IPPROTO_ICMP) {
 				if (icmphdr_ok(skb)) {
@@ -504,6 +519,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 					 * in 16-bit network byte order. */
 					key->ipv4.tp.src = htons(icmp->type);
 					key->ipv4.tp.dst = htons(icmp->code);
+					*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
 				}
 			}
 		} else
@@ -520,8 +536,10 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 				&& arp->ar_pln == 4) {
 
 			/* We only match on the lower 8 bits of the opcode. */
-			if (ntohs(arp->ar_op) <= 0xff)
+			if (ntohs(arp->ar_op) <= 0xff) {
 				key->nw_proto = ntohs(arp->ar_op);
+				*key_len = SW_FLOW_KEY_OFFSET(nw_proto);
+			}
 
 			if (key->nw_proto == ARPOP_REQUEST
 					|| key->nw_proto == ARPOP_REPLY) {
@@ -529,12 +547,13 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 				memcpy(&key->ipv4.dst, arp->ar_tip, sizeof(key->ipv4.dst));
 				memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
 				memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
+				*key_len = SW_FLOW_KEY_OFFSET(ipv4.arp.tha);
 			}
 		}
 	} else if (key->dl_type == htons(ETH_P_IPV6)) {
 		int nh_len;             /* IPv6 Header + Extensions */
 
-		nh_len = parse_ipv6hdr(skb, key);
+		nh_len = parse_ipv6hdr(skb, key, key_len);
 		if (unlikely(nh_len < 0)) {
 			if (nh_len == -EINVAL) {
 				skb->transport_header = skb->network_header;
@@ -549,16 +568,18 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 				struct tcphdr *tcp = tcp_hdr(skb);
 				key->ipv6.tp.src = tcp->source;
 				key->ipv6.tp.dst = tcp->dest;
+				*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
 			}
 		} else if (key->nw_proto == NEXTHDR_UDP) {
 			if (udphdr_ok(skb)) {
 				struct udphdr *udp = udp_hdr(skb);
 				key->ipv6.tp.src = udp->source;
 				key->ipv6.tp.dst = udp->dest;
+				*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
 			}
 		} else if (key->nw_proto == NEXTHDR_ICMP) {
 			if (icmp6hdr_ok(skb)) {
-				int error = parse_icmpv6(skb, key, nh_len);
+				int error = parse_icmpv6(skb, key, key_len, nh_len);
 				if (error < 0)
 					return error;
 			}
@@ -567,9 +588,26 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 	return 0;
 }
 
+/*
+ * Returns a hash of the first 'key_len' bytes of 'key', seeded with
+ * 'hash_seed'.
+ *
+ * jhash2() takes a count of 32-bit words, so 'key_len' is rounded up: plain
+ * division would drop up to three trailing bytes from the hash whenever
+ * 'key_len' is not a multiple of 4.  Assumes the bytes between 'key_len' and
+ * the next word boundary are zero and lie inside the struct; flow_extract()
+ * zeroes the whole key first -- TODO confirm for other key producers.
+ *
+ * NOTE(review): the odp_flow_cmd_*() callers pass sizeof(key) while the
+ * packet paths pass the flow_extract() length.  If the hash depends on the
+ * length, the same key hashes differently on the two paths -- confirm that
+ * inserts and lookups use matching lengths.
+ */
-u32 flow_hash(const struct sw_flow_key *key)
+u32 flow_hash(const struct sw_flow_key *key, int key_len)
 {
-	return jhash2((u32*)key, sizeof(*key) / sizeof(u32), hash_seed);
+	u32 n_words = (key_len + sizeof(u32) - 1) / sizeof(u32);
+
+	return jhash2((u32 *)key, n_words, hash_seed);
 }
 
 int flow_cmp(const struct tbl_node *node, void *key2_)
diff --git a/datapath/flow.h b/datapath/flow.h
index 4b304d4..5413c7d 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -114,11 +114,11 @@ void flow_deferred_free_acts(struct sw_flow_actions *);
 void flow_hold(struct sw_flow *);
 void flow_put(struct sw_flow *);
 
-int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, bool *is_frag);
+int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, int *key_len, bool *is_frag);
 void flow_used(struct sw_flow *, struct sk_buff *);
 u64 flow_used_time(unsigned long flow_jiffies);
 
-u32 flow_hash(const struct sw_flow_key *);
+u32 flow_hash(const struct sw_flow_key *, int key_len);
 int flow_cmp(const struct tbl_node *, void *target);
 
 /* Upper bound on the length of a nlattr-formatted flow key.  The longest
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index f1711f1..d29234d 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -930,6 +930,7 @@ static struct tnl_cache *build_cache(struct vport *vport,
 		struct tbl_node *flow_node;
 		struct vport *dst_vport;
 		struct sk_buff *skb;
+		int flow_key_len;
 		bool is_frag;
 		int err;
 
@@ -944,14 +945,16 @@ static struct tnl_cache *build_cache(struct vport *vport,
 		__skb_put(skb, cache->len);
 		memcpy(skb->data, get_cached_header(cache), cache->len);
 
-		err = flow_extract(skb, dst_vport->port_no, &flow_key, &is_frag);
+		err = flow_extract(skb, dst_vport->port_no, &flow_key,
+				&flow_key_len, &is_frag);
 
 		kfree_skb(skb);
 		if (err || is_frag)
 			goto done;
 
 		flow_node = tbl_lookup(rcu_dereference(dst_vport->dp->table),
-				       &flow_key, flow_hash(&flow_key),
+				       &flow_key,
+				       flow_hash(&flow_key, flow_key_len),
 				       flow_cmp);
 		if (flow_node) {
 			struct sw_flow *flow = flow_cast(flow_node);
-- 
1.7.2.3




More information about the dev mailing list