[ovs-dev] [PATCH 2/2] datapath: Hash only the part of struct sw_flow_key populated by flow_extract().
Andrew Evans
aevans at nicira.com
Fri Mar 4 19:37:47 PST 2011
Now that struct sw_flow_key's fields have been reordered to put optional
information last, it's possible to limit hashing of flow keys to only the
relevant portion for this flow as a performance optimization. flow_extract() is
modified to return the length of the flow key that contains useful information,
and flow_hash() now requires the length as a parameter.
We don't care as much about the performance of hashing flows from userspace, so
the full flow key struct is still hashed in those cases.
Suggested-by: Jesse Gross <jesse at nicira.com>
Signed-off-by: Andrew Evans <aevans at nicira.com>
---
datapath/datapath.c | 15 +++++++++------
datapath/flow.c | 43 ++++++++++++++++++++++++++++++++-----------
datapath/flow.h | 4 ++--
datapath/tunnel.c | 7 +++++--
4 files changed, 48 insertions(+), 21 deletions(-)
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 733acad..482437a 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -276,10 +276,11 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
if (!OVS_CB(skb)->flow) {
struct sw_flow_key key;
struct tbl_node *flow_node;
+ int key_len;
bool is_frag;
/* Extract flow from 'skb' into 'key'. */
- error = flow_extract(skb, p->port_no, &key, &is_frag);
+ error = flow_extract(skb, p->port_no, &key, &key_len, &is_frag);
if (unlikely(error)) {
kfree_skb(skb);
return;
@@ -293,7 +294,7 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
/* Look up flow. */
flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
- flow_hash(&key), flow_cmp);
+ flow_hash(&key, key_len), flow_cmp);
if (unlikely(!flow_node)) {
struct dp_upcall_info upcall;
@@ -675,6 +676,7 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct sw_flow_key key;
struct datapath *dp;
struct ethhdr *eth;
+ int key_len;
bool is_frag;
int err;
@@ -705,7 +707,7 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
else
packet->protocol = htons(ETH_P_802_2);
- err = flow_extract(packet, -1, &key, &is_frag);
+ err = flow_extract(packet, -1, &key, &key_len, &is_frag);
if (err)
goto exit;
@@ -963,7 +965,7 @@ static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
if (!dp)
goto error;
- hash = flow_hash(&key);
+ hash = flow_hash(&key, sizeof(key));
table = get_table_protected(dp);
flow_node = tbl_lookup(table, &key, hash, flow_cmp);
if (!flow_node) {
@@ -1087,7 +1089,7 @@ static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
return -ENODEV;
table = get_table_protected(dp);
- flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
+ flow_node = tbl_lookup(table, &key, flow_hash(&key, sizeof(key)), flow_cmp);
if (!flow_node)
return -ENOENT;
@@ -1122,7 +1124,8 @@ static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
return -ENODEV;
table = get_table_protected(dp);
- flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
+ flow_node = tbl_lookup(table, &key, flow_hash(&key, sizeof(key)),
+ flow_cmp);
if (!flow_node)
return -ENOENT;
flow = flow_cast(flow_node);
diff --git a/datapath/flow.c b/datapath/flow.c
index e3a1a6d..475e4ee 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -101,7 +101,14 @@ u64 flow_used_time(unsigned long flow_jiffies)
return cur_ms - idle_ms;
}
-static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
+/* Evaluates to the offset of the first byte after 'field' in struct
+ * sw_flow_key, i.e. the length of the key prefix that ends with 'field'.
+ * Fully parenthesized so it can be used inside larger expressions. */
+#define SW_FLOW_KEY_OFFSET(field) \
+ (offsetof(struct sw_flow_key, field) + \
+ sizeof(((struct sw_flow_key *)0)->field))
+
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key, int *key_len)
{
unsigned int nh_ofs = skb_network_offset(skb);
unsigned int nh_len;
@@ -118,10 +122,11 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
payload_ofs = (u8 *)(nh + 1) - skb->data;
payload_len = ntohs(nh->payload_len);
+ key->nw_proto = NEXTHDR_NONE;
+ key->nw_tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
memcpy(key->ipv6.src, nh->saddr.in6_u.u6_addr8, sizeof(key->ipv6.src));
memcpy(key->ipv6.dst, nh->daddr.in6_u.u6_addr8, sizeof(key->ipv6.dst));
- key->nw_tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
- key->nw_proto = NEXTHDR_NONE;
+ *key_len = SW_FLOW_KEY_OFFSET(ipv6.dst);
/* We don't process jumbograms. */
if (!payload_len)
@@ -318,7 +323,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
}
static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
- int nh_len)
+ int *key_len, int nh_len)
{
struct ipv6hdr *nh = ipv6_hdr(skb);
int icmp_len = ntohs(nh->payload_len) + sizeof(*nh) - nh_len;
@@ -328,6 +333,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
* fields, so we need to store them in 16-bit network byte order. */
key->ipv6.tp.src = htons(icmp->icmp6_type);
key->ipv6.tp.dst = htons(icmp->icmp6_code);
+ *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
if (!icmp->icmp6_code
&& ((icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
@@ -344,6 +350,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
nd = (struct nd_msg *)skb_transport_header(skb);
memcpy(key->ipv6.nd_target, &nd->target, sizeof(key->ipv6.nd_target));
+ *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd_target);
icmp_len -= sizeof(*nd);
offset = 0;
@@ -363,12 +370,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
goto invalid;
memcpy(key->ipv6.nd_sha,
&nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+ *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd_sha);
} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
&& opt_len == 8) {
if (!is_zero_ether_addr(key->ipv6.nd_tha))
goto invalid;
memcpy(key->ipv6.nd_tha,
&nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+ *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd_tha);
}
icmp_len -= opt_len;
@@ -382,6 +391,7 @@ invalid:
memset(key->ipv6.nd_target, 0, sizeof(key->ipv6.nd_target));
memset(key->ipv6.nd_sha, 0, sizeof(key->ipv6.nd_sha));
memset(key->ipv6.nd_tha, 0, sizeof(key->ipv6.nd_tha));
+ *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
return 0;
}
@@ -412,11 +422,12 @@ invalid:
* For other key->dl_type values it is left untouched.
*/
int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
- bool *is_frag)
+ int *key_len, bool *is_frag)
{
struct ethhdr *eth;
memset(key, 0, sizeof(*key));
+ *key_len = 0;
key->tun_id = OVS_CB(skb)->tun_id;
key->in_port = in_port;
*is_frag = false;
@@ -458,6 +469,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
parse_vlan(skb, key);
key->dl_type = parse_ethertype(skb);
+ *key_len = SW_FLOW_KEY_OFFSET(dl_type);
skb_reset_network_header(skb);
__skb_push(skb, skb->data - (unsigned char *)eth);
@@ -478,8 +490,9 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
nh = ip_hdr(skb);
key->ipv4.src = nh->saddr;
key->ipv4.dst = nh->daddr;
- key->nw_tos = nh->tos & ~INET_ECN_MASK;
key->nw_proto = nh->protocol;
+ key->nw_tos = nh->tos & ~INET_ECN_MASK;
+ *key_len = SW_FLOW_KEY_OFFSET(nw_tos);
/* Transport layer. */
if (!(nh->frag_off & htons(IP_MF | IP_OFFSET)) &&
@@ -489,12 +502,14 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv4.tp.src = tcp->source;
key->ipv4.tp.dst = tcp->dest;
+ *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
}
} else if (key->nw_proto == IPPROTO_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->ipv4.tp.src = udp->source;
key->ipv4.tp.dst = udp->dest;
+ *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
}
} else if (key->nw_proto == IPPROTO_ICMP) {
if (icmphdr_ok(skb)) {
@@ -504,6 +519,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
* in 16-bit network byte order. */
key->ipv4.tp.src = htons(icmp->type);
key->ipv4.tp.dst = htons(icmp->code);
+ *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
}
}
} else
@@ -520,8 +536,10 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
&& arp->ar_pln == 4) {
/* We only match on the lower 8 bits of the opcode. */
- if (ntohs(arp->ar_op) <= 0xff)
+ if (ntohs(arp->ar_op) <= 0xff) {
key->nw_proto = ntohs(arp->ar_op);
+ *key_len = SW_FLOW_KEY_OFFSET(nw_proto);
+ }
if (key->nw_proto == ARPOP_REQUEST
|| key->nw_proto == ARPOP_REPLY) {
@@ -529,12 +547,13 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
memcpy(&key->ipv4.dst, arp->ar_tip, sizeof(key->ipv4.dst));
memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
+ *key_len = SW_FLOW_KEY_OFFSET(ipv4.arp.tha);
}
}
} else if (key->dl_type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
- nh_len = parse_ipv6hdr(skb, key);
+ nh_len = parse_ipv6hdr(skb, key, key_len);
if (unlikely(nh_len < 0)) {
if (nh_len == -EINVAL) {
skb->transport_header = skb->network_header;
@@ -549,16 +568,18 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv6.tp.src = tcp->source;
key->ipv6.tp.dst = tcp->dest;
+ *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
}
} else if (key->nw_proto == NEXTHDR_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->ipv6.tp.src = udp->source;
key->ipv6.tp.dst = udp->dest;
+ *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
}
} else if (key->nw_proto == NEXTHDR_ICMP) {
if (icmp6hdr_ok(skb)) {
- int error = parse_icmpv6(skb, key, nh_len);
+ int error = parse_icmpv6(skb, key, key_len, nh_len);
if (error < 0)
return error;
}
@@ -567,9 +588,14 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
return 0;
}
-u32 flow_hash(const struct sw_flow_key *key)
+/* Hashes the first 'key_len' bytes of 'key' as 32-bit words, seeded with
+ * hash_seed. The word count is rounded up so that a 'key_len' that is not a
+ * multiple of sizeof(u32) still covers its trailing bytes; the caller must
+ * make sure the bytes up to the next u32 boundary are readable and have a
+ * deterministic value. */
+u32 flow_hash(const struct sw_flow_key *key, int key_len)
{
- return jhash2((u32*)key, sizeof(*key) / sizeof(u32), hash_seed);
+ return jhash2((u32*)key, (key_len + sizeof(u32) - 1) / sizeof(u32), hash_seed);
}
int flow_cmp(const struct tbl_node *node, void *key2_)
diff --git a/datapath/flow.h b/datapath/flow.h
index 4b304d4..5413c7d 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -114,11 +114,11 @@ void flow_deferred_free_acts(struct sw_flow_actions *);
void flow_hold(struct sw_flow *);
void flow_put(struct sw_flow *);
-int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, bool *is_frag);
+int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, int *key_len, bool *is_frag);
void flow_used(struct sw_flow *, struct sk_buff *);
u64 flow_used_time(unsigned long flow_jiffies);
-u32 flow_hash(const struct sw_flow_key *);
+u32 flow_hash(const struct sw_flow_key *, int key_len);
int flow_cmp(const struct tbl_node *, void *target);
/* Upper bound on the length of a nlattr-formatted flow key. The longest
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index f1711f1..d29234d 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -930,6 +930,7 @@ static struct tnl_cache *build_cache(struct vport *vport,
struct tbl_node *flow_node;
struct vport *dst_vport;
struct sk_buff *skb;
+ int flow_key_len;
bool is_frag;
int err;
@@ -944,14 +945,16 @@ static struct tnl_cache *build_cache(struct vport *vport,
__skb_put(skb, cache->len);
memcpy(skb->data, get_cached_header(cache), cache->len);
- err = flow_extract(skb, dst_vport->port_no, &flow_key, &is_frag);
+ err = flow_extract(skb, dst_vport->port_no, &flow_key,
+ &flow_key_len, &is_frag);
kfree_skb(skb);
if (err || is_frag)
goto done;
flow_node = tbl_lookup(rcu_dereference(dst_vport->dp->table),
- &flow_key, flow_hash(&flow_key),
+ &flow_key,
+ flow_hash(&flow_key, flow_key_len),
flow_cmp);
if (flow_node) {
struct sw_flow *flow = flow_cast(flow_node);
--
1.7.2.3
More information about the dev mailing list