[ovs-dev] [PATCH 01/16] tunneling: Add support for tunnel ID.

Jesse Gross jesse at nicira.com
Wed Apr 14 14:10:08 PDT 2010


On Wed, Apr 14, 2010 at 3:34 PM, Ben Pfaff <blp at nicira.com> wrote:
>
>  > +static void set_tunnel(struct sk_buff *skb, struct odp_flow_key *key,
> > +                    const struct odp_action_tunnel *a)
> > +{
> > +     OVS_CB(skb)->tun_id = key->tun_id = a->tun_id;
> > +}
>
> Since this just uses a->tun_id I'd be inclined to make its third
> argument '__be32 tun_id' (but I expect that the compiler will optimize
> them equivalently).
>

Sure.


>
> >  static struct sk_buff *
> >  vlan_pull_tag(struct sk_buff *skb)
>
>
> > diff --git a/include/openflow/nicira-ext.h
> b/include/openflow/nicira-ext.h
> > index 7232d57..2454335 100644
> > --- a/include/openflow/nicira-ext.h
> > +++ b/include/openflow/nicira-ext.h
> > @@ -37,6 +37,10 @@ enum nicira_type {
> >       * pairs in the form "key=value\n". */
> >      NXT_STATUS_REPLY,
> >
> > +    /* Use the high 32 bits of the cookie field as the tunnel ID in the
> flow
> > +     * match. */
> > +    NXT_TUN_ID_FROM_COOKIE,
> > +
> >      /* No longer used. */
> >      NXT_ACT_SET_CONFIG__OBSOLETE,
> >      NXT_ACT_GET_CONFIG__OBSOLETE,
>
> We should only add new types at the end of the list to avoid conflicting
> with obsolete types.
>

Good point.


>
> > @@ -71,6 +84,17 @@ struct nx_action_resubmit {
> >  };
> >  OFP_ASSERT(sizeof(struct nx_action_resubmit) == 16);
> >
> > +/* Action structure for NXAST_SET_TUNNEL. */
> > +struct nx_action_set_tunnel {
> > +    uint16_t type;                  /* OFPAT_VENDOR. */
> > +    uint16_t len;                   /* Length is 8. */
>
> The length should be 16.  (I think we get this wrong elsewhere in the
> header though.)
>

Yeah, I just cut and paste it.  I fixed all of them.


>
> > +    uint32_t vendor;                /* NX_VENDOR_ID. */
> > +    uint16_t subtype;               /* NXAST_SET_TUNNEL. */
> > +    uint8_t pad[2];
> > +    uint32_t tun_id;                /* Tunnel ID. */
> > +};
> > +OFP_ASSERT(sizeof(struct nx_action_set_tunnel) == 16);
>
> ...
>
> > --- a/include/openvswitch/datapath-protocol.h
> > +++ b/include/openvswitch/datapath-protocol.h
> > @@ -127,7 +127,8 @@ struct odp_stats {
> >   * @arg: Argument value whose meaning depends on @type.
> >   *
> >   * For @type == %_ODPL_MISS_NR, the header is followed by packet data.
>  The
> > - * @arg member is unused and set to 0.
> > + * @arg member is the ID of the tunnel that encapsulated this packet. It
> is 0
> > + * if the packet was not received on a tunnel.
>
> I'd add that @arg must be in network byte order.
>

OK.


>
> >   * For @type == %_ODPL_ACTION_NR, the header is followed by packet data.
>  The
> >   * @arg member is copied from the &struct odp_action_controller that
> caused
> > @@ -191,6 +192,7 @@ struct odp_flow_stats {
> >  };
> >
> >  struct odp_flow_key {
> > +    __be32 tun_id;               /* Encapsulating tunnel ID. */
> >      __be32 nw_src;               /* IP source address. */
> >      __be32 nw_dst;               /* IP destination address. */
> >      __u16  in_port;              /* Input switch port. */
> > @@ -243,17 +245,18 @@ struct odp_flowvec {
> >  #define ODPAT_OUTPUT            0    /* Output to switch port. */
> >  #define ODPAT_OUTPUT_GROUP      1    /* Output to all ports in group. */
> >  #define ODPAT_CONTROLLER        2    /* Send copy to controller. */
> > -#define ODPAT_SET_VLAN_VID      3    /* Set the 802.1q VLAN id. */
> > -#define ODPAT_SET_VLAN_PCP      4    /* Set the 802.1q priority. */
> > -#define ODPAT_STRIP_VLAN        5    /* Strip the 802.1q header. */
> > -#define ODPAT_SET_DL_SRC        6    /* Ethernet source address. */
> > -#define ODPAT_SET_DL_DST        7    /* Ethernet destination address. */
> > -#define ODPAT_SET_NW_SRC        8    /* IP source address. */
> > -#define ODPAT_SET_NW_DST        9    /* IP destination address. */
> > -#define ODPAT_SET_NW_TOS        10   /* IP ToS/DSCP field (6 bits). */
> > -#define ODPAT_SET_TP_SRC        11   /* TCP/UDP source port. */
> > -#define ODPAT_SET_TP_DST        12   /* TCP/UDP destination port. */
> > -#define ODPAT_N_ACTIONS         13
> > +#define ODPAT_SET_TUNNEL        3    /* Set the encapsulating tunnel ID.
> */
> > +#define ODPAT_SET_VLAN_VID      4    /* Set the 802.1q VLAN id. */
> > +#define ODPAT_SET_VLAN_PCP      5    /* Set the 802.1q priority. */
> > +#define ODPAT_STRIP_VLAN        6    /* Strip the 802.1q header. */
> > +#define ODPAT_SET_DL_SRC        7    /* Ethernet source address. */
> > +#define ODPAT_SET_DL_DST        8    /* Ethernet destination address. */
> > +#define ODPAT_SET_NW_SRC        9    /* IP source address. */
> > +#define ODPAT_SET_NW_DST        10   /* IP destination address. */
> > +#define ODPAT_SET_NW_TOS        11   /* IP ToS/DSCP field (6 bits). */
> > +#define ODPAT_SET_TP_SRC        12   /* TCP/UDP source port. */
> > +#define ODPAT_SET_TP_DST        13   /* TCP/UDP destination port. */
> > +#define ODPAT_N_ACTIONS         14
>
> If we renumber the actions like that we add a kernel<->user
> incompatibility that I don't think anything else forces.  I'd like to
> get in the habit of avoiding those when we can.
>

You're right.


>
> >  struct odp_action_output {
> >      __u16 type;                  /* ODPAT_OUTPUT. */
> > @@ -275,6 +278,12 @@ struct odp_action_controller {
> >      __u32 arg;                  /* Copied to struct odp_msg 'arg'
> member. */
> >  };
> >
> > +struct odp_action_tunnel {
> > +    __u16 type;                 /* ODPAT_SET_TUNNEL. */
> > +    __u16 reserved;
> > +    __u32 tun_id;               /* Tunnel ID. */
> > +};
>
> From looking elsewhere I think 'tun_id' is in network byte order so
> __be32 would better document this.
>

Yeah, that's right.


>
> > diff --git a/lib/classifier.c b/lib/classifier.c
> > index ee78dad..3f624d9 100644
> > --- a/lib/classifier.c
> > +++ b/lib/classifier.c
> > @@ -55,8 +55,8 @@ static bool rules_match_2wild(const struct cls_rule
> *wild1,
> >  /* Converts the flow in 'flow' into a cls_rule in 'rule', with the given
> >   * 'wildcards' and 'priority'.*/
> >  void
> > -cls_rule_from_flow(struct cls_rule *rule, const flow_t *flow,
> > -                   uint32_t wildcards, unsigned int priority)
> > +cls_rule_from_flow(const flow_t *flow, uint32_t wildcards,
> > +                   unsigned int priority, struct cls_rule *rule)
>
> I just know that changing the argument order here is going to confuse me
> later.  Do you really want to do that?  I kind of consider 'rule' here
> to be like a 'this' argument in C++, so that it should go first.
>

The reason why I changed it is because it seems inconsistent with other
functions where inputs are first and outputs are last.  I guess if you
continue the C++ analogy, this function is a constructor and it is returning
the object, not taking it as a 'this' argument.


>
> >  {
> >      assert(!flow->reserved[0] && !flow->reserved[1] &&
> !flow->reserved[2]);
> >      rule->flow = *flow;
> > @@ -68,11 +68,12 @@ cls_rule_from_flow(struct cls_rule *rule, const
> flow_t *flow,
> >  /* Converts the ofp_match in 'match' into a cls_rule in 'rule', with the
> given
> >   * 'priority'. */
> >  void
> > -cls_rule_from_match(struct cls_rule *rule, const struct ofp_match
> *match,
> > -                    unsigned int priority)
> > +cls_rule_from_match(const struct ofp_match *match, unsigned int
> priority,
> > +                    bool tun_id_from_cookie, uint64_t cookie,
> > +                    struct cls_rule *rule)
>
> In addition to reordering, probably should update the comment on
> cls_rule_from_match().
>

Right.


>
> > diff --git a/lib/classifier.h b/lib/classifier.h
> > index 126d149..3551602 100644
> > --- a/lib/classifier.h
> > +++ b/lib/classifier.h
> > @@ -44,10 +44,11 @@
> >  #include "flow.h"
> >  #include "hmap.h"
> >  #include "list.h"
> > +#include "openflow/nicira-ext.h"
> >  #include "openflow/openflow.h"
> >
> >  /* Number of bytes of fields in a rule. */
> > -#define CLS_N_BYTES 31
> > +#define CLS_N_BYTES 37
> >
> >  /* Fields in a rule.
> >   *
> > @@ -59,6 +60,7 @@
> >      /*        wildcard bit(s)    member name  name     */   \
> >      /*        -----------------  -----------  -------- */   \
> >      CLS_FIELD(OFPFW_IN_PORT,     in_port,     IN_PORT)      \
> > +    CLS_FIELD(NXFW_TUN_ID,       tun_id,      TUN_ID)       \
> >      CLS_FIELD(OFPFW_DL_VLAN,     dl_vlan,     DL_VLAN)      \
> >      CLS_FIELD(OFPFW_DL_VLAN_PCP, dl_vlan_pcp, DL_VLAN_PCP)  \
> >      CLS_FIELD(OFPFW_DL_SRC,      dl_src,      DL_SRC)       \
>
> I'm not sure this should be so near the beginning since I expect that it
> will be wildcarded very often.
>

I wasn't sure where to put it, since it really depends on the situation.  In
what I would consider the general use case it is almost always going to be
wildcarded but in some of our use cases it will be very important.  On the
other hand, vlan is also frequently wildcarded (especially the PCP bits).
 They seemed somewhat similar, which is why I put them together.

More generally, as long as we have a static ordering we're always going to
have poor performance in some situations where it could be better given a
different ordering.  My impression is that given a large number of flows the
vast majority are going to be of the same form (have the same wildcards).
 This means that if we tracked the types of flows we could self-optimizing
at runtime and hit the hash table in the majority of what I would expect are
real world situations.


>
> > --- a/lib/flow.c
> > +++ b/lib/flow.c
> > @@ -89,7 +90,8 @@ pull_vlan(struct ofpbuf *packet)
> >
> >  /* Returns 1 if 'packet' is an IP fragment, 0 otherwise. */
> >  int
> > -flow_extract(struct ofpbuf *packet, uint16_t in_port, flow_t *flow)
> > +flow_extract(struct ofpbuf *packet, uint32_t tun_id, uint16_t in_port,
> > +             flow_t *flow)
>
> 'tun_id' here must be in network byte order, because struct
> odp_flow_key's tun_id member is defined as __be32.  'in_port' must in
> host byte order for a similar reason.  But it would be really nice to
> say that in a comment just above the function; otherwise I'll get
> confused.
>

Sure.


>
> >  {
> >      struct ofpbuf b = *packet;
> >      struct eth_header *eth;
>
> > @@ -3200,6 +3216,17 @@ handle_flow_mod(struct ofproto *p, struct ofconn
> *ofconn,
> >  }
> >
> >  static int
> > +handle_tun_id_from_cookie(struct ofproto *p, struct nxt_tun_id_cookie
> *msg)
> > +{
> > +    if (ntohs(msg->header.length) < sizeof(struct nxt_tun_id_cookie)) {
> > +        return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN);
> > +    }
>
> You could use check_ofp_message() here (maybe we should create an
> equivalent for Nicira extensions, so that the log message is more
> informative; I could use that in my multiple-controllers series too).
> At least, I think that you should log the problem.
>

I changed it to use check_ofp_message() here and also in the main vendor
extension handling code that wasn't logging either.


>
> > diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c
> > index 360f881..98174e9 100644
> > --- a/utilities/ovs-ofctl.c
> > +++ b/utilities/ovs-ofctl.c
> > @@ -717,7 +723,7 @@ static bool
> >  parse_field(const char *name, const struct field **f_out)
> >  {
> >  #define F_OFS(MEMBER) offsetof(struct ofp_match, MEMBER)
> > -    static const struct field fields[] = {
> > +    static const struct field fields[] = {
> >          { "in_port", OFPFW_IN_PORT, F_U16, F_OFS(in_port), 0 },
> >          { "dl_vlan", OFPFW_DL_VLAN, F_U16, F_OFS(dl_vlan), 0 },
> >          { "dl_vlan_pcp", OFPFW_DL_VLAN_PCP, F_U8, F_OFS(dl_vlan_pcp), 0
> },
> > @@ -825,6 +831,8 @@ str_to_flow(char *string, struct ofp_match *match,
> struct ofpbuf *actions,
> >                  *hard_timeout = atoi(value);
> >              } else if (cookie && !strcmp(name, "cookie")) {
> >                  *cookie = str_to_u64(value);
> > +            } else if (!strcmp(name, "tun_id_wild") && !strcmp(value,
> "true")) {
> > +                wildcards |= NXFW_TUN_ID;
>
> I would be tempted to ignore the value for tun_id_wild, just to save
> some typing.
>

Sure.


>
> >              } else if (parse_field(name, &f)) {
> >                  void *data = (char *) match + f->offset;
> >                  if (!strcmp(value, "*") || !strcmp(value, "ANY")) {
> > @@ -1034,6 +1042,25 @@ static void do_del_flows(int argc, char *argv[])
> >  }
> >
> >  static void
> > +do_tun_cookie(int argc OVS_UNUSED, char *argv[])
> > +{
> > +    struct nxt_tun_id_cookie *tun_id_cookie;
> > +    struct ofpbuf *buffer;
> > +    struct vconn *vconn;
> > +
> > +    tun_id_cookie = make_openflow(sizeof *tun_id_cookie, OFPT_VENDOR,
> &buffer);
> > +
> > +    tun_id_cookie->vendor = htonl(NX_VENDOR_ID);
> > +    tun_id_cookie->subtype = htonl(NXT_TUN_ID_FROM_COOKIE);
> > +    tun_id_cookie->set = !strcmp(argv[2], "true");
> > +    memset(tun_id_cookie->pad, 0, sizeof tun_id_cookie->pad);
>
> For what it's worth, make_openflow() zeros all the bytes it allocated
> anyhow.
>

Good to know.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://openvswitch.org/pipermail/dev/attachments/20100414/8bb6824c/attachment.htm>


More information about the dev mailing list