[ovs-dev] [netdevs 2/4] netdev: Clean up and refactor packet receive interface.

Ethan Jackson ethan at nicira.com
Fri Aug 5 18:39:05 PDT 2011


Looks like a significant code simplification.

Thanks,
Ethan

On Fri, Aug 5, 2011 at 14:42, Ben Pfaff <blp at nicira.com> wrote:
> The Open vSwitch tree only has one user of the ability for a netdev to
> receive packets from a network device.  Thus, this commit simplifies the
> common-case use of the netdev interface by replacing the "ethertype" option
> from "struct netdev_options" by a new netdev_listen() call.
>
> The only user of netdev_listen() wants to receive all packets from a
> network device, so this commit also removes the ability to restrict the
> received packets to a particular protocol.  (This ability was once used by
> the Open vSwitch integrated DHCP client, but that code has been removed.)
>
> This commit also simplifies and improves the implementation of the code
> in netdev-linux that started listening to a network device.  Before, I had
> not figured out how to avoid receiving all packets on all devices before
> binding to a particular device, but I took a closer look at the kernel code
> and figured it out.
>
> I've tested that the userspace datapath (dpif-netdev), the only user of
> netdev_recv(), still works after this change.
> ---
>  lib/dpif-netdev.c     |    9 ++++-
>  lib/netdev-dummy.c    |   24 +++++++++--
>  lib/netdev-linux.c    |  109 ++++++++++++++++++++++++++-----------------------
>  lib/netdev-provider.h |   44 +++++++++++++------
>  lib/netdev-vport.c    |    4 +-
>  lib/netdev.c          |   28 +++++++++----
>  lib/netdev.h          |    8 +---
>  ofproto/ofproto.c     |    1 -
>  utilities/ovs-dpctl.c |    2 -
>  vswitchd/bridge.c     |    2 -
>  10 files changed, 138 insertions(+), 93 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 14b9192..fa6b549 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -350,7 +350,6 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
>     /* Open and validate network device. */
>     memset(&netdev_options, 0, sizeof netdev_options);
>     netdev_options.name = devname;
> -    netdev_options.ethertype = NETDEV_ETH_TYPE_ANY;
>     if (dp->class == &dpif_dummy_class) {
>         netdev_options.type = "dummy";
>     } else if (internal) {
> @@ -364,6 +363,14 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
>     /* XXX reject loopback devices */
>     /* XXX reject non-Ethernet devices */
>
> +    error = netdev_listen(netdev);
> +    if (error) {
> +        VLOG_ERR("%s: cannot receive packets on this network device (%s)",
> +                 devname, strerror(errno));
> +        netdev_close(netdev);
> +        return error;
> +    }
> +
>     error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, false);
>     if (error) {
>         netdev_close(netdev);
> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
> index 9cd06f1..15d97cf 100644
> --- a/lib/netdev-dummy.c
> +++ b/lib/netdev-dummy.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2010 Nicira Networks.
> + * Copyright (c) 2010, 2011 Nicira Networks.
>  *
>  * Licensed under the Apache License, Version 2.0 (the "License");
>  * you may not use this file except in compliance with the License.
> @@ -102,8 +102,7 @@ netdev_dummy_destroy(struct netdev_dev *netdev_dev_)
>  }
>
>  static int
> -netdev_dummy_open(struct netdev_dev *netdev_dev_, int ethertype OVS_UNUSED,
> -                  struct netdev **netdevp)
> +netdev_dummy_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
>  {
>     struct netdev_dummy *netdev;
>
> @@ -122,6 +121,22 @@ netdev_dummy_close(struct netdev *netdev_)
>  }
>
>  static int
> +netdev_dummy_listen(struct netdev *netdev_ OVS_UNUSED)
> +{
> +    /* It's OK to listen on a dummy device.  It just never receives any
> +     * packets. */
> +    return 0;
> +}
> +
> +static int
> +netdev_dummy_recv(struct netdev *netdev_ OVS_UNUSED,
> +                  void *buffer OVS_UNUSED, size_t size OVS_UNUSED)
> +{
> +    /* A dummy device never receives any packets. */
> +    return -EAGAIN;
> +}
> +
> +static int
>  netdev_dummy_set_etheraddr(struct netdev *netdev,
>                            const uint8_t mac[ETH_ADDR_LEN])
>  {
> @@ -234,7 +249,8 @@ static const struct netdev_class dummy_class = {
>
>     NULL,                       /* enumerate */
>
> -    NULL,                       /* recv */
> +    netdev_dummy_listen,        /* listen */
> +    netdev_dummy_recv,          /* recv */
>     NULL,                       /* recv_wait */
>     NULL,                       /* drain */
>
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 385c0b8..ac511f0 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -639,8 +639,7 @@ netdev_linux_destroy(struct netdev_dev *netdev_dev_)
>  }
>
>  static int
> -netdev_linux_open(struct netdev_dev *netdev_dev_, int ethertype,
> -                  struct netdev **netdevp)
> +netdev_linux_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
>  {
>     struct netdev_dev_linux *netdev_dev = netdev_dev_linux_cast(netdev_dev_);
>     struct netdev_linux *netdev;
> @@ -676,54 +675,6 @@ netdev_linux_open(struct netdev_dev *netdev_dev_, int ethertype,
>          * directions appearing to be reversed. */
>         netdev->fd = netdev_dev->state.tap.fd;
>         netdev_dev->state.tap.opened = true;
> -    } else if (ethertype != NETDEV_ETH_TYPE_NONE) {
> -        struct sockaddr_ll sll;
> -        int protocol;
> -        int ifindex;
> -
> -        /* Create file descriptor. */
> -        protocol = (ethertype == NETDEV_ETH_TYPE_ANY ? ETH_P_ALL
> -                    : ethertype == NETDEV_ETH_TYPE_802_2 ? ETH_P_802_2
> -                    : ethertype);
> -        netdev->fd = socket(PF_PACKET, SOCK_RAW,
> -                            (OVS_FORCE int) htons(protocol));
> -        if (netdev->fd < 0) {
> -            error = errno;
> -            goto error;
> -        }
> -
> -        /* Set non-blocking mode. */
> -        error = set_nonblocking(netdev->fd);
> -        if (error) {
> -            goto error;
> -        }
> -
> -        /* Get ethernet device index. */
> -        error = get_ifindex(&netdev->netdev, &ifindex);
> -        if (error) {
> -            goto error;
> -        }
> -
> -        /* Bind to specific ethernet device. */
> -        memset(&sll, 0, sizeof sll);
> -        sll.sll_family = AF_PACKET;
> -        sll.sll_ifindex = ifindex;
> -        if (bind(netdev->fd,
> -                 (struct sockaddr *) &sll, sizeof sll) < 0) {
> -            error = errno;
> -            VLOG_ERR("bind to %s failed: %s", netdev_dev_get_name(netdev_dev_),
> -                     strerror(error));
> -            goto error;
> -        }
> -
> -        /* Between the socket() and bind() calls above, the socket receives all
> -         * packets of the requested type on all system interfaces.  We do not
> -         * want to receive that data, but there is no way to avoid it.  So we
> -         * must now drain out the receive queue. */
> -        error = drain_rcvbuf(netdev->fd);
> -        if (error) {
> -            goto error;
> -        }
>     }
>
>     *netdevp = &netdev->netdev;
> @@ -769,12 +720,67 @@ netdev_linux_enumerate(struct sset *sset)
>  }
>
>  static int
> +netdev_linux_listen(struct netdev *netdev_)
> +{
> +    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> +    struct sockaddr_ll sll;
> +    int ifindex;
> +    int error;
> +    int fd;
> +
> +    if (netdev->fd >= 0) {
> +        return 0;
> +    }
> +
> +    /* Create file descriptor. */
> +    fd = socket(PF_PACKET, SOCK_RAW, 0);
> +    if (fd < 0) {
> +        error = errno;
> +        VLOG_ERR("failed to create raw socket (%s)", strerror(error));
> +        goto error;
> +    }
> +
> +    /* Set non-blocking mode. */
> +    error = set_nonblocking(fd);
> +    if (error) {
> +        goto error;
> +    }
> +
> +    /* Get ethernet device index. */
> +    error = get_ifindex(&netdev->netdev, &ifindex);
> +    if (error) {
> +        goto error;
> +    }
> +
> +    /* Bind to specific ethernet device. */
> +    memset(&sll, 0, sizeof sll);
> +    sll.sll_family = AF_PACKET;
> +    sll.sll_ifindex = ifindex;
> +    sll.sll_protocol = (OVS_FORCE unsigned short int) htons(ETH_P_ALL);
> +    if (bind(fd, (struct sockaddr *) &sll, sizeof sll) < 0) {
> +        error = errno;
> +        VLOG_ERR("%s: failed to bind raw socket (%s)",
> +                 netdev_get_name(netdev_), strerror(error));
> +        goto error;
> +    }
> +
> +    netdev->fd = fd;
> +    return 0;
> +
> +error:
> +    if (fd >= 0) {
> +        close(fd);
> +    }
> +    return error;
> +}
> +
> +static int
>  netdev_linux_recv(struct netdev *netdev_, void *data, size_t size)
>  {
>     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>
>     if (netdev->fd < 0) {
> -        /* Device was opened with NETDEV_ETH_TYPE_NONE. */
> +        /* Device is not listening. */
>         return -EAGAIN;
>     }
>
> @@ -2254,6 +2260,7 @@ netdev_linux_change_seq(const struct netdev *netdev)
>                                                                 \
>     ENUMERATE,                                                  \
>                                                                 \
> +    netdev_linux_listen,                                        \
>     netdev_linux_recv,                                          \
>     netdev_linux_recv_wait,                                     \
>     netdev_linux_drain,                                         \
> diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
> index 7bb4eac..093a25d 100644
> --- a/lib/netdev-provider.h
> +++ b/lib/netdev-provider.h
> @@ -147,14 +147,8 @@ struct netdev_class {
>                          const struct shash *args);
>
>     /* Attempts to open a network device.  On success, sets 'netdevp'
> -     * to the new network device.
> -     *
> -     * 'ethertype' may be a 16-bit Ethernet protocol value in host byte order
> -     * to capture frames of that type received on the device.  It may also be
> -     * one of the 'enum netdev_pseudo_ethertype' values to receive frames in
> -     * one of those categories. */
> -    int (*open)(struct netdev_dev *netdev_dev, int ethertype,
> -                struct netdev **netdevp);
> +     * to the new network device. */
> +    int (*open)(struct netdev_dev *netdev_dev, struct netdev **netdevp);
>
>     /* Closes 'netdev'. */
>     void (*close)(struct netdev *netdev);
> @@ -168,17 +162,39 @@ struct netdev_class {
>      * If this netdev class does not support enumeration, this may be a null
>      * pointer. */
>     int (*enumerate)(struct sset *all_names);
> +
> +/* ## ----------------- ## */
> +/* ## Receiving Packets ## */
> +/* ## ----------------- ## */
> +
> +/* The network provider interface is mostly used for inspecting and configuring
> + * device "metadata", not for sending and receiving packets directly.  It may
> + * be impractical to implement these functions on some operating systems and
> + * hardware.  These functions may all be NULL in such cases.
> + *
> + * (However, the "dpif-netdev" implementation, which is the easiest way to
> + * integrate Open vSwitch with a new operating system or hardware, does require
> + * the ability to receive packets.) */
> +
> +    /* Attempts to set up 'netdev' for receiving packets with ->recv().
> +     * Returns 0 if successful, otherwise a positive errno value.  Return
> +     * EOPNOTSUPP to indicate that the network device does not implement packet
> +     * reception through this interface.  This function may be set to null if
> +     * it would always return EOPNOTSUPP anyhow.  (This will prevent the
> +     * network device from being usefully used by the netdev-based "userspace
> +     * datapath".)*/
> +    int (*listen)(struct netdev *netdev);
>
>     /* Attempts to receive a packet from 'netdev' into the 'size' bytes in
>      * 'buffer'.  If successful, returns the number of bytes in the received
>      * packet, otherwise a negative errno value.  Returns -EAGAIN immediately
>      * if no packet is ready to be received.
>      *
> -     * May return -EOPNOTSUPP if a network device does not implement packet
> -     * reception through this interface.  This function may be set to null if
> -     * it would always return -EOPNOTSUPP anyhow.  (This will prevent the
> -     * network device from being usefully used by the netdev-based "userspace
> -     * datapath".) */
> +     * This function can only be expected to return a packet if ->listen() has
> +     * been called successfully.
> +     *
> +     * May be null if not needed, such as for a network device that does not
> +     * implement packet reception through the 'recv' member function. */
>     int (*recv)(struct netdev *netdev, void *buffer, size_t size);
>
>     /* Registers with the poll loop to wake up from the next call to
> @@ -194,7 +210,7 @@ struct netdev_class {
>      * May be null if not needed, such as for a network device that does not
>      * implement packet reception through the 'recv' member function. */
>     int (*drain)(struct netdev *netdev);
> -
> +
>     /* Sends the 'size'-byte packet in 'buffer' on 'netdev'.  Returns 0 if
>      * successful, otherwise a positive errno value.  Returns EAGAIN without
>      * blocking if the packet cannot be queued immediately.  Returns EMSGSIZE
> diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
> index b9c1bfe..e3e480d 100644
> --- a/lib/netdev-vport.c
> +++ b/lib/netdev-vport.c
> @@ -258,8 +258,7 @@ netdev_vport_destroy(struct netdev_dev *netdev_dev_)
>  }
>
>  static int
> -netdev_vport_open(struct netdev_dev *netdev_dev_, int ethertype OVS_UNUSED,
> -                struct netdev **netdevp)
> +netdev_vport_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
>  {
>     struct netdev_vport *netdev;
>
> @@ -937,6 +936,7 @@ config_equal_ipsec(const struct shash *nd_args, const struct shash *args)
>                                                             \
>     NULL,                       /* enumerate */             \
>                                                             \
> +    NULL,                       /* listen */                \
>     NULL,                       /* recv */                  \
>     NULL,                       /* recv_wait */             \
>     NULL,                       /* drain */                 \
> diff --git a/lib/netdev.c b/lib/netdev.c
> index b8592c1..9954929 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -204,12 +204,8 @@ update_device_args(struct netdev_dev *dev, const struct shash *args)
>  * to the new network device, otherwise to null.
>  *
>  * If this is the first time the device has been opened, then create is called
> - * before opening.  The device is created using the given type and arguments.
> - *
> - * 'ethertype' may be a 16-bit Ethernet protocol value in host byte order to
> - * capture frames of that type received on the device.  It may also be one of
> - * the 'enum netdev_pseudo_ethertype' values to receive frames in one of those
> - * categories. */
> + * before opening.  The device is created using the given type and
> + * arguments. */
>  int
>  netdev_open(struct netdev_options *options, struct netdev **netdevp)
>  {
> @@ -250,8 +246,7 @@ netdev_open(struct netdev_options *options, struct netdev **netdevp)
>         return EINVAL;
>     }
>
> -    error = netdev_dev->netdev_class->open(netdev_dev, options->ethertype,
> -                netdevp);
> +    error = netdev_dev->netdev_class->open(netdev_dev, netdevp);
>
>     if (!error) {
>         netdev_dev->ref_cnt++;
> @@ -271,7 +266,6 @@ netdev_open_default(const char *name, struct netdev **netdevp)
>
>     memset(&options, 0, sizeof options);
>     options.name = name;
> -    options.ethertype = NETDEV_ETH_TYPE_NONE;
>
>     return netdev_open(&options, netdevp);
>  }
> @@ -387,6 +381,19 @@ netdev_enumerate(struct sset *sset)
>     return error;
>  }
>
> +/* Attempts to set up 'netdev' for receiving packets with netdev_recv().
> + * Returns 0 if successful, otherwise a positive errno value.  EOPNOTSUPP
> + * indicates that the network device does not implement packet reception
> + * through this interface. */
> +int
> +netdev_listen(struct netdev *netdev)
> +{
> +    int (*listen)(struct netdev *);
> +
> +    listen = netdev_get_dev(netdev)->netdev_class->listen;
> +    return listen ? (listen)(netdev) : EOPNOTSUPP;
> +}
> +
>  /* Attempts to receive a packet from 'netdev' into 'buffer', which the caller
>  * must have initialized with sufficient room for the packet.  The space
>  * required to receive any packet is ETH_HEADER_LEN bytes, plus VLAN_HEADER_LEN
> @@ -394,6 +401,9 @@ netdev_enumerate(struct sset *sset)
>  * (Some devices do not allow for a VLAN header, in which case VLAN_HEADER_LEN
>  * need not be included.)
>  *
> + * This function can only be expected to return a packet if ->listen() has
> + * been called successfully.
> + *
>  * If a packet is successfully retrieved, returns 0.  In this case 'buffer' is
>  * guaranteed to contain at least ETH_TOTAL_MIN bytes.  Otherwise, returns a
>  * positive errno value.  Returns EAGAIN immediately if no packet is ready to
> diff --git a/lib/netdev.h b/lib/netdev.h
> index ba4a8e3..7e16bd3 100644
> --- a/lib/netdev.h
> +++ b/lib/netdev.h
> @@ -44,12 +44,6 @@ enum netdev_flags {
>     NETDEV_LOOPBACK = 0x0004    /* This is a loopback device. */
>  };
>
> -enum netdev_pseudo_ethertype {
> -    NETDEV_ETH_TYPE_NONE = -128, /* Receive no frames. */
> -    NETDEV_ETH_TYPE_ANY,         /* Receive all frames. */
> -    NETDEV_ETH_TYPE_802_2        /* Receive all IEEE 802.2 frames. */
> -};
> -
>  /* Network device statistics.
>  *
>  * Values of unsupported statistics are set to all-1-bits (UINT64_MAX). */
> @@ -85,7 +79,6 @@ struct netdev_options {
>     const char *name;
>     const char *type;
>     const struct shash *args;
> -    int ethertype;
>  };
>
>  struct netdev;
> @@ -117,6 +110,7 @@ int netdev_get_mtu(const struct netdev *, int *mtup);
>  int netdev_get_ifindex(const struct netdev *);
>
>  /* Packet send and receive. */
> +int netdev_listen(struct netdev *);
>  int netdev_recv(struct netdev *, struct ofpbuf *);
>  void netdev_recv_wait(struct netdev *);
>  int netdev_drain(struct netdev *);
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> index f40f995..8054d05 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -1115,7 +1115,6 @@ ofport_open(const struct ofproto_port *ofproto_port, struct ofp_phy_port *opp)
>     memset(&netdev_options, 0, sizeof netdev_options);
>     netdev_options.name = ofproto_port->name;
>     netdev_options.type = ofproto_port->type;
> -    netdev_options.ethertype = NETDEV_ETH_TYPE_NONE;
>
>     error = netdev_open(&netdev_options, &netdev);
>     if (error) {
> diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c
> index 7962c7a..1c31c71 100644
> --- a/utilities/ovs-dpctl.c
> +++ b/utilities/ovs-dpctl.c
> @@ -232,7 +232,6 @@ do_add_if(int argc OVS_UNUSED, char *argv[])
>         options.name = strtok_r(argv[i], ",", &save_ptr);
>         options.type = "system";
>         options.args = &args;
> -        options.ethertype = NETDEV_ETH_TYPE_NONE;
>
>         if (!options.name) {
>             ovs_error(0, "%s is not a valid network device name", argv[i]);
> @@ -384,7 +383,6 @@ show_dpif(struct dpif *dpif)
>             netdev_options.name = dpif_port.name;
>             netdev_options.type = dpif_port.type;
>             netdev_options.args = NULL;
> -            netdev_options.ethertype = NETDEV_ETH_TYPE_NONE;
>             error = netdev_open(&netdev_options, &netdev);
>             if (!error) {
>                 const struct shash_node **nodes;
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index 9b30791..f43902b 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -859,7 +859,6 @@ bridge_add_ofproto_ports(struct bridge *br)
>                 options.name = iface->name;
>                 options.type = iface->type;
>                 options.args = &args;
> -                options.ethertype = NETDEV_ETH_TYPE_NONE;
>                 error = netdev_open(&options, &iface->netdev);
>             } else {
>                 error = netdev_set_config(iface->netdev, &args);
> @@ -925,7 +924,6 @@ bridge_add_ofproto_ports(struct bridge *br)
>                 options.name = port->name;
>                 options.type = "internal";
>                 options.args = NULL;
> -                options.ethertype = NETDEV_ETH_TYPE_NONE;
>                 error = netdev_open(&options, &netdev);
>                 if (!error) {
>                     ofproto_port_add(br->ofproto, netdev, NULL);
> --
> 1.7.4.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>



More information about the dev mailing list