[ovs-dev] [netdevs 2/4] netdev: Clean up and refactor packet receive interface.
Ethan Jackson
ethan at nicira.com
Fri Aug 5 18:39:05 PDT 2011
Looks like a significant code simplification.
Thanks,
Ethan
On Fri, Aug 5, 2011 at 14:42, Ben Pfaff <blp at nicira.com> wrote:
> The Open vSwitch tree only has one user of the ability for a netdev to
> receive packets from a network device. Thus, this commit simplifies the
> common-case use of the netdev interface by replacing the "ethertype" option
> from "struct netdev_options" by a new netdev_listen() call.
>
> The only user of netdev_listen() wants to receive all packets from a
> network device, so this commit also removes the ability to restrict the
> received packets to a particular protocol. (This ability was once used by
> the Open vSwitch integrated DHCP client, but that code has been removed.)
>
> This commit also simplifies and improves the implementation of the code
> in netdev-linux that started listening to a network device. Before, I had
> not figured out how to avoid receiving all packets on all devices before
> binding to a particular device, but I took a closer look at the kernel code
> and figured it out.
>
> I've tested that the userspace datapath (dpif-netdev), the only user of
> netdev_recv(), still works after this change.
> ---
> lib/dpif-netdev.c | 9 ++++-
> lib/netdev-dummy.c | 24 +++++++++--
> lib/netdev-linux.c | 109 ++++++++++++++++++++++++++-----------------------
> lib/netdev-provider.h | 44 +++++++++++++------
> lib/netdev-vport.c | 4 +-
> lib/netdev.c | 28 +++++++++----
> lib/netdev.h | 8 +---
> ofproto/ofproto.c | 1 -
> utilities/ovs-dpctl.c | 2 -
> vswitchd/bridge.c | 2 -
> 10 files changed, 138 insertions(+), 93 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 14b9192..fa6b549 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -350,7 +350,6 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
> /* Open and validate network device. */
> memset(&netdev_options, 0, sizeof netdev_options);
> netdev_options.name = devname;
> - netdev_options.ethertype = NETDEV_ETH_TYPE_ANY;
> if (dp->class == &dpif_dummy_class) {
> netdev_options.type = "dummy";
> } else if (internal) {
> @@ -364,6 +363,14 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
> /* XXX reject loopback devices */
> /* XXX reject non-Ethernet devices */
>
> + error = netdev_listen(netdev);
> + if (error) {
> + VLOG_ERR("%s: cannot receive packets on this network device (%s)",
> + devname, strerror(errno));
> + netdev_close(netdev);
> + return error;
> + }
> +
> error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, false);
> if (error) {
> netdev_close(netdev);
> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
> index 9cd06f1..15d97cf 100644
> --- a/lib/netdev-dummy.c
> +++ b/lib/netdev-dummy.c
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (c) 2010 Nicira Networks.
> + * Copyright (c) 2010, 2011 Nicira Networks.
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> @@ -102,8 +102,7 @@ netdev_dummy_destroy(struct netdev_dev *netdev_dev_)
> }
>
> static int
> -netdev_dummy_open(struct netdev_dev *netdev_dev_, int ethertype OVS_UNUSED,
> - struct netdev **netdevp)
> +netdev_dummy_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
> {
> struct netdev_dummy *netdev;
>
> @@ -122,6 +121,22 @@ netdev_dummy_close(struct netdev *netdev_)
> }
>
> static int
> +netdev_dummy_listen(struct netdev *netdev_ OVS_UNUSED)
> +{
> + /* It's OK to listen on a dummy device. It just never receives any
> + * packets. */
> + return 0;
> +}
> +
> +static int
> +netdev_dummy_recv(struct netdev *netdev_ OVS_UNUSED,
> + void *buffer OVS_UNUSED, size_t size OVS_UNUSED)
> +{
> + /* A dummy device never receives any packets. */
> + return -EAGAIN;
> +}
> +
> +static int
> netdev_dummy_set_etheraddr(struct netdev *netdev,
> const uint8_t mac[ETH_ADDR_LEN])
> {
> @@ -234,7 +249,8 @@ static const struct netdev_class dummy_class = {
>
> NULL, /* enumerate */
>
> - NULL, /* recv */
> + netdev_dummy_listen, /* listen */
> + netdev_dummy_recv, /* recv */
> NULL, /* recv_wait */
> NULL, /* drain */
>
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 385c0b8..ac511f0 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -639,8 +639,7 @@ netdev_linux_destroy(struct netdev_dev *netdev_dev_)
> }
>
> static int
> -netdev_linux_open(struct netdev_dev *netdev_dev_, int ethertype,
> - struct netdev **netdevp)
> +netdev_linux_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
> {
> struct netdev_dev_linux *netdev_dev = netdev_dev_linux_cast(netdev_dev_);
> struct netdev_linux *netdev;
> @@ -676,54 +675,6 @@ netdev_linux_open(struct netdev_dev *netdev_dev_, int ethertype,
> * directions appearing to be reversed. */
> netdev->fd = netdev_dev->state.tap.fd;
> netdev_dev->state.tap.opened = true;
> - } else if (ethertype != NETDEV_ETH_TYPE_NONE) {
> - struct sockaddr_ll sll;
> - int protocol;
> - int ifindex;
> -
> - /* Create file descriptor. */
> - protocol = (ethertype == NETDEV_ETH_TYPE_ANY ? ETH_P_ALL
> - : ethertype == NETDEV_ETH_TYPE_802_2 ? ETH_P_802_2
> - : ethertype);
> - netdev->fd = socket(PF_PACKET, SOCK_RAW,
> - (OVS_FORCE int) htons(protocol));
> - if (netdev->fd < 0) {
> - error = errno;
> - goto error;
> - }
> -
> - /* Set non-blocking mode. */
> - error = set_nonblocking(netdev->fd);
> - if (error) {
> - goto error;
> - }
> -
> - /* Get ethernet device index. */
> - error = get_ifindex(&netdev->netdev, &ifindex);
> - if (error) {
> - goto error;
> - }
> -
> - /* Bind to specific ethernet device. */
> - memset(&sll, 0, sizeof sll);
> - sll.sll_family = AF_PACKET;
> - sll.sll_ifindex = ifindex;
> - if (bind(netdev->fd,
> - (struct sockaddr *) &sll, sizeof sll) < 0) {
> - error = errno;
> - VLOG_ERR("bind to %s failed: %s", netdev_dev_get_name(netdev_dev_),
> - strerror(error));
> - goto error;
> - }
> -
> - /* Between the socket() and bind() calls above, the socket receives all
> - * packets of the requested type on all system interfaces. We do not
> - * want to receive that data, but there is no way to avoid it. So we
> - * must now drain out the receive queue. */
> - error = drain_rcvbuf(netdev->fd);
> - if (error) {
> - goto error;
> - }
> }
>
> *netdevp = &netdev->netdev;
> @@ -769,12 +720,67 @@ netdev_linux_enumerate(struct sset *sset)
> }
>
> static int
> +netdev_linux_listen(struct netdev *netdev_)
> +{
> + struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> + struct sockaddr_ll sll;
> + int ifindex;
> + int error;
> + int fd;
> +
> + if (netdev->fd >= 0) {
> + return 0;
> + }
> +
> + /* Create file descriptor. */
> + fd = socket(PF_PACKET, SOCK_RAW, 0);
> + if (fd < 0) {
> + error = errno;
> + VLOG_ERR("failed to create raw socket (%s)", strerror(error));
> + goto error;
> + }
> +
> + /* Set non-blocking mode. */
> + error = set_nonblocking(fd);
> + if (error) {
> + goto error;
> + }
> +
> + /* Get ethernet device index. */
> + error = get_ifindex(&netdev->netdev, &ifindex);
> + if (error) {
> + goto error;
> + }
> +
> + /* Bind to specific ethernet device. */
> + memset(&sll, 0, sizeof sll);
> + sll.sll_family = AF_PACKET;
> + sll.sll_ifindex = ifindex;
> + sll.sll_protocol = (OVS_FORCE unsigned short int) htons(ETH_P_ALL);
> + if (bind(fd, (struct sockaddr *) &sll, sizeof sll) < 0) {
> + error = errno;
> + VLOG_ERR("%s: failed to bind raw socket (%s)",
> + netdev_get_name(netdev_), strerror(error));
> + goto error;
> + }
> +
> + netdev->fd = fd;
> + return 0;
> +
> +error:
> + if (fd >= 0) {
> + close(fd);
> + }
> + return error;
> +}
> +
> +static int
> netdev_linux_recv(struct netdev *netdev_, void *data, size_t size)
> {
> struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>
> if (netdev->fd < 0) {
> - /* Device was opened with NETDEV_ETH_TYPE_NONE. */
> + /* Device is not listening. */
> return -EAGAIN;
> }
>
> @@ -2254,6 +2260,7 @@ netdev_linux_change_seq(const struct netdev *netdev)
> \
> ENUMERATE, \
> \
> + netdev_linux_listen, \
> netdev_linux_recv, \
> netdev_linux_recv_wait, \
> netdev_linux_drain, \
> diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
> index 7bb4eac..093a25d 100644
> --- a/lib/netdev-provider.h
> +++ b/lib/netdev-provider.h
> @@ -147,14 +147,8 @@ struct netdev_class {
> const struct shash *args);
>
> /* Attempts to open a network device. On success, sets 'netdevp'
> - * to the new network device.
> - *
> - * 'ethertype' may be a 16-bit Ethernet protocol value in host byte order
> - * to capture frames of that type received on the device. It may also be
> - * one of the 'enum netdev_pseudo_ethertype' values to receive frames in
> - * one of those categories. */
> - int (*open)(struct netdev_dev *netdev_dev, int ethertype,
> - struct netdev **netdevp);
> + * to the new network device. */
> + int (*open)(struct netdev_dev *netdev_dev, struct netdev **netdevp);
>
> /* Closes 'netdev'. */
> void (*close)(struct netdev *netdev);
> @@ -168,17 +162,39 @@ struct netdev_class {
> * If this netdev class does not support enumeration, this may be a null
> * pointer. */
> int (*enumerate)(struct sset *all_names);
> +
> +/* ## ----------------- ## */
> +/* ## Receiving Packets ## */
> +/* ## ----------------- ## */
> +
> +/* The network provider interface is mostly used for inspecting and configuring
> + * device "metadata", not for sending and receiving packets directly. It may
> + * be impractical to implement these functions on some operating systems and
> + * hardware. These functions may all be NULL in such cases.
> + *
> + * (However, the "dpif-netdev" implementation, which is the easiest way to
> + * integrate Open vSwitch with a new operating system or hardware, does require
> + * the ability to receive packets.) */
> +
> + /* Attempts to set up 'netdev' for receiving packets with ->recv().
> + * Returns 0 if successful, otherwise a positive errno value. Return
> + * EOPNOTSUPP to indicate that the network device does not implement packet
> + * reception through this interface. This function may be set to null if
> + * it would always return EOPNOTSUPP anyhow. (This will prevent the
> + * network device from being usefully used by the netdev-based "userspace
> + * datapath".)*/
> + int (*listen)(struct netdev *netdev);
>
> /* Attempts to receive a packet from 'netdev' into the 'size' bytes in
> * 'buffer'. If successful, returns the number of bytes in the received
> * packet, otherwise a negative errno value. Returns -EAGAIN immediately
> * if no packet is ready to be received.
> *
> - * May return -EOPNOTSUPP if a network device does not implement packet
> - * reception through this interface. This function may be set to null if
> - * it would always return -EOPNOTSUPP anyhow. (This will prevent the
> - * network device from being usefully used by the netdev-based "userspace
> - * datapath".) */
> + * This function can only be expected to return a packet if ->listen() has
> + * been called successfully.
> + *
> + * May be null if not needed, such as for a network device that does not
> + * implement packet reception through the 'recv' member function. */
> int (*recv)(struct netdev *netdev, void *buffer, size_t size);
>
> /* Registers with the poll loop to wake up from the next call to
> @@ -194,7 +210,7 @@ struct netdev_class {
> * May be null if not needed, such as for a network device that does not
> * implement packet reception through the 'recv' member function. */
> int (*drain)(struct netdev *netdev);
> -
> +
> /* Sends the 'size'-byte packet in 'buffer' on 'netdev'. Returns 0 if
> * successful, otherwise a positive errno value. Returns EAGAIN without
> * blocking if the packet cannot be queued immediately. Returns EMSGSIZE
> diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
> index b9c1bfe..e3e480d 100644
> --- a/lib/netdev-vport.c
> +++ b/lib/netdev-vport.c
> @@ -258,8 +258,7 @@ netdev_vport_destroy(struct netdev_dev *netdev_dev_)
> }
>
> static int
> -netdev_vport_open(struct netdev_dev *netdev_dev_, int ethertype OVS_UNUSED,
> - struct netdev **netdevp)
> +netdev_vport_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp)
> {
> struct netdev_vport *netdev;
>
> @@ -937,6 +936,7 @@ config_equal_ipsec(const struct shash *nd_args, const struct shash *args)
> \
> NULL, /* enumerate */ \
> \
> + NULL, /* listen */ \
> NULL, /* recv */ \
> NULL, /* recv_wait */ \
> NULL, /* drain */ \
> diff --git a/lib/netdev.c b/lib/netdev.c
> index b8592c1..9954929 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -204,12 +204,8 @@ update_device_args(struct netdev_dev *dev, const struct shash *args)
> * to the new network device, otherwise to null.
> *
> * If this is the first time the device has been opened, then create is called
> - * before opening. The device is created using the given type and arguments.
> - *
> - * 'ethertype' may be a 16-bit Ethernet protocol value in host byte order to
> - * capture frames of that type received on the device. It may also be one of
> - * the 'enum netdev_pseudo_ethertype' values to receive frames in one of those
> - * categories. */
> + * before opening. The device is created using the given type and
> + * arguments. */
> int
> netdev_open(struct netdev_options *options, struct netdev **netdevp)
> {
> @@ -250,8 +246,7 @@ netdev_open(struct netdev_options *options, struct netdev **netdevp)
> return EINVAL;
> }
>
> - error = netdev_dev->netdev_class->open(netdev_dev, options->ethertype,
> - netdevp);
> + error = netdev_dev->netdev_class->open(netdev_dev, netdevp);
>
> if (!error) {
> netdev_dev->ref_cnt++;
> @@ -271,7 +266,6 @@ netdev_open_default(const char *name, struct netdev **netdevp)
>
> memset(&options, 0, sizeof options);
> options.name = name;
> - options.ethertype = NETDEV_ETH_TYPE_NONE;
>
> return netdev_open(&options, netdevp);
> }
> @@ -387,6 +381,19 @@ netdev_enumerate(struct sset *sset)
> return error;
> }
>
> +/* Attempts to set up 'netdev' for receiving packets with netdev_recv().
> + * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP
> + * indicates that the network device does not implement packet reception
> + * through this interface. */
> +int
> +netdev_listen(struct netdev *netdev)
> +{
> + int (*listen)(struct netdev *);
> +
> + listen = netdev_get_dev(netdev)->netdev_class->listen;
> + return listen ? (listen)(netdev) : EOPNOTSUPP;
> +}
> +
> /* Attempts to receive a packet from 'netdev' into 'buffer', which the caller
> * must have initialized with sufficient room for the packet. The space
> * required to receive any packet is ETH_HEADER_LEN bytes, plus VLAN_HEADER_LEN
> @@ -394,6 +401,9 @@ netdev_enumerate(struct sset *sset)
> * (Some devices do not allow for a VLAN header, in which case VLAN_HEADER_LEN
> * need not be included.)
> *
> + * This function can only be expected to return a packet if ->listen() has
> + * been called successfully.
> + *
> * If a packet is successfully retrieved, returns 0. In this case 'buffer' is
> * guaranteed to contain at least ETH_TOTAL_MIN bytes. Otherwise, returns a
> * positive errno value. Returns EAGAIN immediately if no packet is ready to
> diff --git a/lib/netdev.h b/lib/netdev.h
> index ba4a8e3..7e16bd3 100644
> --- a/lib/netdev.h
> +++ b/lib/netdev.h
> @@ -44,12 +44,6 @@ enum netdev_flags {
> NETDEV_LOOPBACK = 0x0004 /* This is a loopback device. */
> };
>
> -enum netdev_pseudo_ethertype {
> - NETDEV_ETH_TYPE_NONE = -128, /* Receive no frames. */
> - NETDEV_ETH_TYPE_ANY, /* Receive all frames. */
> - NETDEV_ETH_TYPE_802_2 /* Receive all IEEE 802.2 frames. */
> -};
> -
> /* Network device statistics.
> *
> * Values of unsupported statistics are set to all-1-bits (UINT64_MAX). */
> @@ -85,7 +79,6 @@ struct netdev_options {
> const char *name;
> const char *type;
> const struct shash *args;
> - int ethertype;
> };
>
> struct netdev;
> @@ -117,6 +110,7 @@ int netdev_get_mtu(const struct netdev *, int *mtup);
> int netdev_get_ifindex(const struct netdev *);
>
> /* Packet send and receive. */
> +int netdev_listen(struct netdev *);
> int netdev_recv(struct netdev *, struct ofpbuf *);
> void netdev_recv_wait(struct netdev *);
> int netdev_drain(struct netdev *);
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> index f40f995..8054d05 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -1115,7 +1115,6 @@ ofport_open(const struct ofproto_port *ofproto_port, struct ofp_phy_port *opp)
> memset(&netdev_options, 0, sizeof netdev_options);
> netdev_options.name = ofproto_port->name;
> netdev_options.type = ofproto_port->type;
> - netdev_options.ethertype = NETDEV_ETH_TYPE_NONE;
>
> error = netdev_open(&netdev_options, &netdev);
> if (error) {
> diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c
> index 7962c7a..1c31c71 100644
> --- a/utilities/ovs-dpctl.c
> +++ b/utilities/ovs-dpctl.c
> @@ -232,7 +232,6 @@ do_add_if(int argc OVS_UNUSED, char *argv[])
> options.name = strtok_r(argv[i], ",", &save_ptr);
> options.type = "system";
> options.args = &args;
> - options.ethertype = NETDEV_ETH_TYPE_NONE;
>
> if (!options.name) {
> ovs_error(0, "%s is not a valid network device name", argv[i]);
> @@ -384,7 +383,6 @@ show_dpif(struct dpif *dpif)
> netdev_options.name = dpif_port.name;
> netdev_options.type = dpif_port.type;
> netdev_options.args = NULL;
> - netdev_options.ethertype = NETDEV_ETH_TYPE_NONE;
> error = netdev_open(&netdev_options, &netdev);
> if (!error) {
> const struct shash_node **nodes;
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index 9b30791..f43902b 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -859,7 +859,6 @@ bridge_add_ofproto_ports(struct bridge *br)
> options.name = iface->name;
> options.type = iface->type;
> options.args = &args;
> - options.ethertype = NETDEV_ETH_TYPE_NONE;
> error = netdev_open(&options, &iface->netdev);
> } else {
> error = netdev_set_config(iface->netdev, &args);
> @@ -925,7 +924,6 @@ bridge_add_ofproto_ports(struct bridge *br)
> options.name = port->name;
> options.type = "internal";
> options.args = NULL;
> - options.ethertype = NETDEV_ETH_TYPE_NONE;
> error = netdev_open(&options, &netdev);
> if (!error) {
> ofproto_port_add(br->ofproto, netdev, NULL);
> --
> 1.7.4.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
More information about the dev
mailing list