[ovs-dev] [PATCH] Implement draft VXLAN L2-over-L3 tunneling protocol.

Ben Pfaff blp at nicira.com
Wed Oct 12 16:01:12 PDT 2011


This commit implements the VXLAN tunneling protocol described at
http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-00

Multicast support is not yet implemented, but it's in the works.

In my simple VM-based test rig, "netperf" performance was the same as GRE.

Jesse Gross <jesse at nicira.com> really deserves credit for most of
datapath/vport-vxlan.c.  It's mostly a mixed-up and simplified copy of
datapath/vport-capwap.c with a bit of search-and-replace.

Justin Pettit <jpettit at nicira.com> implemented the changes to
debian/ovs-monitor-ipsec.  I made only small changes there.  I wrote a unit
test and verified that it passed, but I didn't otherwise test the IPSEC
support.

I build (only) tested this on 2.6.25 (where it does not change anything),
2.6.26 through 2.6.34, inclusive, all on i386, plus 2.6.36 on x86-64.  I
ran netperf tests with VXLAN on 2.6.37 on i386 inside KVM.

Signed-off-by: Ben Pfaff <blp at nicira.com>
Bug #7558.
---
 NEWS                                    |    4 +
 README                                  |    2 +-
 datapath/Modules.mk                     |    3 +-
 datapath/linux/.gitignore               |    1 +
 datapath/tunnel.h                       |    1 +
 datapath/vport-vxlan.c                  |  207 +++++++++++++++++++++++++++++++
 datapath/vport.c                        |    1 +
 datapath/vport.h                        |    1 +
 debian/control                          |    4 +-
 debian/openvswitch-ipsec.init           |    3 +-
 debian/openvswitch-switch.init          |    2 +
 debian/ovs-monitor-ipsec                |  117 +++++++++++-------
 include/openflow/nicira-ext.h           |    9 +-
 include/openvswitch/datapath-protocol.h |    1 +
 lib/netdev-vport.c                      |   53 ++++++---
 rhel/etc_init.d_openvswitch             |    2 +
 tests/ovs-monitor-ipsec.at              |   67 ++++++++++
 vswitchd/vswitch.xml                    |   55 +++++++--
 xenserver/etc_init.d_openvswitch        |    2 +
 19 files changed, 457 insertions(+), 78 deletions(-)
 create mode 100644 datapath/vport-vxlan.c

diff --git a/NEWS b/NEWS
index ff3bc44..540dbff 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,9 @@
 Post-v1.2.0
 ------------------------
+
+    - New support for the experimental VXLAN tunnel protocol (see
+      http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-00)
+      and VXLAN over IPSEC.
     - OpenFlow:
       - Added an OpenFlow extension which allows the "output" action to accept
         NXM fields.
diff --git a/README b/README
index dddad3f..8b0289a 100644
--- a/README
+++ b/README
@@ -24,7 +24,7 @@ vSwitch supports the following features:
     * NIC bonding with or without LACP on upstream switch
     * NetFlow, sFlow(R), SPAN, RSPAN, and ERSPAN for increased visibility
     * QoS (Quality of Service) configuration, plus policing
-    * GRE, GRE over IPSEC, and CAPWAP tunneling
+    * GRE, GRE over IPSEC, CAPWAP, VXLAN, and VXLAN over IPSEC tunneling
     * 802.1ag connectivity fault management
     * OpenFlow 1.0 plus numerous extensions
     * Transactional configuration database with C and Python bindings
diff --git a/datapath/Modules.mk b/datapath/Modules.mk
index 087cf44..c59a202 100644
--- a/datapath/Modules.mk
+++ b/datapath/Modules.mk
@@ -25,7 +25,8 @@ openvswitch_sources = \
 	vport-gre.c \
 	vport-internal_dev.c \
 	vport-netdev.c \
-	vport-patch.c
+	vport-patch.c \
+	vport-vxlan.c
 
 openvswitch_headers = \
 	actions.h \
diff --git a/datapath/linux/.gitignore b/datapath/linux/.gitignore
index 0aee746..37cb9ff 100644
--- a/datapath/linux/.gitignore
+++ b/datapath/linux/.gitignore
@@ -38,4 +38,5 @@
 /vport-internal_dev.c
 /vport-netdev.c
 /vport-patch.c
+/vport-vxlan.c
 /vport.c
diff --git a/datapath/tunnel.h b/datapath/tunnel.h
index e7bafbc..59ad47e 100644
--- a/datapath/tunnel.h
+++ b/datapath/tunnel.h
@@ -29,6 +29,7 @@
  */
 #define TNL_T_PROTO_GRE		0
 #define TNL_T_PROTO_CAPWAP	1
+#define TNL_T_PROTO_VXLAN	2
 
 /* These flags are only needed when calling tnl_find_port(). */
 #define TNL_T_KEY_EXACT		(1 << 10)
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
new file mode 100644
index 0000000..a6e5439
--- /dev/null
+++ b/datapath/vport-vxlan.c
@@ -0,0 +1,207 @@
+ /*
+ * Copyright (c) 2011 Nicira Networks.
+ * Distributed under the terms of the GNU GPL version 2.
+ *
+ * Significant portions of this file may be copied from parts of the Linux
+ * kernel, by Linus Torvalds and others.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/udp.h>
+
+#include <net/icmp.h>
+
+#include "tunnel.h"
+#include "vport.h"
+#include "vport-generic.h"
+
+#define VXLAN_DST_PORT 49170
+#define VXLAN_IPSEC_SRC_PORT 49171
+
+#define VXLAN_FLAGS 0x08000000	/* struct vxlanhdr.vx_flags required value. */
+
+/**
+ * struct vxlanhdr - VXLAN header
+ * @vx_flags: Must have the exact value %VXLAN_FLAGS.
+ * @vx_vni: VXLAN Network Identifier (VNI) in top 24 bits, low 8 bits zeroed.
+ */
+struct vxlanhdr {
+	__be32 vx_flags;
+	__be32 vx_vni;
+};
+
+static struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
+{
+	return (struct vxlanhdr *)(udp_hdr(skb) + 1);
+}
+
+#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
+
+static struct socket *vxlan_rcv_socket;
+
+static int vxlan_hdr_len(const struct tnl_mutable_config *mutable)
+{
+	return VXLAN_HLEN;
+}
+
+static __be16 get_src_port(const struct sk_buff *skb,
+                           const struct tnl_mutable_config *mutable)
+{
+        if (mutable->flags & TNL_F_IPSEC)
+                return htons(VXLAN_IPSEC_SRC_PORT);
+
+        /* Convert hash into a port between 32768 and 65535. */
+        return (__force __be16)OVS_CB(skb)->flow->hash | htons(32768);
+}
+static void vxlan_build_header(const struct vport *vport,
+			       const struct tnl_mutable_config *mutable,
+			       void *header)
+{
+	struct udphdr *udph = header;
+	struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1);
+
+	udph->dest = htons(VXLAN_DST_PORT);
+	udph->check = 0;
+
+	vxh->vx_flags = htonl(VXLAN_FLAGS);
+	vxh->vx_vni = htonl(be64_to_cpu(mutable->out_key) << 8);
+}
+
+static struct sk_buff *vxlan_update_header(const struct vport *vport,
+					   const struct tnl_mutable_config *mutable,
+					   struct dst_entry *dst,
+					   struct sk_buff *skb)
+{
+	struct udphdr *udph = udp_hdr(skb);
+	struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1);
+
+	if (mutable->flags & TNL_F_OUT_KEY_ACTION)
+		vxh->vx_vni = htonl(be64_to_cpu(OVS_CB(skb)->tun_id) << 8);
+
+	udph->source = get_src_port(skb, mutable);
+	udph->len = htons(skb->len - skb_transport_offset(skb));
+
+	return skb;
+}
+
+/* Called with rcu_read_lock and BH disabled. */
+static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct vport *vport;
+	struct vxlanhdr *vxh;
+	const struct tnl_mutable_config *mutable;
+	struct iphdr *iph;
+	__be64 key;
+
+	if (unlikely(!pskb_may_pull(skb, VXLAN_HLEN + ETH_HLEN)))
+		goto error;
+
+	vxh = vxlan_hdr(skb);
+	if (unlikely(vxh->vx_flags != htonl(VXLAN_FLAGS) ||
+		     vxh->vx_vni & htonl(0xff)))
+		goto error;
+
+	__skb_pull(skb, VXLAN_HLEN);
+	skb_postpull_rcsum(skb, skb_transport_header(skb), VXLAN_HLEN + ETH_HLEN);
+
+	key = cpu_to_be64(ntohl(vxh->vx_vni) >> 8);
+
+	iph = ip_hdr(skb);
+	vport = tnl_find_port(iph->daddr, iph->saddr, key,
+			      TNL_T_PROTO_VXLAN | TNL_T_KEY_EITHER, &mutable);
+	if (unlikely(!vport)) {
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+		goto error;
+	}
+
+	if (mutable->flags & TNL_F_IN_KEY_MATCH)
+		OVS_CB(skb)->tun_id = key;
+	else
+		OVS_CB(skb)->tun_id = 0;
+
+	tnl_rcv(vport, skb, iph->tos);
+	goto out;
+
+error:
+	kfree_skb(skb);
+out:
+	return 0;
+}
+
+static const struct tnl_ops vxlan_tnl_ops = {
+	.tunnel_type	= TNL_T_PROTO_VXLAN,
+	.ipproto	= IPPROTO_UDP,
+	.hdr_len	= vxlan_hdr_len,
+	.build_header	= vxlan_build_header,
+	.update_header	= vxlan_update_header,
+};
+
+static struct vport *vxlan_create(const struct vport_parms *parms)
+{
+	return tnl_create(parms, &vxlan_vport_ops, &vxlan_tnl_ops);
+}
+
+/* Random value.  Irrelevant as long as it's not 0 since we set the handler. */
+#define UDP_ENCAP_VXLAN 10
+static int vxlan_init(void)
+{
+	int err;
+	struct sockaddr_in sin;
+
+	err = sock_create(AF_INET, SOCK_DGRAM, 0, &vxlan_rcv_socket);
+	if (err)
+		goto error;
+
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = htonl(INADDR_ANY);
+	sin.sin_port = htons(VXLAN_DST_PORT);
+
+	err = kernel_bind(vxlan_rcv_socket, (struct sockaddr *)&sin,
+			  sizeof(struct sockaddr_in));
+	if (err)
+		goto error_sock;
+
+	udp_sk(vxlan_rcv_socket->sk)->encap_type = UDP_ENCAP_VXLAN;
+	udp_sk(vxlan_rcv_socket->sk)->encap_rcv = vxlan_rcv;
+
+	return 0;
+
+error_sock:
+	sock_release(vxlan_rcv_socket);
+error:
+	pr_warn("cannot register vxlan protocol handler\n");
+	return err;
+}
+
+static void vxlan_exit(void)
+{
+	sock_release(vxlan_rcv_socket);
+}
+
+const struct vport_ops vxlan_vport_ops = {
+	.type		= OVS_VPORT_TYPE_VXLAN,
+	.flags		= VPORT_F_TUN_ID,
+	.init		= vxlan_init,
+	.exit		= vxlan_exit,
+	.create		= vxlan_create,
+	.destroy	= tnl_destroy,
+	.set_addr	= tnl_set_addr,
+	.get_name	= tnl_get_name,
+	.get_addr	= tnl_get_addr,
+	.get_options	= tnl_get_options,
+	.set_options	= tnl_set_options,
+	.get_dev_flags	= vport_gen_get_dev_flags,
+	.is_running	= vport_gen_is_running,
+	.get_operstate	= vport_gen_get_operstate,
+	.send		= tnl_send,
+};
+#else
+#warning VXLAN tunneling will not be available on kernels before 2.6.26
+#endif /* Linux kernel < 2.6.26 */
diff --git a/datapath/vport.c b/datapath/vport.c
index ad5a10e..d577639 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -31,6 +31,7 @@ static const struct vport_ops *base_vport_ops_list[] = {
 	&gre_vport_ops,
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
 	&capwap_vport_ops,
+	&vxlan_vport_ops,
 #endif
 };
 
diff --git a/datapath/vport.h b/datapath/vport.h
index b6b94e0..34a3d0e 100644
--- a/datapath/vport.h
+++ b/datapath/vport.h
@@ -252,5 +252,6 @@ extern const struct vport_ops internal_vport_ops;
 extern const struct vport_ops patch_vport_ops;
 extern const struct vport_ops gre_vport_ops;
 extern const struct vport_ops capwap_vport_ops;
+extern const struct vport_ops vxlan_vport_ops;
 
 #endif /* vport.h */
diff --git a/debian/control b/debian/control
index 1f3387a..4c23e59 100644
--- a/debian/control
+++ b/debian/control
@@ -60,9 +60,9 @@ Depends:
  openvswitch-common (= ${binary:Version}),
  openvswitch-switch (= ${binary:Version}),
  python-openvswitch (= ${source:Version})
-Description: Open vSwitch GRE-over-IPsec support
+Description: Open vSwitch support for GRE and VXLAN over IPsec
  The ovs-monitor-ipsec script provides support for encrypting GRE
- tunnels with IPsec.
+ and VXLAN tunnels with IPsec.
  .
  Open vSwitch is a full-featured software-based Ethernet switch.
 
diff --git a/debian/openvswitch-ipsec.init b/debian/openvswitch-ipsec.init
index 17835a5..bb9a5bd 100755
--- a/debian/openvswitch-ipsec.init
+++ b/debian/openvswitch-ipsec.init
@@ -1,5 +1,6 @@
 #!/bin/sh
 #
+# Copyright (c) 2011 Nicira Networks
 # Copyright (c) 2007, 2009 Javier Fernandez-Sanguino <jfs at debian.org>
 #
 # This is free software; you may redistribute it and/or modify
@@ -23,7 +24,7 @@
 # Required-Stop:     $remote_fs
 # Default-Start:     2 3 4 5
 # Default-Stop:      0 1 6
-# Short-Description: Open vSwitch GRE-over-IPsec daemon
+# Short-Description: Open vSwitch IPsec tunnel daemon
 ### END INIT INFO
 
 PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
diff --git a/debian/openvswitch-switch.init b/debian/openvswitch-switch.init
index 3d187a0..d9e57ca 100755
--- a/debian/openvswitch-switch.init
+++ b/debian/openvswitch-switch.init
@@ -63,7 +63,9 @@ start () {
     fi
     "$@" || exit $?
 
+    # Allow tunnel traffic.
     ovs_ctl --protocol=gre enable-protocol
+    ovs_ctl --protocol=udp --dport=49170 enable-protocol
 }
 
 stop () {
diff --git a/debian/ovs-monitor-ipsec b/debian/ovs-monitor-ipsec
index ac2cd7e..19abe1a 100755
--- a/debian/ovs-monitor-ipsec
+++ b/debian/ovs-monitor-ipsec
@@ -14,9 +14,10 @@
 # limitations under the License.
 
 
-# A daemon to monitor attempts to create GRE-over-IPsec tunnels.
-# Uses racoon and setkey to support the configuration.  Assumes that
-# OVS has complete control over IPsec configuration for the box.
+# A daemon to monitor attempts to create tunnels over IPsec.
+# Racoon and setkey are used to support the configuration.  It is
+# assumed that OVS has complete control over IPsec configuration for
+# the box.
 
 # xxx To-do:
 #  - Doesn't actually check that Interface is connected to bridge
@@ -41,7 +42,12 @@ import ovs.vlog
 
 vlog = ovs.vlog.Vlog("ovs-monitor-ipsec")
 root_prefix = ''                # Prefix for absolute file names, for testing.
-setkey = "/usr/sbin/setkey"
+SETKEY = "/usr/sbin/setkey"
+
+# UDP ports used for VXLAN.  The source port is only fixed for
+# VXLAN-over-IPsec traffic.
+VXLAN_DST_PORT = 49170
+VXLAN_SRC_PORT = 49171
 
 
 # Class to configure the racoon daemon, which handles IKE negotiation
@@ -251,17 +257,17 @@ path certificate "%s";
 
 # Class to configure IPsec on a system using racoon for IKE and setkey
 # for maintaining the Security Association Database (SAD) and Security
-# Policy Database (SPD).  Only policies for GRE are supported.
+# Policy Database (SPD).  Only policies for GRE and VXLAN are supported.
 class IPsec:
     def __init__(self):
         self.sad_flush()
         self.spd_flush()
         self.racoon = Racoon()
-        self.entries = []
+        self.entries = {}
 
     def call_setkey(self, cmds):
         try:
-            p = subprocess.Popen([root_prefix + setkey, "-c"],
+            p = subprocess.Popen([root_prefix + SETKEY, "-c"],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)
         except:
@@ -317,26 +323,44 @@ class IPsec:
         self.call_setkey("spdflush;\n")
 
     def spd_add(self, local_ip, remote_ip):
-        cmds = ("spdadd %s %s gre -P out ipsec esp/transport//require;\n" %
-                    (local_ip, remote_ip))
-        cmds += ("spdadd %s %s gre -P in ipsec esp/transport//require;\n" %
-                    (remote_ip, local_ip))
+        tunnel_type = self.entries[remote_ip]
+        if tunnel_type == "vxlan":
+            cmds = ("spdadd %s[any] %s[any] udp -P out ipsec esp/transport/%s[%s]-%s[%s]/require;\n"
+                        % (local_ip,  remote_ip, local_ip, VXLAN_SRC_PORT,
+                           remote_ip, VXLAN_DST_PORT))
+            cmds += ("spdadd %s[any] %s[any] udp -P in ipsec esp/transport/%s[%s]-%s[%s]/require;\n"
+                        % (remote_ip, local_ip, remote_ip, VXLAN_DST_PORT,
+                           local_ip, VXLAN_SRC_PORT))
+        else:
+            cmds = ("spdadd %s %s gre -P out ipsec esp/transport//require;\n" %
+                        (local_ip, remote_ip))
+            cmds += ("spdadd %s %s gre -P in ipsec esp/transport//require;\n" %
+                        (remote_ip, local_ip))
         self.call_setkey(cmds)
 
     def spd_del(self, local_ip, remote_ip):
-        cmds = "spddelete %s %s gre -P out;\n" % (local_ip, remote_ip)
-        cmds += "spddelete %s %s gre -P in;\n" % (remote_ip, local_ip)
+        tunnel_type = self.entries[remote_ip]
+        if tunnel_type == "vxlan":
+            cmds = ("spddelete %s %s udp -P out;\n" % (local_ip, remote_ip))
+            cmds += ("spddelete %s %s udp -P in;\n" % (remote_ip, local_ip))
+        else:
+            cmds = "spddelete %s %s gre -P out;\n" % (local_ip, remote_ip)
+            cmds += "spddelete %s %s gre -P in;\n" % (remote_ip, local_ip)
         self.call_setkey(cmds)
 
     def add_entry(self, local_ip, remote_ip, vals):
+        tunnel_type = vals["tunnel_type"]
+        if tunnel_type not in ("gre", "vxlan"):
+            raise error.Error("unknown tunnel type: %s" % tunnel_type)
+
         if remote_ip in self.entries:
             raise error.Error("host %s already configured for ipsec"
                               % remote_ip)
 
         self.racoon.add_entry(remote_ip, vals)
-        self.spd_add(local_ip, remote_ip)
 
-        self.entries.append(remote_ip)
+        self.entries[remote_ip] = tunnel_type
+        self.spd_add(local_ip, remote_ip)
 
     def del_entry(self, local_ip, remote_ip):
         if remote_ip in self.entries:
@@ -344,7 +368,7 @@ class IPsec:
             self.spd_del(local_ip, remote_ip)
             self.sad_del(local_ip, remote_ip)
 
-            self.entries.remove(remote_ip)
+            del self.entries[remote_ip]
 
 
 def keep_table_columns(schema, table_name, column_types):
@@ -463,36 +487,43 @@ def main():
         new_interfaces = {}
         for rec in idl.tables["Interface"].rows.itervalues():
             if rec.type == "ipsec_gre":
-                name = rec.name
-                options = rec.options
-                entry = {
-                    "remote_ip": options.get("remote_ip"),
-                    "local_ip": options.get("local_ip", "0.0.0.0/0"),
-                    "certificate": options.get("certificate"),
-                    "private_key": options.get("private_key"),
-                    "use_ssl_cert": options.get("use_ssl_cert"),
-                    "peer_cert": options.get("peer_cert"),
-                    "psk": options.get("psk")}
-
-                if entry["peer_cert"] and entry["psk"]:
-                    vlog.warn("both 'peer_cert' and 'psk' defined for %s"
-                              % name)
-                    continue
-                elif not entry["peer_cert"] and not entry["psk"]:
-                    vlog.warn("no 'peer_cert' or 'psk' defined for %s" % name)
-                    continue
+                tunnel_type = "gre"
+            elif rec.type == "ipsec_vxlan":
+                tunnel_type = "vxlan"
+            else:
+                continue
+
+            name = rec.name
+            options = rec.options
+            entry = {
+                "remote_ip": options.get("remote_ip"),
+                "local_ip": options.get("local_ip", "0.0.0.0/0"),
+                "certificate": options.get("certificate"),
+                "private_key": options.get("private_key"),
+                "use_ssl_cert": options.get("use_ssl_cert"),
+                "peer_cert": options.get("peer_cert"),
+                "psk": options.get("psk"),
+                "tunnel_type": tunnel_type }
+
+            if entry["peer_cert"] and entry["psk"]:
+                vlog.warn("both 'peer_cert' and 'psk' defined for %s"
+                        % name)
+                continue
+            elif not entry["peer_cert"] and not entry["psk"]:
+                vlog.warn("no 'peer_cert' or 'psk' defined for %s" % name)
+                continue
 
-                # The "use_ssl_cert" option is deprecated and will
-                # likely go away in the near future.
-                if entry["use_ssl_cert"] == "true":
-                    if not ssl_cert:
-                        vlog.warn("no valid SSL entry for %s" % name)
-                        continue
+            # The "use_ssl_cert" option is deprecated and will
+            # likely go away in the near future.
+            if entry["use_ssl_cert"] == "true":
+                if not ssl_cert:
+                    vlog.warn("no valid SSL entry for %s" % name)
+                    continue
 
-                    entry["certificate"] = ssl_cert[0]
-                    entry["private_key"] = ssl_cert[1]
+                entry["certificate"] = ssl_cert[0]
+                entry["private_key"] = ssl_cert[1]
 
-                new_interfaces[name] = entry
+            new_interfaces[name] = entry
 
         if interfaces != new_interfaces:
             update_ipsec(ipsec, interfaces, new_interfaces)
diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
index 51f21f0..67b2717 100644
--- a/include/openflow/nicira-ext.h
+++ b/include/openflow/nicira-ext.h
@@ -1444,9 +1444,12 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24);
 
 /* Tunnel ID.
  *
- * For a packet received via GRE tunnel including a (32-bit) key, the key is
- * stored in the low 32-bits and the high bits are zeroed.  For other packets,
- * the value is 0.
+ * The key, for a packet received via a keyed tunnel.  If the key is less than
+ * 64 bits wide, this field holds the key in its low-order bits and higher
+ * bits are zeroed.  If the key is more than 64 bits wide, this field contains
+ * the 64 lowest-order bits.
+ *
+ * All zero bits, for packets not received via a keyed tunnel.
  *
  * Prereqs: None.
  *
diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h
index 6c89411..58db169 100644
--- a/include/openvswitch/datapath-protocol.h
+++ b/include/openvswitch/datapath-protocol.h
@@ -200,6 +200,7 @@ enum ovs_vport_type {
 	OVS_VPORT_TYPE_PATCH,    /* virtual tunnel connecting two vports */
 	OVS_VPORT_TYPE_GRE,      /* GRE tunnel */
 	OVS_VPORT_TYPE_CAPWAP,   /* CAPWAP tunnel */
+	OVS_VPORT_TYPE_VXLAN,    /* VXLAN tunnel */
 	__OVS_VPORT_TYPE_MAX
 };
 
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 06ec8fb..56216f2 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -134,11 +134,27 @@ netdev_vport_get_vport_type(const struct netdev *netdev)
             : OVS_VPORT_TYPE_UNSPEC);
 }
 
-const char *
-netdev_vport_get_netdev_type(const struct dpif_linux_vport *vport)
+static const char *
+get_maybe_ipsec_tunnel_type(const struct dpif_linux_vport *vport,
+                            const char *plain_type, const char *ipsec_type)
 {
     struct nlattr *a[OVS_TUNNEL_ATTR_MAX + 1];
+    uint32_t flags;
+
+    if (tnl_port_config_from_nlattr(vport->options, vport->options_len, a)) {
+        VLOG_WARN_RL(&rl, "dp%d: cannot parse options for port `%s' (type %u)",
+                     vport->dp_ifindex, vport->name,
+                     (unsigned int) vport->type);
+        return "unknown";
+    }
 
+    flags = nl_attr_get_u32(a[OVS_TUNNEL_ATTR_FLAGS]);
+    return flags & TNL_F_IPSEC ? ipsec_type : plain_type;
+}
+
+const char *
+netdev_vport_get_netdev_type(const struct dpif_linux_vport *vport)
+{
     switch (vport->type) {
     case OVS_VPORT_TYPE_UNSPEC:
         break;
@@ -153,16 +169,14 @@ netdev_vport_get_netdev_type(const struct dpif_linux_vport *vport)
         return "patch";
 
     case OVS_VPORT_TYPE_GRE:
-        if (tnl_port_config_from_nlattr(vport->options, vport->options_len,
-                                        a)) {
-            break;
-        }
-        return (nl_attr_get_u32(a[OVS_TUNNEL_ATTR_FLAGS]) & TNL_F_IPSEC
-                ? "ipsec_gre" : "gre");
+        return get_maybe_ipsec_tunnel_type(vport, "gre", "ipsec_gre");
 
     case OVS_VPORT_TYPE_CAPWAP:
         return "capwap";
 
+    case OVS_VPORT_TYPE_VXLAN:
+        return get_maybe_ipsec_tunnel_type(vport, "vxlan", "ipsec_vxlan");
+
     case __OVS_VPORT_TYPE_MAX:
         break;
     }
@@ -566,19 +580,18 @@ static int
 parse_tunnel_config(const char *name, const char *type,
                     const struct shash *args, struct ofpbuf *options)
 {
-    bool is_gre = false;
-    bool is_ipsec = false;
+    bool supports_csum;
+    bool is_ipsec;
     struct shash_node *node;
     bool ipsec_mech_set = false;
     ovs_be32 daddr = htonl(0);
     uint32_t flags;
 
+    supports_csum = !strcmp(type, "gre") || !strcmp(type, "ipsec_gre");
+    is_ipsec = !strncmp(type, "ipsec_", 6);
+
     flags = TNL_F_DF_DEFAULT | TNL_F_PMTUD | TNL_F_HDR_CACHE;
-    if (!strcmp(type, "gre")) {
-        is_gre = true;
-    } else if (!strcmp(type, "ipsec_gre")) {
-        is_gre = true;
-        is_ipsec = true;
+    if (is_ipsec) {
         flags |= TNL_F_IPSEC;
         flags &= ~TNL_F_HDR_CACHE;
     }
@@ -611,7 +624,7 @@ parse_tunnel_config(const char *name, const char *type,
             } else {
                 nl_msg_put_u8(options, OVS_TUNNEL_ATTR_TTL, atoi(node->data));
             }
-        } else if (!strcmp(node->name, "csum") && is_gre) {
+        } else if (!strcmp(node->name, "csum") && supports_csum) {
             if (!strcmp(node->data, "true")) {
                 flags |= TNL_F_CSUM;
             }
@@ -947,6 +960,14 @@ netdev_vport_register(void)
           { "capwap", VPORT_FUNCTIONS(netdev_vport_get_status) },
           parse_tunnel_config, unparse_tunnel_config },
 
+        { OVS_VPORT_TYPE_VXLAN,
+          { "vxlan", VPORT_FUNCTIONS(netdev_vport_get_status) },
+          parse_tunnel_config, unparse_tunnel_config },
+
+        { OVS_VPORT_TYPE_VXLAN,         /* "ipsec_" prefix selects IPsec. */
+          { "ipsec_vxlan", VPORT_FUNCTIONS(netdev_vport_get_status) },
+          parse_tunnel_config, unparse_tunnel_config },
+
         { OVS_VPORT_TYPE_PATCH,
           { "patch", VPORT_FUNCTIONS(NULL) },
           parse_patch_config, unparse_patch_config }
diff --git a/rhel/etc_init.d_openvswitch b/rhel/etc_init.d_openvswitch
index 5501d18..113d99a 100755
--- a/rhel/etc_init.d_openvswitch
+++ b/rhel/etc_init.d_openvswitch
@@ -47,7 +47,9 @@ start () {
     fi
     "$@"
 
+    # Allow tunnel traffic.
     $ovs_ctl --protocol=gre enable-protocol
+    $ovs_ctl --protocol=udp --dport=49170 enable-protocol
 
     touch /var/lock/subsys/openvswitch
 }
diff --git a/tests/ovs-monitor-ipsec.at b/tests/ovs-monitor-ipsec.at
index f9868e7..19a834a 100644
--- a/tests/ovs-monitor-ipsec.at
+++ b/tests/ovs-monitor-ipsec.at
@@ -308,4 +308,71 @@ sainfo anonymous {
 ])
 AT_CHECK([test ! -f etc/racoon/certs/ovs-3.4.5.6.pem])
 
+###
+### Add an ipsec_vxlan psk interface and check what ovs-monitor-ipsec does
+###
+AT_CHECK([ovs_vsctl \
+              -- add-port br0 vxlan0 \
+              -- set interface vxlan0 type=ipsec_vxlan \
+                                      options:remote_ip=4.5.6.7 \
+                                      options:psk=mishmash])
+OVS_WAIT_UNTIL([test -f actions && grep 'spdadd 4.5.6.7' actions >/dev/null])
+AT_CHECK([sed '1,41d' actions], [0],
+[[racoon: reload
+setkey:
+> spdadd 0.0.0.0/0[any] 4.5.6.7[any] udp -P out ipsec esp/transport/0.0.0.0/0[49171]-4.5.6.7[49170]/require;
+> spdadd 4.5.6.7[any] 0.0.0.0/0[any] udp -P in ipsec esp/transport/4.5.6.7[49170]-0.0.0.0/0[49171]/require;
+]])
+AT_CHECK([trim etc/racoon/psk.txt], [0], [4.5.6.7   mishmash
+])
+AT_CHECK([trim etc/racoon/racoon.conf], [0], [dnl
+path pre_shared_key "/etc/racoon/psk.txt";
+path certificate "/etc/racoon/certs";
+remote 4.5.6.7 {
+        exchange_mode main;
+        nat_traversal on;
+        proposal {
+                encryption_algorithm aes;
+                hash_algorithm sha1;
+                authentication_method pre_shared_key;
+                dh_group 2;
+        }
+}
+sainfo anonymous {
+        pfs_group 2;
+        lifetime time 1 hour;
+        encryption_algorithm aes;
+        authentication_algorithm hmac_sha1, hmac_md5;
+        compression_algorithm deflate;
+}
+])
+
+###
+### Delete the ipsec_vxlan interface and check what ovs-monitor-ipsec does
+###
+AT_CHECK([ovs_vsctl del-port vxlan0])
+OVS_WAIT_UNTIL([test `wc -l < actions` -ge 17])
+AT_CHECK([sed '1,45d' actions], [0], [dnl
+racoon: reload
+setkey:
+> spddelete 0.0.0.0/0 4.5.6.7 udp -P out;
+> spddelete 4.5.6.7 0.0.0.0/0 udp -P in;
+setkey:
+> dump ;
+setkey:
+> dump ;
+])
+AT_CHECK([trim etc/racoon/psk.txt], [0], [])
+AT_CHECK([trim etc/racoon/racoon.conf], [0], [dnl
+path pre_shared_key "/etc/racoon/psk.txt";
+path certificate "/etc/racoon/certs";
+sainfo anonymous {
+        pfs_group 2;
+        lifetime time 1 hour;
+        encryption_algorithm aes;
+        authentication_algorithm hmac_sha1, hmac_md5;
+        compression_algorithm deflate;
+}
+])
+
 AT_CLEANUP
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index d579b87..ea50733 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -916,8 +916,7 @@
           <dt><code>gre</code></dt>
           <dd>
             An Ethernet over RFC 2890 Generic Routing Encapsulation over IPv4
-            tunnel.  See <ref group="Tunnel Options"/> for information on
-            configuring GRE tunnels.
+            tunnel.
           </dd>
 
           <dt><code>ipsec_gre</code></dt>
@@ -936,6 +935,28 @@
             with the Linux kernel datapath with kernel version 2.6.26 or later.
           </dd>
 
+          <dt><code>vxlan</code></dt>
+          <dd>
+	    <p>
+	      An Ethernet tunnel over the experimental, UDP-based VXLAN
+	      protocol described at
+	      <code>http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-00</code>.
+	      VXLAN is currently supported only with the Linux kernel datapath
+	      with kernel version 2.6.26 or later.
+	    </p>
+	    <p>
+	      As an experimental protocol, VXLAN has no officially assigned UDP
+	      port.  Open vSwitch currently uses UDP destination port 49170.
+	      The source port used for VXLAN traffic varies on a per-flow basis
+	      between 32768 and 65535 to allow load balancing.
+	    </p>
+          </dd>
+
+	  <dt><code>ipsec_vxlan</code></dt>
+	  <dd>
+	    VXLAN over an IPSEC tunnel.
+	  </dd>
+
           <dt><code>patch</code></dt>
           <dd>
             A pair of virtual devices that act as a patch cable.
@@ -950,7 +971,8 @@
     <group title="Tunnel Options">
       <p>
         These options apply to interfaces with <ref column="type"/> of
-        <code>gre</code>, <code>ipsec_gre</code>, and <code>capwap</code>.
+        <code>gre</code>, <code>ipsec_gre</code>, <code>capwap</code>,
+        <code>vxlan</code>, and <code>ipsec_vxlan</code>.
       </p>
 
       <p>
@@ -983,8 +1005,9 @@
             key="in_key"/> at all.
           </li>
           <li>
-            A positive 32-bit (for GRE) or 64-bit (for CAPWAP) number.  The
-            tunnel receives only packets with the specified key.
+            A positive 24-bit (for VXLAN), 32-bit (for GRE) or 64-bit (for
+            CAPWAP) number.  The tunnel receives only packets with the
+            specified key.
           </li>
           <li>
             The word <code>flow</code>.  The tunnel accepts packets with any
@@ -1009,8 +1032,9 @@
             key="out_key"/> at all.
           </li>
           <li>
-            A positive 32-bit (for GRE) or 64-bit (for CAPWAP) number.  Packets
-            sent through the tunnel will have the specified key.
+            A positive 24-bit (for VXLAN), 32-bit (for GRE) or 64-bit (for
+            CAPWAP) number.  Packets sent through the tunnel will have the
+            specified key.
           </li>
           <li>
             The word <code>flow</code>.  Packets sent through the tunnel will
@@ -1068,9 +1092,10 @@
         enabled; set to <code>false</code> to disable.
       </column>
 
-      <group title="Tunnel Options: gre only">
+      <group title="Tunnel Options: gre and vxlan only">
         <p>
-          Only <code>gre</code> interfaces support these options.
+          Only <code>gre</code> and <code>vxlan</code> interfaces support these
+          options.
         </p>
 
         <column name="options" key="header_cache" type='{"type": "boolean"}'>
@@ -1112,11 +1137,19 @@
         </column>
       </group>
 
-      <group title="Tunnel Options: ipsec_gre only">
+      <group title="Tunnel Options: ipsec_gre and ipsec_vxlan only">
         <p>
-          Only <code>ipsec_gre</code> interfaces support these options.
+          Only <code>ipsec_gre</code> and <code>ipsec_vxlan</code> interfaces
+          support these options.
         </p>
 
+	<p>
+	  These options are implemented through a separate daemon named
+	  <code>ovs-monitor-ipsec</code> that so far has only been ported to
+	  and packaged for Debian (including derivative distributions such as
+	  Ubuntu).
+	</p>
+
         <column name="options" key="peer_cert">
           Required for certificate authentication.  A string containing the
           peer's certificate in PEM format.  Additionally the host's
diff --git a/xenserver/etc_init.d_openvswitch b/xenserver/etc_init.d_openvswitch
index 8ba8aee..75d8391 100755
--- a/xenserver/etc_init.d_openvswitch
+++ b/xenserver/etc_init.d_openvswitch
@@ -76,7 +76,9 @@ start () {
             --log-file --pidfile --detach --monitor unix:/var/run/openvswitch/db.sock
     fi
 
+    # Allow tunnel traffic.
     $ovs_ctl --protocol=gre enable-protocol
+    $ovs_ctl --protocol=udp --dport=49170 enable-protocol
 
     touch /var/lock/subsys/openvswitch
 }
-- 
1.7.2.5




More information about the dev mailing list