[ovs-dev] [PATCH] Implement draft VXLAN L2-over-L3 tunneling protocol.
Ben Pfaff
blp at nicira.com
Wed Oct 12 16:01:12 PDT 2011
This commit implements the VXLAN tunneling protocol described at
http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-00
Multicast support is not yet implemented, but it's in the works.
In my simple VM-based test rig, "netperf" performance was the same as GRE.
Jesse Gross <jesse at nicira.com> really deserves credit for most of
datapath/vport-vxlan.c. It's mostly a mixed-up and simplified copy of
datapath/vport-capwap.c with a bit of search-and-replace.
Justin Pettit <jpettit at nicira.com> implemented the changes to
debian/ovs-monitor-ipsec. I made only small changes there. I wrote a unit
test and verified that it passed, but I didn't otherwise test the IPSEC
support.
I build (only) tested this on 2.6.25 (where it does not change anything),
2.6.26 through 2.6.34, inclusive, all on i386, plus 2.6.36 on x86-64. I
ran netperf tests with VXLAN on 2.6.37 on i386 inside KVM.
Signed-off-by: Ben Pfaff <blp at nicira.com>
Bug #7558.
---
NEWS | 4 +
README | 2 +-
datapath/Modules.mk | 3 +-
datapath/linux/.gitignore | 1 +
datapath/tunnel.h | 1 +
datapath/vport-vxlan.c | 207 +++++++++++++++++++++++++++++++
datapath/vport.c | 1 +
datapath/vport.h | 1 +
debian/control | 4 +-
debian/openvswitch-ipsec.init | 3 +-
debian/openvswitch-switch.init | 2 +
debian/ovs-monitor-ipsec | 117 +++++++++++-------
include/openflow/nicira-ext.h | 9 +-
include/openvswitch/datapath-protocol.h | 1 +
lib/netdev-vport.c | 53 ++++++---
rhel/etc_init.d_openvswitch | 2 +
tests/ovs-monitor-ipsec.at | 67 ++++++++++
vswitchd/vswitch.xml | 55 +++++++--
xenserver/etc_init.d_openvswitch | 2 +
19 files changed, 457 insertions(+), 78 deletions(-)
create mode 100644 datapath/vport-vxlan.c
diff --git a/NEWS b/NEWS
index ff3bc44..540dbff 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,9 @@
Post-v1.2.0
------------------------
+
+ - New support for the experimental VXLAN tunnel protocol (see
+ http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-00)
+ and VXLAN over IPSEC.
- OpenFlow:
- Added an OpenFlow extension which allows the "output" action to accept
NXM fields.
diff --git a/README b/README
index dddad3f..8b0289a 100644
--- a/README
+++ b/README
@@ -24,7 +24,7 @@ vSwitch supports the following features:
* NIC bonding with or without LACP on upstream switch
* NetFlow, sFlow(R), SPAN, RSPAN, and ERSPAN for increased visibility
* QoS (Quality of Service) configuration, plus policing
- * GRE, GRE over IPSEC, and CAPWAP tunneling
+ * GRE, GRE over IPSEC, CAPWAP, VXLAN, and VXLAN over IPSEC tunneling
* 802.1ag connectivity fault management
* OpenFlow 1.0 plus numerous extensions
* Transactional configuration database with C and Python bindings
diff --git a/datapath/Modules.mk b/datapath/Modules.mk
index 087cf44..c59a202 100644
--- a/datapath/Modules.mk
+++ b/datapath/Modules.mk
@@ -25,7 +25,8 @@ openvswitch_sources = \
vport-gre.c \
vport-internal_dev.c \
vport-netdev.c \
- vport-patch.c
+ vport-patch.c \
+ vport-vxlan.c
openvswitch_headers = \
actions.h \
diff --git a/datapath/linux/.gitignore b/datapath/linux/.gitignore
index 0aee746..37cb9ff 100644
--- a/datapath/linux/.gitignore
+++ b/datapath/linux/.gitignore
@@ -38,4 +38,5 @@
/vport-internal_dev.c
/vport-netdev.c
/vport-patch.c
+/vport-vxlan.c
/vport.c
diff --git a/datapath/tunnel.h b/datapath/tunnel.h
index e7bafbc..59ad47e 100644
--- a/datapath/tunnel.h
+++ b/datapath/tunnel.h
@@ -29,6 +29,7 @@
*/
#define TNL_T_PROTO_GRE 0
#define TNL_T_PROTO_CAPWAP 1
+#define TNL_T_PROTO_VXLAN 2
/* These flags are only needed when calling tnl_find_port(). */
#define TNL_T_KEY_EXACT (1 << 10)
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
new file mode 100644
index 0000000..a6e5439
--- /dev/null
+++ b/datapath/vport-vxlan.c
@@ -0,0 +1,207 @@
+ /*
+ * Copyright (c) 2011 Nicira Networks.
+ * Distributed under the terms of the GNU GPL version 2.
+ *
+ * Significant portions of this file may be copied from parts of the Linux
+ * kernel, by Linus Torvalds and others.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/udp.h>
+
+#include <net/icmp.h>
+
+#include "tunnel.h"
+#include "vport.h"
+#include "vport-generic.h"
+
+#define VXLAN_DST_PORT 49170
+#define VXLAN_IPSEC_SRC_PORT 49171
+
+#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */
+
+/**
+ * struct vxlanhdr - VXLAN header
+ * @vx_flags: Must have the exact value %VXLAN_FLAGS.
+ * @vx_vni: VXLAN Network Identifier (VNI) in top 24 bits, low 8 bits zeroed.
+ */
+struct vxlanhdr {
+ __be32 vx_flags;
+ __be32 vx_vni;
+};
+
+static struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
+{
+ return (struct vxlanhdr *)(udp_hdr(skb) + 1);
+}
+
+#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
+
+static struct socket *vxlan_rcv_socket;
+
+static int vxlan_hdr_len(const struct tnl_mutable_config *mutable)
+{
+ return VXLAN_HLEN;
+}
+
+static __be16 get_src_port(const struct sk_buff *skb,
+ const struct tnl_mutable_config *mutable)
+{
+ if (mutable->flags & TNL_F_IPSEC)
+ return htons(VXLAN_IPSEC_SRC_PORT);
+
+ /* Convert hash into a port between 32768 and 65535. */
+ return (__force __be16)OVS_CB(skb)->flow->hash | htons(32768);
+}
+static void vxlan_build_header(const struct vport *vport,
+ const struct tnl_mutable_config *mutable,
+ void *header)
+{
+ struct udphdr *udph = header;
+ struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1);
+
+ udph->dest = htons(VXLAN_DST_PORT);
+ udph->check = 0;
+
+ vxh->vx_flags = htonl(VXLAN_FLAGS);
+ vxh->vx_vni = htonl(be64_to_cpu(mutable->out_key) << 8);
+}
+
+static struct sk_buff *vxlan_update_header(const struct vport *vport,
+ const struct tnl_mutable_config *mutable,
+ struct dst_entry *dst,
+ struct sk_buff *skb)
+{
+ struct udphdr *udph = udp_hdr(skb);
+ struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1);
+
+ if (mutable->flags & TNL_F_OUT_KEY_ACTION)
+ vxh->vx_vni = htonl(be64_to_cpu(OVS_CB(skb)->tun_id) << 8);
+
+ udph->source = get_src_port(skb, mutable);
+ udph->len = htons(skb->len - skb_transport_offset(skb));
+
+ return skb;
+}
+
+/* Called with rcu_read_lock and BH disabled. */
+static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ struct vport *vport;
+ struct vxlanhdr *vxh;
+ const struct tnl_mutable_config *mutable;
+ struct iphdr *iph;
+ __be64 key;
+
+ if (unlikely(!pskb_may_pull(skb, VXLAN_HLEN + ETH_HLEN)))
+ goto error;
+
+ vxh = vxlan_hdr(skb);
+ if (unlikely(vxh->vx_flags != htonl(VXLAN_FLAGS) ||
+ vxh->vx_vni & htonl(0xff)))
+ goto error;
+
+ __skb_pull(skb, VXLAN_HLEN);
+ skb_postpull_rcsum(skb, skb_transport_header(skb), VXLAN_HLEN + ETH_HLEN);
+
+ key = cpu_to_be64(ntohl(vxh->vx_vni) >> 8);
+
+ iph = ip_hdr(skb);
+ vport = tnl_find_port(iph->daddr, iph->saddr, key,
+ TNL_T_PROTO_VXLAN | TNL_T_KEY_EITHER, &mutable);
+ if (unlikely(!vport)) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+ goto error;
+ }
+
+ if (mutable->flags & TNL_F_IN_KEY_MATCH)
+ OVS_CB(skb)->tun_id = key;
+ else
+ OVS_CB(skb)->tun_id = 0;
+
+ tnl_rcv(vport, skb, iph->tos);
+ goto out;
+
+error:
+ kfree_skb(skb);
+out:
+ return 0;
+}
+
+static const struct tnl_ops vxlan_tnl_ops = {
+ .tunnel_type = TNL_T_PROTO_VXLAN,
+ .ipproto = IPPROTO_UDP,
+ .hdr_len = vxlan_hdr_len,
+ .build_header = vxlan_build_header,
+ .update_header = vxlan_update_header,
+};
+
+static struct vport *vxlan_create(const struct vport_parms *parms)
+{
+ return tnl_create(parms, &vxlan_vport_ops, &vxlan_tnl_ops);
+}
+
+/* Random value. Irrelevant as long as it's not 0 since we set the handler. */
+#define UDP_ENCAP_VXLAN 10
+static int vxlan_init(void)
+{
+ int err;
+ struct sockaddr_in sin;
+
+ err = sock_create(AF_INET, SOCK_DGRAM, 0, &vxlan_rcv_socket);
+ if (err)
+ goto error;
+
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = htonl(INADDR_ANY);
+ sin.sin_port = htons(VXLAN_DST_PORT);
+
+ err = kernel_bind(vxlan_rcv_socket, (struct sockaddr *)&sin,
+ sizeof(struct sockaddr_in));
+ if (err)
+ goto error_sock;
+
+ udp_sk(vxlan_rcv_socket->sk)->encap_type = UDP_ENCAP_VXLAN;
+ udp_sk(vxlan_rcv_socket->sk)->encap_rcv = vxlan_rcv;
+
+ return 0;
+
+error_sock:
+ sock_release(vxlan_rcv_socket);
+error:
+ pr_warn("cannot register vxlan protocol handler\n");
+ return err;
+}
+
+static void vxlan_exit(void)
+{
+ sock_release(vxlan_rcv_socket);
+}
+
+const struct vport_ops vxlan_vport_ops = {
+ .type = OVS_VPORT_TYPE_VXLAN,
+ .flags = VPORT_F_TUN_ID,
+ .init = vxlan_init,
+ .exit = vxlan_exit,
+ .create = vxlan_create,
+ .destroy = tnl_destroy,
+ .set_addr = tnl_set_addr,
+ .get_name = tnl_get_name,
+ .get_addr = tnl_get_addr,
+ .get_options = tnl_get_options,
+ .set_options = tnl_set_options,
+ .get_dev_flags = vport_gen_get_dev_flags,
+ .is_running = vport_gen_is_running,
+ .get_operstate = vport_gen_get_operstate,
+ .send = tnl_send,
+};
+#else
+#warning VXLAN tunneling will not be available on kernels before 2.6.26
+#endif /* Linux kernel < 2.6.26 */
diff --git a/datapath/vport.c b/datapath/vport.c
index ad5a10e..d577639 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -31,6 +31,7 @@ static const struct vport_ops *base_vport_ops_list[] = {
&gre_vport_ops,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
&capwap_vport_ops,
+ &vxlan_vport_ops,
#endif
};
diff --git a/datapath/vport.h b/datapath/vport.h
index b6b94e0..34a3d0e 100644
--- a/datapath/vport.h
+++ b/datapath/vport.h
@@ -252,5 +252,6 @@ extern const struct vport_ops internal_vport_ops;
extern const struct vport_ops patch_vport_ops;
extern const struct vport_ops gre_vport_ops;
extern const struct vport_ops capwap_vport_ops;
+extern const struct vport_ops vxlan_vport_ops;
#endif /* vport.h */
diff --git a/debian/control b/debian/control
index 1f3387a..4c23e59 100644
--- a/debian/control
+++ b/debian/control
@@ -60,9 +60,9 @@ Depends:
openvswitch-common (= ${binary:Version}),
openvswitch-switch (= ${binary:Version}),
python-openvswitch (= ${source:Version})
-Description: Open vSwitch GRE-over-IPsec support
+Description: Open vSwitch support for GRE and VXLAN over IPsec
The ovs-monitor-ipsec script provides support for encrypting GRE
- tunnels with IPsec.
+ and VXLAN tunnels with IPsec.
.
Open vSwitch is a full-featured software-based Ethernet switch.
diff --git a/debian/openvswitch-ipsec.init b/debian/openvswitch-ipsec.init
index 17835a5..bb9a5bd 100755
--- a/debian/openvswitch-ipsec.init
+++ b/debian/openvswitch-ipsec.init
@@ -1,5 +1,6 @@
#!/bin/sh
#
+# Copyright (c) 2011 Nicira Networks
# Copyright (c) 2007, 2009 Javier Fernandez-Sanguino <jfs at debian.org>
#
# This is free software; you may redistribute it and/or modify
@@ -23,7 +24,7 @@
# Required-Stop: $remote_fs
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
-# Short-Description: Open vSwitch GRE-over-IPsec daemon
+# Short-Description: Open vSwitch IPsec tunnel daemon
### END INIT INFO
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
diff --git a/debian/openvswitch-switch.init b/debian/openvswitch-switch.init
index 3d187a0..d9e57ca 100755
--- a/debian/openvswitch-switch.init
+++ b/debian/openvswitch-switch.init
@@ -63,7 +63,9 @@ start () {
fi
"$@" || exit $?
+ # Allow tunnel traffic.
ovs_ctl --protocol=gre enable-protocol
+ ovs_ctl --protocol=udp --dport=49170 enable-protocol
}
stop () {
diff --git a/debian/ovs-monitor-ipsec b/debian/ovs-monitor-ipsec
index ac2cd7e..19abe1a 100755
--- a/debian/ovs-monitor-ipsec
+++ b/debian/ovs-monitor-ipsec
@@ -14,9 +14,10 @@
# limitations under the License.
-# A daemon to monitor attempts to create GRE-over-IPsec tunnels.
-# Uses racoon and setkey to support the configuration. Assumes that
-# OVS has complete control over IPsec configuration for the box.
+# A daemon to monitor attempts to create tunnels over IPsec.
+# Racoon and setkey are used to support the configuration. It is
+# assumed that OVS has complete control over IPsec configuration for
+# the box.
# xxx To-do:
# - Doesn't actually check that Interface is connected to bridge
@@ -41,7 +42,12 @@ import ovs.vlog
vlog = ovs.vlog.Vlog("ovs-monitor-ipsec")
root_prefix = '' # Prefix for absolute file names, for testing.
-setkey = "/usr/sbin/setkey"
+SETKEY = "/usr/sbin/setkey"
+
+# UDP ports used for VXLAN. The source port is only fixed for
+# VXLAN-over-IPsec traffic.
+VXLAN_DST_PORT = 49170
+VXLAN_SRC_PORT = 49171
# Class to configure the racoon daemon, which handles IKE negotiation
@@ -251,17 +257,17 @@ path certificate "%s";
# Class to configure IPsec on a system using racoon for IKE and setkey
# for maintaining the Security Association Database (SAD) and Security
-# Policy Database (SPD). Only policies for GRE are supported.
+# Policy Database (SPD). Only policies for GRE and VXLAN are supported.
class IPsec:
def __init__(self):
self.sad_flush()
self.spd_flush()
self.racoon = Racoon()
- self.entries = []
+ self.entries = {}
def call_setkey(self, cmds):
try:
- p = subprocess.Popen([root_prefix + setkey, "-c"],
+ p = subprocess.Popen([root_prefix + SETKEY, "-c"],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
except:
@@ -317,26 +323,44 @@ class IPsec:
self.call_setkey("spdflush;\n")
def spd_add(self, local_ip, remote_ip):
- cmds = ("spdadd %s %s gre -P out ipsec esp/transport//require;\n" %
- (local_ip, remote_ip))
- cmds += ("spdadd %s %s gre -P in ipsec esp/transport//require;\n" %
- (remote_ip, local_ip))
+ tunnel_type = self.entries[remote_ip]
+ if tunnel_type == "vxlan":
+ cmds = ("spdadd %s[any] %s[any] udp -P out ipsec esp/transport/%s[%s]-%s[%s]/require;\n"
+ % (local_ip, remote_ip, local_ip, VXLAN_SRC_PORT,
+ remote_ip, VXLAN_DST_PORT))
+ cmds += ("spdadd %s[any] %s[any] udp -P in ipsec esp/transport/%s[%s]-%s[%s]/require;\n"
+ % (remote_ip, local_ip, remote_ip, VXLAN_DST_PORT,
+ local_ip, VXLAN_SRC_PORT))
+ else:
+ cmds = ("spdadd %s %s gre -P out ipsec esp/transport//require;\n" %
+ (local_ip, remote_ip))
+ cmds += ("spdadd %s %s gre -P in ipsec esp/transport//require;\n" %
+ (remote_ip, local_ip))
self.call_setkey(cmds)
def spd_del(self, local_ip, remote_ip):
- cmds = "spddelete %s %s gre -P out;\n" % (local_ip, remote_ip)
- cmds += "spddelete %s %s gre -P in;\n" % (remote_ip, local_ip)
+ tunnel_type = self.entries[remote_ip]
+ if tunnel_type == "vxlan":
+ cmds = ("spddelete %s %s udp -P out;\n" % (local_ip, remote_ip))
+ cmds += ("spddelete %s %s udp -P in;\n" % (remote_ip, local_ip))
+ else:
+ cmds = "spddelete %s %s gre -P out;\n" % (local_ip, remote_ip)
+ cmds += "spddelete %s %s gre -P in;\n" % (remote_ip, local_ip)
self.call_setkey(cmds)
def add_entry(self, local_ip, remote_ip, vals):
+ tunnel_type = vals["tunnel_type"]
+ if tunnel_type not in ("gre", "vxlan"):
+ raise error.Error("unknown tunnel type: %s" % tunnel_type)
+
if remote_ip in self.entries:
raise error.Error("host %s already configured for ipsec"
% remote_ip)
self.racoon.add_entry(remote_ip, vals)
- self.spd_add(local_ip, remote_ip)
- self.entries.append(remote_ip)
+ self.entries[remote_ip] = tunnel_type
+ self.spd_add(local_ip, remote_ip)
def del_entry(self, local_ip, remote_ip):
if remote_ip in self.entries:
@@ -344,7 +368,7 @@ class IPsec:
self.spd_del(local_ip, remote_ip)
self.sad_del(local_ip, remote_ip)
- self.entries.remove(remote_ip)
+ del self.entries[remote_ip]
def keep_table_columns(schema, table_name, column_types):
@@ -463,36 +487,43 @@ def main():
new_interfaces = {}
for rec in idl.tables["Interface"].rows.itervalues():
if rec.type == "ipsec_gre":
- name = rec.name
- options = rec.options
- entry = {
- "remote_ip": options.get("remote_ip"),
- "local_ip": options.get("local_ip", "0.0.0.0/0"),
- "certificate": options.get("certificate"),
- "private_key": options.get("private_key"),
- "use_ssl_cert": options.get("use_ssl_cert"),
- "peer_cert": options.get("peer_cert"),
- "psk": options.get("psk")}
-
- if entry["peer_cert"] and entry["psk"]:
- vlog.warn("both 'peer_cert' and 'psk' defined for %s"
- % name)
- continue
- elif not entry["peer_cert"] and not entry["psk"]:
- vlog.warn("no 'peer_cert' or 'psk' defined for %s" % name)
- continue
+ tunnel_type = "gre"
+ elif rec.type == "ipsec_vxlan":
+ tunnel_type = "vxlan"
+ else:
+ continue
+
+ name = rec.name
+ options = rec.options
+ entry = {
+ "remote_ip": options.get("remote_ip"),
+ "local_ip": options.get("local_ip", "0.0.0.0/0"),
+ "certificate": options.get("certificate"),
+ "private_key": options.get("private_key"),
+ "use_ssl_cert": options.get("use_ssl_cert"),
+ "peer_cert": options.get("peer_cert"),
+ "psk": options.get("psk"),
+ "tunnel_type": tunnel_type }
+
+ if entry["peer_cert"] and entry["psk"]:
+ vlog.warn("both 'peer_cert' and 'psk' defined for %s"
+ % name)
+ continue
+ elif not entry["peer_cert"] and not entry["psk"]:
+ vlog.warn("no 'peer_cert' or 'psk' defined for %s" % name)
+ continue
- # The "use_ssl_cert" option is deprecated and will
- # likely go away in the near future.
- if entry["use_ssl_cert"] == "true":
- if not ssl_cert:
- vlog.warn("no valid SSL entry for %s" % name)
- continue
+ # The "use_ssl_cert" option is deprecated and will
+ # likely go away in the near future.
+ if entry["use_ssl_cert"] == "true":
+ if not ssl_cert:
+ vlog.warn("no valid SSL entry for %s" % name)
+ continue
- entry["certificate"] = ssl_cert[0]
- entry["private_key"] = ssl_cert[1]
+ entry["certificate"] = ssl_cert[0]
+ entry["private_key"] = ssl_cert[1]
- new_interfaces[name] = entry
+ new_interfaces[name] = entry
if interfaces != new_interfaces:
update_ipsec(ipsec, interfaces, new_interfaces)
diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
index 51f21f0..67b2717 100644
--- a/include/openflow/nicira-ext.h
+++ b/include/openflow/nicira-ext.h
@@ -1444,9 +1444,12 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24);
/* Tunnel ID.
*
- * For a packet received via GRE tunnel including a (32-bit) key, the key is
- * stored in the low 32-bits and the high bits are zeroed. For other packets,
- * the value is 0.
+ * The key, for a packet received via a keyed tunnel. If the key is less than
+ * 64 bits wide, this field holds the key in its low-order bits and higher bits
+ * are zeroed. If the key is more than 64 bits wide, this field contains the 64
+ * lowest-order bits.
+ *
+ * All zero bits, for packets not received via a keyed tunnel.
*
* Prereqs: None.
*
diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h
index 6c89411..58db169 100644
--- a/include/openvswitch/datapath-protocol.h
+++ b/include/openvswitch/datapath-protocol.h
@@ -200,6 +200,7 @@ enum ovs_vport_type {
OVS_VPORT_TYPE_PATCH, /* virtual tunnel connecting two vports */
OVS_VPORT_TYPE_GRE, /* GRE tunnel */
OVS_VPORT_TYPE_CAPWAP, /* CAPWAP tunnel */
+ OVS_VPORT_TYPE_VXLAN, /* VXLAN tunnel */
__OVS_VPORT_TYPE_MAX
};
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 06ec8fb..56216f2 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -134,11 +134,27 @@ netdev_vport_get_vport_type(const struct netdev *netdev)
: OVS_VPORT_TYPE_UNSPEC);
}
-const char *
-netdev_vport_get_netdev_type(const struct dpif_linux_vport *vport)
+static const char *
+get_maybe_ipsec_tunnel_type(const struct dpif_linux_vport *vport,
+ const char *plain_type, const char *ipsec_type)
{
struct nlattr *a[OVS_TUNNEL_ATTR_MAX + 1];
+ uint32_t flags;
+
+ if (tnl_port_config_from_nlattr(vport->options, vport->options_len, a)) {
+ VLOG_WARN_RL(&rl, "dp%d: cannot parse options for port `%s' (type %u)",
+ vport->dp_ifindex, vport->name,
+ (unsigned int) vport->type);
+ return "unknown";
+ }
+ flags = nl_attr_get_u32(a[OVS_TUNNEL_ATTR_FLAGS]);
+ return flags & TNL_F_IPSEC ? ipsec_type : plain_type;
+}
+
+const char *
+netdev_vport_get_netdev_type(const struct dpif_linux_vport *vport)
+{
switch (vport->type) {
case OVS_VPORT_TYPE_UNSPEC:
break;
@@ -153,16 +169,14 @@ netdev_vport_get_netdev_type(const struct dpif_linux_vport *vport)
return "patch";
case OVS_VPORT_TYPE_GRE:
- if (tnl_port_config_from_nlattr(vport->options, vport->options_len,
- a)) {
- break;
- }
- return (nl_attr_get_u32(a[OVS_TUNNEL_ATTR_FLAGS]) & TNL_F_IPSEC
- ? "ipsec_gre" : "gre");
+ return get_maybe_ipsec_tunnel_type(vport, "gre", "ipsec_gre");
case OVS_VPORT_TYPE_CAPWAP:
return "capwap";
+ case OVS_VPORT_TYPE_VXLAN:
+ return get_maybe_ipsec_tunnel_type(vport, "vxlan", "ipsec_vxlan");
+
case __OVS_VPORT_TYPE_MAX:
break;
}
@@ -566,19 +580,18 @@ static int
parse_tunnel_config(const char *name, const char *type,
const struct shash *args, struct ofpbuf *options)
{
- bool is_gre = false;
- bool is_ipsec = false;
+ bool supports_csum;
+ bool is_ipsec;
struct shash_node *node;
bool ipsec_mech_set = false;
ovs_be32 daddr = htonl(0);
uint32_t flags;
+ supports_csum = !strcmp(type, "gre") || !strcmp(type, "ipsec_gre");
+ is_ipsec = !strncmp(type, "ipsec_", 6);
+
flags = TNL_F_DF_DEFAULT | TNL_F_PMTUD | TNL_F_HDR_CACHE;
- if (!strcmp(type, "gre")) {
- is_gre = true;
- } else if (!strcmp(type, "ipsec_gre")) {
- is_gre = true;
- is_ipsec = true;
+ if (is_ipsec) {
flags |= TNL_F_IPSEC;
flags &= ~TNL_F_HDR_CACHE;
}
@@ -611,7 +624,7 @@ parse_tunnel_config(const char *name, const char *type,
} else {
nl_msg_put_u8(options, OVS_TUNNEL_ATTR_TTL, atoi(node->data));
}
- } else if (!strcmp(node->name, "csum") && is_gre) {
+ } else if (!strcmp(node->name, "csum") && supports_csum) {
if (!strcmp(node->data, "true")) {
flags |= TNL_F_CSUM;
}
@@ -947,6 +960,14 @@ netdev_vport_register(void)
{ "capwap", VPORT_FUNCTIONS(netdev_vport_get_status) },
parse_tunnel_config, unparse_tunnel_config },
+ { OVS_VPORT_TYPE_VXLAN,
+ { "vxlan", VPORT_FUNCTIONS(netdev_vport_get_status) },
+ parse_tunnel_config, unparse_tunnel_config },
+
+ { OVS_VPORT_TYPE_VXLAN,
+ { "ipsec_vxlan", VPORT_FUNCTIONS(netdev_vport_get_status) },
+ parse_tunnel_config, unparse_tunnel_config },
+
{ OVS_VPORT_TYPE_PATCH,
{ "patch", VPORT_FUNCTIONS(NULL) },
parse_patch_config, unparse_patch_config }
diff --git a/rhel/etc_init.d_openvswitch b/rhel/etc_init.d_openvswitch
index 5501d18..113d99a 100755
--- a/rhel/etc_init.d_openvswitch
+++ b/rhel/etc_init.d_openvswitch
@@ -47,7 +47,9 @@ start () {
fi
"$@"
+ # Allow tunnel traffic.
$ovs_ctl --protocol=gre enable-protocol
+ $ovs_ctl --protocol=udp --dport=49170 enable-protocol
touch /var/lock/subsys/openvswitch
}
diff --git a/tests/ovs-monitor-ipsec.at b/tests/ovs-monitor-ipsec.at
index f9868e7..19a834a 100644
--- a/tests/ovs-monitor-ipsec.at
+++ b/tests/ovs-monitor-ipsec.at
@@ -308,4 +308,71 @@ sainfo anonymous {
])
AT_CHECK([test ! -f etc/racoon/certs/ovs-3.4.5.6.pem])
+###
+### Add an ipsec_vxlan psk interface and check what ovs-monitor-ipsec does
+###
+AT_CHECK([ovs_vsctl \
+ -- add-port br0 vxlan0 \
+ -- set interface vxlan0 type=ipsec_vxlan \
+ options:remote_ip=4.5.6.7 \
+ options:psk=mishmash])
+OVS_WAIT_UNTIL([test -f actions && grep 'spdadd 4.5.6.7' actions >/dev/null])
+AT_CHECK([sed '1,41d' actions], [0],
+[[racoon: reload
+setkey:
+> spdadd 0.0.0.0/0[any] 4.5.6.7[any] udp -P out ipsec esp/transport/0.0.0.0/0[49171]-4.5.6.7[49170]/require;
+> spdadd 4.5.6.7[any] 0.0.0.0/0[any] udp -P in ipsec esp/transport/4.5.6.7[49170]-0.0.0.0/0[49171]/require;
+]])
+AT_CHECK([trim etc/racoon/psk.txt], [0], [4.5.6.7 mishmash
+])
+AT_CHECK([trim etc/racoon/racoon.conf], [0], [dnl
+path pre_shared_key "/etc/racoon/psk.txt";
+path certificate "/etc/racoon/certs";
+remote 4.5.6.7 {
+ exchange_mode main;
+ nat_traversal on;
+ proposal {
+ encryption_algorithm aes;
+ hash_algorithm sha1;
+ authentication_method pre_shared_key;
+ dh_group 2;
+ }
+}
+sainfo anonymous {
+ pfs_group 2;
+ lifetime time 1 hour;
+ encryption_algorithm aes;
+ authentication_algorithm hmac_sha1, hmac_md5;
+ compression_algorithm deflate;
+}
+])
+
+###
+### Delete the ipsec_vxlan interface and check what ovs-monitor-ipsec does
+###
+AT_CHECK([ovs_vsctl del-port vxlan0])
+OVS_WAIT_UNTIL([test `wc -l < actions` -ge 17])
+AT_CHECK([sed '1,45d' actions], [0], [dnl
+racoon: reload
+setkey:
+> spddelete 0.0.0.0/0 4.5.6.7 udp -P out;
+> spddelete 4.5.6.7 0.0.0.0/0 udp -P in;
+setkey:
+> dump ;
+setkey:
+> dump ;
+])
+AT_CHECK([trim etc/racoon/psk.txt], [0], [])
+AT_CHECK([trim etc/racoon/racoon.conf], [0], [dnl
+path pre_shared_key "/etc/racoon/psk.txt";
+path certificate "/etc/racoon/certs";
+sainfo anonymous {
+ pfs_group 2;
+ lifetime time 1 hour;
+ encryption_algorithm aes;
+ authentication_algorithm hmac_sha1, hmac_md5;
+ compression_algorithm deflate;
+}
+])
+
AT_CLEANUP
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index d579b87..ea50733 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -916,8 +916,7 @@
<dt><code>gre</code></dt>
<dd>
An Ethernet over RFC 2890 Generic Routing Encapsulation over IPv4
- tunnel. See <ref group="Tunnel Options"/> for information on
- configuring GRE tunnels.
+ tunnel.
</dd>
<dt><code>ipsec_gre</code></dt>
@@ -936,6 +935,28 @@
with the Linux kernel datapath with kernel version 2.6.26 or later.
</dd>
+ <dt><code>vxlan</code></dt>
+ <dd>
+ <p>
+ An Ethernet tunnel over the experimental, UDP-based VXLAN
+ protocol described at
+ <code>http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-00</code>.
+ VXLAN is currently supported only with the Linux kernel datapath
+ with kernel version 2.6.26 or later.
+ </p>
+ <p>
+ As an experimental protocol, VXLAN has no officially assigned UDP
+ port. Open vSwitch currently uses UDP destination port 49170.
+ The source port used for VXLAN traffic varies on a per-flow basis
+ between 32768 and 65535 to allow load balancing.
+ </p>
+ </dd>
+
+ <dt><code>ipsec_vxlan</code></dt>
+ <dd>
+ VXLAN over an IPSEC tunnel.
+ </dd>
+
<dt><code>patch</code></dt>
<dd>
A pair of virtual devices that act as a patch cable.
@@ -950,7 +971,8 @@
<group title="Tunnel Options">
<p>
These options apply to interfaces with <ref column="type"/> of
- <code>gre</code>, <code>ipsec_gre</code>, and <code>capwap</code>.
+ <code>gre</code>, <code>ipsec_gre</code>, <code>capwap</code>, and
+ <code>vxlan</code>.
</p>
<p>
@@ -983,8 +1005,9 @@
key="in_key"/> at all.
</li>
<li>
- A positive 32-bit (for GRE) or 64-bit (for CAPWAP) number. The
- tunnel receives only packets with the specified key.
+ A positive 24-bit (for VXLAN), 32-bit (for GRE) or 64-bit (for
+ CAPWAP) number. The tunnel receives only packets with the
+ specified key.
</li>
<li>
The word <code>flow</code>. The tunnel accepts packets with any
@@ -1009,8 +1032,9 @@
key="out_key"/> at all.
</li>
<li>
- A positive 32-bit (for GRE) or 64-bit (for CAPWAP) number. Packets
- sent through the tunnel will have the specified key.
+ A positive 24-bit (for VXLAN), 32-bit (for GRE) or 64-bit (for
+ CAPWAP) number. Packets sent through the tunnel will have the
+ specified key.
</li>
<li>
The word <code>flow</code>. Packets sent through the tunnel will
@@ -1068,9 +1092,10 @@
enabled; set to <code>false</code> to disable.
</column>
- <group title="Tunnel Options: gre only">
+ <group title="Tunnel Options: gre and vxlan only">
<p>
- Only <code>gre</code> interfaces support these options.
+ Only <code>gre</code> and <code>vxlan</code> interfaces support these
+ options.
</p>
<column name="options" key="header_cache" type='{"type": "boolean"}'>
@@ -1112,11 +1137,19 @@
</column>
</group>
- <group title="Tunnel Options: ipsec_gre only">
+ <group title="Tunnel Options: ipsec_gre and ipsec_vxlan only">
<p>
- Only <code>ipsec_gre</code> interfaces support these options.
+ Only <code>ipsec_gre</code> and <code>ipsec_vxlan</code> interfaces
+ support these options.
</p>
+ <p>
+ These options are implemented through a separate daemon named
+ <code>ovs-monitor-ipsec</code> that so far has only been ported to
+ and packaged for Debian (including derivative distributions such as
+ Ubuntu).
+ </p>
+
<column name="options" key="peer_cert">
Required for certificate authentication. A string containing the
peer's certificate in PEM format. Additionally the host's
diff --git a/xenserver/etc_init.d_openvswitch b/xenserver/etc_init.d_openvswitch
index 8ba8aee..75d8391 100755
--- a/xenserver/etc_init.d_openvswitch
+++ b/xenserver/etc_init.d_openvswitch
@@ -76,7 +76,9 @@ start () {
--log-file --pidfile --detach --monitor unix:/var/run/openvswitch/db.sock
fi
+ # Allow tunnel traffic.
$ovs_ctl --protocol=gre enable-protocol
+ $ovs_ctl --protocol=udp --dport=49170 enable-protocol
touch /var/lock/subsys/openvswitch
}
--
1.7.2.5
More information about the dev
mailing list