RE: [PATCHv1 7/7] IPVTAP: IP-VLAN based tap driver
From: Grandhi, Sainath
Date: Tue Jan 17 2017 - 18:49:31 EST
> -----Original Message-----
> From: Mahesh Bandewar (àààà ààààààà)
> [mailto:maheshb@xxxxxxxxxx]
> Sent: Friday, January 06, 2017 3:47 PM
> To: Grandhi, Sainath <sainath.grandhi@xxxxxxxxx>
> Cc: linux-netdev <netdev@xxxxxxxxxxxxxxx>; David Miller
> <davem@xxxxxxxxxxxxx>; mahesh@xxxxxxxxxxxx; linux-
> kernel@xxxxxxxxxxxxxxx
> Subject: Re: [PATCHv1 7/7] IPVTAP: IP-VLAN based tap driver
>
> few superficial comments inline.
>
> On Fri, Jan 6, 2017 at 2:33 PM, Sainath Grandhi <sainath.grandhi@xxxxxxxxx>
> wrote:
> > This patch adds a tap character device driver that is based on the
> > IP-VLAN network interface, called ipvtap. An ipvtap device can be
> > created in the same way as an ipvlan device, using 'type ipvtap', and
> > then accessed using the tap user space interface.
> >
> > Signed-off-by: Sainath Grandhi <sainath.grandhi@xxxxxxxxx>
> > Tested-by: Sainath Grandhi <sainath.grandhi@xxxxxxxxx>
> > ---
> > drivers/net/Kconfig | 12 ++
> > drivers/net/Makefile | 1 +
> > drivers/net/ipvlan/Makefile | 1 +
> > drivers/net/ipvlan/ipvlan.h | 7 ++
> > drivers/net/ipvlan/ipvlan_core.c | 5 +-
> > drivers/net/ipvlan/ipvlan_main.c | 37 +++---
> > drivers/net/ipvlan/ipvtap.c | 238
> +++++++++++++++++++++++++++++++++++++++
> > 7 files changed, 282 insertions(+), 19 deletions(-) create mode
> > 100644 drivers/net/ipvlan/ipvtap.c
> >
> > diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index
> > 280380d..ddfb30a 100644
> > --- a/drivers/net/Kconfig
> > +++ b/drivers/net/Kconfig
> > @@ -165,6 +165,18 @@ config IPVLAN
> > To compile this driver as a module, choose M here: the module
> > will be called ipvlan.
> >
> > +config IPVTAP
> > + tristate "IP-VLAN based tap driver"
> > + depends on IPVLAN
> > + depends on INET
> > + help
> > + This adds a specialized tap character device driver that is based
> > + on the IP-VLAN network interface, called ipvtap. An ipvtap device
> > + can be added in the same way as a ipvlan device, using 'type
> > + ipvtap', and then be accessed through the tap user space interface.
> > +
> > + To compile this driver as a module, choose M here: the module
> > + will be called macvtap.
> >
> > config VXLAN
> > tristate "Virtual eXtensible Local Area Network (VXLAN)"
> > diff --git a/drivers/net/Makefile b/drivers/net/Makefile index
> > 7dd86ca..98ed4d9 100644
> > --- a/drivers/net/Makefile
> > +++ b/drivers/net/Makefile
> > @@ -7,6 +7,7 @@
> > #
> > obj-$(CONFIG_BONDING) += bonding/
> > obj-$(CONFIG_IPVLAN) += ipvlan/
> > +obj-$(CONFIG_IPVTAP) += ipvlan/
> > obj-$(CONFIG_DUMMY) += dummy.o
> > obj-$(CONFIG_EQUALIZER) += eql.o
> > obj-$(CONFIG_IFB) += ifb.o
> > diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile
> > index df79910..8a2c64d 100644
> > --- a/drivers/net/ipvlan/Makefile
> > +++ b/drivers/net/ipvlan/Makefile
> > @@ -3,5 +3,6 @@
> > #
> >
> > obj-$(CONFIG_IPVLAN) += ipvlan.o
> > +obj-$(CONFIG_IPVTAP) += ipvtap.o
> >
> > ipvlan-objs := ipvlan_core.o ipvlan_main.o diff --git
> > a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index
> > dbfbb33..4362d88 100644
> > --- a/drivers/net/ipvlan/ipvlan.h
> > +++ b/drivers/net/ipvlan/ipvlan.h
> > @@ -133,4 +133,11 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device
> *dev, struct sk_buff *skb,
> > u16 proto); unsigned int
> > ipvlan_nf_input(void *priv, struct sk_buff *skb,
> > const struct nf_hook_state *state);
> > +void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
> > + unsigned int len, bool success, bool mcast); int
> > +ipvlan_link_new(struct net *src_net, struct net_device *dev,
> > + struct nlattr *tb[], struct nlattr *data[]); void
> > +ipvlan_link_delete(struct net_device *dev, struct list_head *head);
> > +void ipvlan_link_setup(struct net_device *dev); int
> > +ipvlan_link_register(struct rtnl_link_ops *ops);
> > #endif /* __IPVLAN_H */
> > diff --git a/drivers/net/ipvlan/ipvlan_core.c
> > b/drivers/net/ipvlan/ipvlan_core.c
> > index 83ce74a..9af16ab 100644
> > --- a/drivers/net/ipvlan/ipvlan_core.c
> > +++ b/drivers/net/ipvlan/ipvlan_core.c
> > @@ -16,8 +16,8 @@ void ipvlan_init_secret(void)
> > net_get_random_once(&ipvlan_jhash_secret,
> > sizeof(ipvlan_jhash_secret)); }
> >
> > -static void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
> > - unsigned int len, bool success, bool mcast)
> > +void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
> > + unsigned int len, bool success, bool mcast)
> > {
> > if (!ipvlan)
> > return;
> > @@ -36,6 +36,7 @@ static void ipvlan_count_rx(const struct ipvl_dev
> *ipvlan,
> > this_cpu_inc(ipvlan->pcpu_stats->rx_errs);
> > }
> > }
> > +EXPORT_SYMBOL_GPL(ipvlan_count_rx);
> Why export, isn't just removing 'static' enough?
This function becomes part of "ipvlan" module.
"ipvtap" module depends on this function exported by "ipvlan" module.
> >
> > static u8 ipvlan_get_v6_hash(const void *iaddr) { diff --git
> > a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
> > index 8b0f993..666a05d 100644
> > --- a/drivers/net/ipvlan/ipvlan_main.c
> > +++ b/drivers/net/ipvlan/ipvlan_main.c
> > @@ -13,14 +13,14 @@ static u32 ipvl_nf_hook_refcnt = 0;
> >
> > static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
> > {
> > - .hook = ipvlan_nf_input,
> > - .pf = NFPROTO_IPV4,
> > + .hook = ipvlan_nf_input,
> > + .pf = NFPROTO_IPV4,
> spurious?
> > .hooknum = NF_INET_LOCAL_IN,
> > .priority = INT_MAX,
> > },
> > {
> > - .hook = ipvlan_nf_input,
> > - .pf = NFPROTO_IPV6,
> > + .hook = ipvlan_nf_input,
> > + .pf = NFPROTO_IPV6,
> spurious??
> > .hooknum = NF_INET_LOCAL_IN,
> > .priority = INT_MAX,
> > },
> > @@ -398,7 +398,7 @@ static int ipvlan_hard_header(struct sk_buff *skb,
> > struct net_device *dev, }
> >
> > static const struct header_ops ipvlan_header_ops = {
> > - .create = ipvlan_hard_header,
> > + .create = ipvlan_hard_header,
> spurious???
Would take care of it in the next version.
> > .parse = eth_header_parse,
> > .cache = eth_header_cache,
> > .cache_update = eth_header_cache_update,
> > @@ -494,8 +494,8 @@ static int ipvlan_nl_fillinfo(struct sk_buff *skb,
> > return ret;
> > }
> >
> > -static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
> > - struct nlattr *tb[], struct nlattr *data[])
> > +int ipvlan_link_new(struct net *src_net, struct net_device *dev,
> > + struct nlattr *tb[], struct nlattr *data[])
> > {
> > struct ipvl_dev *ipvlan = netdev_priv(dev);
> > struct ipvl_port *port;
> > @@ -567,8 +567,9 @@ static int ipvlan_link_new(struct net *src_net,
> struct net_device *dev,
> > ipvlan_port_destroy(phy_dev);
> > return err;
> > }
> > +EXPORT_SYMBOL_GPL(ipvlan_link_new);
> >
> > -static void ipvlan_link_delete(struct net_device *dev, struct
> > list_head *head)
> > +void ipvlan_link_delete(struct net_device *dev, struct list_head
> > +*head)
> > {
> > struct ipvl_dev *ipvlan = netdev_priv(dev);
> > struct ipvl_addr *addr, *next; @@ -583,8 +584,9 @@ static void
> > ipvlan_link_delete(struct net_device *dev, struct list_head *head)
> > unregister_netdevice_queue(dev, head);
> > netdev_upper_dev_unlink(ipvlan->phy_dev, dev); }
> > +EXPORT_SYMBOL_GPL(ipvlan_link_delete);
> >
> > -static void ipvlan_link_setup(struct net_device *dev)
> > +void ipvlan_link_setup(struct net_device *dev)
> > {
> > ether_setup(dev);
> >
> > @@ -595,6 +597,7 @@ static void ipvlan_link_setup(struct net_device
> *dev)
> > dev->header_ops = &ipvlan_header_ops;
> > dev->ethtool_ops = &ipvlan_ethtool_ops; }
> > +EXPORT_SYMBOL_GPL(ipvlan_link_setup);
> >
> > static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] =
> > {
> > @@ -605,22 +608,22 @@ static struct rtnl_link_ops ipvlan_link_ops = {
> > .kind = "ipvlan",
> > .priv_size = sizeof(struct ipvl_dev),
> >
> > - .get_size = ipvlan_nl_getsize,
> > - .policy = ipvlan_nl_policy,
> > - .validate = ipvlan_nl_validate,
> > - .fill_info = ipvlan_nl_fillinfo,
> > - .changelink = ipvlan_nl_changelink,
> > - .maxtype = IFLA_IPVLAN_MAX,
> > -
> > .setup = ipvlan_link_setup,
> > .newlink = ipvlan_link_new,
> > .dellink = ipvlan_link_delete,
> > };
> >
> > -static int ipvlan_link_register(struct rtnl_link_ops *ops)
> > +int ipvlan_link_register(struct rtnl_link_ops *ops)
> > {
> > + ops->get_size = ipvlan_nl_getsize;
> > + ops->policy = ipvlan_nl_policy;
> > + ops->validate = ipvlan_nl_validate;
> > + ops->fill_info = ipvlan_nl_fillinfo;
> > + ops->changelink = ipvlan_nl_changelink;
> > + ops->maxtype = IFLA_IPVLAN_MAX;
> > return rtnl_link_register(ops);
> > }
> > +EXPORT_SYMBOL_GPL(ipvlan_link_register);
> >
> > static int ipvlan_device_event(struct notifier_block *unused,
> > unsigned long event, void *ptr)
> > diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c
> > new file mode 100644
> > index 0000000..0422cdf
> > --- /dev/null
> > +++ b/drivers/net/ipvlan/ipvtap.c
> > @@ -0,0 +1,238 @@
> > +#include <linux/etherdevice.h>
> > +#include "ipvlan.h"
> > +#include <linux/if_vlan.h>
> > +#include <linux/if_tap.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/nsproxy.h>
> > +#include <linux/compat.h>
> > +#include <linux/if_tun.h>
> > +#include <linux/module.h>
> > +#include <linux/skbuff.h>
> > +#include <linux/cache.h>
> > +#include <linux/sched.h>
> > +#include <linux/types.h>
> > +#include <linux/slab.h>
> > +#include <linux/wait.h>
> > +#include <linux/cdev.h>
> > +#include <linux/idr.h>
> > +#include <linux/fs.h>
> > +#include <linux/uio.h>
> > +
> > +#include <net/net_namespace.h>
> > +#include <net/rtnetlink.h>
> > +#include <net/sock.h>
> > +#include <linux/virtio_net.h>
> > +
> > +#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN |
> NETIF_F_TSO | \
> > + NETIF_F_TSO6 | NETIF_F_UFO)
> > +
> > +static dev_t ipvtap_major;
> > +static struct cdev ipvtap_cdev;
> > +
> > +static const void *ipvtap_net_namespace(struct device *d)
> > +{
> > + struct net_device *dev = to_net_dev(d->parent);
> > + return dev_net(dev);
> > +}
> > +
> > +static struct class ipvtap_class = {
> > + .name = "ipvtap",
> > + .owner = THIS_MODULE,
> > + .ns_type = &net_ns_type_operations,
> > + .namespace = ipvtap_net_namespace,
> > +};
> > +
> > +struct ipvtap_dev {
> > + struct ipvl_dev vlan;
> > + struct tap_dev tap;
> > +};
> > +
> > +static void ipvtap_count_tx_dropped(struct tap_dev *tap)
> > +{
> > + struct ipvl_dev *vlan = (struct ipvl_dev *)container_of(tap, struct
> ipvtap_dev, tap);
> > +
> > + this_cpu_inc(vlan->pcpu_stats->tx_drps);
> > +}
> > +
> > +static void ipvtap_count_rx_dropped(struct tap_dev *tap)
> > +{
> > + struct ipvl_dev *vlan = (struct ipvl_dev *)container_of(tap, struct
> ipvtap_dev, tap);
> > +
> > + ipvlan_count_rx(vlan, 0, 0, 0);
> > +}
> > +
> > +static void ipvtap_update_features(struct tap_dev *tap,
> > + netdev_features_t features)
> > +{
> > + struct ipvl_dev *vlan = (struct ipvl_dev *)container_of(tap, struct
> ipvtap_dev, tap);
> > +
> > + vlan->sfeatures = features;
> > + netdev_update_features(vlan->dev);
> > +}
> > +
> > +static int ipvtap_newlink(struct net *src_net,
> > + struct net_device *dev,
> > + struct nlattr *tb[],
> > + struct nlattr *data[])
> > +{
> > + struct ipvtap_dev *vlantap = netdev_priv(dev);
> > + int err;
> > +
> > + INIT_LIST_HEAD(&vlantap->tap.queue_list);
> > +
> > + /* Since macvlan supports all offloads by default, make
> > + * tap support all offloads also.
> > + */
> > + vlantap->tap.tap_features = TUN_OFFLOADS;
> > + vlantap->tap.count_tx_dropped = ipvtap_count_tx_dropped;
> > + vlantap->tap.update_features = ipvtap_update_features;
> > + vlantap->tap.count_rx_dropped = ipvtap_count_rx_dropped;
> > +
> > + err = netdev_rx_handler_register(dev, tap_handle_frame, &vlantap-
> >tap);
> > + if (err)
> > + return err;
> > +
> > + /* Don't put anything that may fail after macvlan_common_newlink
> > + * because we can't undo what it does.
> > + */
> > + err = ipvlan_link_new(src_net, dev, tb, data);
> > + if (err) {
> > + netdev_rx_handler_unregister(dev);
> > + return err;
> > + }
> > +
> > + vlantap->tap.dev = vlantap->vlan.dev;
> > +
> > + return err;
> > +}
> > +
> > +static void ipvtap_dellink(struct net_device *dev,
> > + struct list_head *head)
> > +{
> > + struct ipvtap_dev *vlan = netdev_priv(dev);
> > +
> > + netdev_rx_handler_unregister(dev);
> > + tap_del_queues(&vlan->tap);
> > + ipvlan_link_delete(dev, head);
> > +}
> > +
> > +static void ipvtap_setup(struct net_device *dev)
> > +{
> > + ipvlan_link_setup(dev);
> > + dev->tx_queue_len = TUN_READQ_SIZE;
> > + dev->priv_flags &= ~IFF_NO_QUEUE;
> > +}
> > +
> > +static struct rtnl_link_ops ipvtap_link_ops __read_mostly = {
> > + .kind = "ipvtap",
> > + .setup = ipvtap_setup,
> > + .newlink = ipvtap_newlink,
> > + .dellink = ipvtap_dellink,
> > + .priv_size = sizeof(struct ipvtap_dev),
> > +};
> > +
> > +static int ipvtap_device_event(struct notifier_block *unused,
> > + unsigned long event, void *ptr)
> > +{
> > + struct net_device *dev = netdev_notifier_info_to_dev(ptr);
> > + struct ipvtap_dev *vlantap;
> > + struct device *classdev;
> > + dev_t devt;
> > + int err;
> > + char tap_name[IFNAMSIZ];
> > +
> > + if (dev->rtnl_link_ops != &ipvtap_link_ops)
> > + return NOTIFY_DONE;
> > +
> > + snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex);
> > + vlantap = netdev_priv(dev);
> > +
> > + switch (event) {
> > + case NETDEV_REGISTER:
> > + /* Create the device node here after the network device has
> > + * been registered but before register_netdevice has
> > + * finished running.
> > + */
> > + err = tap_get_minor(ipvtap_major, &vlantap->tap);
> > + if (err)
> > + return notifier_from_errno(err);
> > +
> > + devt = MKDEV(MAJOR(ipvtap_major), vlantap->tap.minor);
> > + classdev = device_create(&ipvtap_class, &dev->dev, devt,
> > + dev, tap_name);
> > + if (IS_ERR(classdev)) {
> > + tap_free_minor(ipvtap_major, &vlantap->tap);
> > + return notifier_from_errno(PTR_ERR(classdev));
> > + }
> > + err = sysfs_create_link(&dev->dev.kobj, &classdev->kobj,
> > + tap_name);
> > + if (err)
> > + return notifier_from_errno(err);
> > + break;
> > + case NETDEV_UNREGISTER:
> > + /* vlan->minor == 0 if NETDEV_REGISTER above failed */
> > + if (vlantap->tap.minor == 0)
> > + break;
> > + sysfs_remove_link(&dev->dev.kobj, tap_name);
> > + devt = MKDEV(MAJOR(ipvtap_major), vlantap->tap.minor);
> > + device_destroy(&ipvtap_class, devt);
> > + tap_free_minor(ipvtap_major, &vlantap->tap);
> > + break;
> > + case NETDEV_CHANGE_TX_QUEUE_LEN:
> > + if (tap_queue_resize(&vlantap->tap))
> > + return NOTIFY_BAD;
> > + break;
> > + }
> > +
> > + return NOTIFY_DONE;
> > +}
> > +
> > +static struct notifier_block ipvtap_notifier_block __read_mostly = {
> > + .notifier_call = ipvtap_device_event,
> > +};
> > +
> > +static int ipvtap_init(void)
> > +{
> > + int err;
> > +
> > + err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap");
> > +
> > + if (err)
> > + goto out1;
> > +
> > + err = class_register(&ipvtap_class);
> > + if (err)
> > + goto out2;
> > +
> > + err = register_netdevice_notifier(&ipvtap_notifier_block);
> > + if (err)
> > + goto out3;
> > +
> > + err = ipvlan_link_register(&ipvtap_link_ops);
> > + if (err)
> > + goto out4;
> > +
> > + return 0;
> > +
> > +out4:
> > + unregister_netdevice_notifier(&ipvtap_notifier_block);
> > +out3:
> > + class_unregister(&ipvtap_class);
> > +out2:
> > + cdev_del(&ipvtap_cdev);
> > +out1:
> > + return err;
> > +}
> > +module_init(ipvtap_init);
> > +
> > +static void ipvtap_exit(void)
> > +{
> > + rtnl_link_unregister(&ipvtap_link_ops);
> > + unregister_netdevice_notifier(&ipvtap_notifier_block);
> > + class_unregister(&ipvtap_class);
> > + tap_destroy_cdev(ipvtap_major, &ipvtap_cdev);
> > +}
> > +module_exit(ipvtap_exit);
> > +MODULE_ALIAS_RTNL_LINK("ipvtap");
> > +MODULE_AUTHOR("Arnd Bergmann <arnd@xxxxxxxx>");
> > +MODULE_LICENSE("GPL");
> > --
> > 2.7.4
> >