]> rtime.felk.cvut.cz Git - can-eth-gw-linux.git/blobdiff - net/core/dev.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
[can-eth-gw-linux.git] / net / core / dev.c
index e5942bf45a6d9b7e7412a2100b7ef116610aab65..d0cbc93fcf32d94db2b909274dd60a9fe88cc80e 100644 (file)
 #define PTYPE_HASH_MASK        (PTYPE_HASH_SIZE - 1)
 
 static DEFINE_SPINLOCK(ptype_lock);
+static DEFINE_SPINLOCK(offload_lock);
 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 static struct list_head ptype_all __read_mostly;       /* Taps */
+static struct list_head offload_base __read_mostly;
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -201,6 +203,8 @@ static struct list_head ptype_all __read_mostly;    /* Taps */
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
+DEFINE_SEQLOCK(devnet_rename_seq);
+
 static inline void dev_base_seq_inc(struct net *net)
 {
        while (++net->dev_base_seq == 0);
@@ -470,6 +474,82 @@ void dev_remove_pack(struct packet_type *pt)
 }
 EXPORT_SYMBOL(dev_remove_pack);
 
+
+/**
+ *     dev_add_offload - register offload handlers
+ *     @po: protocol offload declaration
+ *
+ *     Add protocol offload handlers to the networking stack. The passed
+ *     &packet_offload is linked into kernel lists and may not be freed until
+ *     it has been removed from the kernel lists.
+ *
+ *     This call does not sleep, therefore it cannot
+ *     guarantee that all CPUs that are in the middle of receiving packets
+ *     will see the new offload handlers (until the next received packet).
+ */
+void dev_add_offload(struct packet_offload *po)
+{
+       struct list_head *head = &offload_base;
+
+       spin_lock(&offload_lock);
+       list_add_rcu(&po->list, head);
+       spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(dev_add_offload);
+
+/**
+ *     __dev_remove_offload     - remove offload handler
+ *     @po: packet offload declaration
+ *
+ *     Remove a protocol offload handler that was previously added to the
+ *     kernel offload handlers by dev_add_offload(). The passed
+ *     &packet_offload is unlinked from the kernel lists under offload_lock;
+ *     a warning is logged if it is not found on the list.
+ *
+ *     The offload handler might still be in use by receivers
+ *     and must not be freed until after all the CPUs have gone
+ *     through a quiescent state.
+ */
+void __dev_remove_offload(struct packet_offload *po)
+{
+       struct list_head *head = &offload_base;
+       struct packet_offload *po1;
+
+       spin_lock(&offload_lock);
+
+       list_for_each_entry(po1, head, list) {
+               if (po == po1) {
+                       list_del_rcu(&po->list);
+                       goto out;
+               }
+       }
+
+       pr_warn("dev_remove_offload: %p not found\n", po);
+out:
+       spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(__dev_remove_offload);
+
+/**
+ *     dev_remove_offload       - remove packet offload handler
+ *     @po: packet offload declaration
+ *
+ *     Remove a packet offload handler that was previously added to the kernel
+ *     offload handlers by dev_add_offload(). The passed &packet_offload is
+ *     removed from the kernel lists and can be freed or reused once this
+ *     function returns.
+ *
+ *     This call sleeps to guarantee that no CPU is looking at the offload
+ *     handler after return.
+ */
+void dev_remove_offload(struct packet_offload *po)
+{
+       __dev_remove_offload(po);
+
+       synchronize_net();
+}
+EXPORT_SYMBOL(dev_remove_offload);
+
 /******************************************************************************
 
                      Device Boot-time Settings Routines
@@ -1013,22 +1093,31 @@ int dev_change_name(struct net_device *dev, const char *newname)
        if (dev->flags & IFF_UP)
                return -EBUSY;
 
-       if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
+       write_seqlock(&devnet_rename_seq);
+
+       if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
+               write_sequnlock(&devnet_rename_seq);
                return 0;
+       }
 
        memcpy(oldname, dev->name, IFNAMSIZ);
 
        err = dev_get_valid_name(net, dev, newname);
-       if (err < 0)
+       if (err < 0) {
+               write_sequnlock(&devnet_rename_seq);
                return err;
+       }
 
 rollback:
        ret = device_rename(&dev->dev, dev->name);
        if (ret) {
                memcpy(dev->name, oldname, IFNAMSIZ);
+               write_sequnlock(&devnet_rename_seq);
                return ret;
        }
 
+       write_sequnlock(&devnet_rename_seq);
+
        write_lock_bh(&dev_base_lock);
        hlist_del_rcu(&dev->name_hlist);
        write_unlock_bh(&dev_base_lock);
@@ -1046,6 +1135,7 @@ rollback:
                /* err >= 0 after dev_alloc_name() or stores the first errno */
                if (err >= 0) {
                        err = ret;
+                       write_seqlock(&devnet_rename_seq);
                        memcpy(dev->name, oldname, IFNAMSIZ);
                        goto rollback;
                } else {
@@ -1075,10 +1165,8 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
                return -EINVAL;
 
        if (!len) {
-               if (dev->ifalias) {
-                       kfree(dev->ifalias);
-                       dev->ifalias = NULL;
-               }
+               kfree(dev->ifalias);
+               dev->ifalias = NULL;
                return 0;
        }
 
@@ -1994,7 +2082,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
        netdev_features_t features)
 {
        struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
-       struct packet_type *ptype;
+       struct packet_offload *ptype;
        __be16 type = skb->protocol;
        int vlan_depth = ETH_HLEN;
        int err;
@@ -2023,18 +2111,17 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
        }
 
        rcu_read_lock();
-       list_for_each_entry_rcu(ptype,
-                       &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-               if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+       list_for_each_entry_rcu(ptype, &offload_base, list) {
+               if (ptype->type == type && ptype->callbacks.gso_segment) {
                        if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-                               err = ptype->gso_send_check(skb);
+                               err = ptype->callbacks.gso_send_check(skb);
                                segs = ERR_PTR(err);
                                if (err || skb_gso_ok(skb, features))
                                        break;
                                __skb_push(skb, (skb->data -
                                                 skb_network_header(skb)));
                        }
-                       segs = ptype->gso_segment(skb, features);
+                       segs = ptype->callbacks.gso_segment(skb, features);
                        break;
                }
        }
@@ -2237,6 +2324,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                        skb->vlan_tci = 0;
                }
 
+               /* If this is an encapsulation offload request, verify we are
+                * testing hardware encapsulation features instead of standard
+                * features for the netdev
+                */
+               if (skb->encapsulation)
+                       features &= dev->hw_enc_features;
+
                if (netif_needs_gso(skb, features)) {
                        if (unlikely(dev_gso_segment(skb, features)))
                                goto out_kfree_skb;
@@ -2252,8 +2346,12 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                         * checksumming here.
                         */
                        if (skb->ip_summed == CHECKSUM_PARTIAL) {
-                               skb_set_transport_header(skb,
-                                       skb_checksum_start_offset(skb));
+                               if (skb->encapsulation)
+                                       skb_set_inner_transport_header(skb,
+                                               skb_checksum_start_offset(skb));
+                               else
+                                       skb_set_transport_header(skb,
+                                               skb_checksum_start_offset(skb));
                                if (!(features & NETIF_F_ALL_CSUM) &&
                                     skb_checksum_help(skb))
                                        goto out_kfree_skb;
@@ -3446,9 +3544,9 @@ static void flush_backlog(void *arg)
 
 static int napi_gro_complete(struct sk_buff *skb)
 {
-       struct packet_type *ptype;
+       struct packet_offload *ptype;
        __be16 type = skb->protocol;
-       struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+       struct list_head *head = &offload_base;
        int err = -ENOENT;
 
        BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
@@ -3460,10 +3558,10 @@ static int napi_gro_complete(struct sk_buff *skb)
 
        rcu_read_lock();
        list_for_each_entry_rcu(ptype, head, list) {
-               if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+               if (ptype->type != type || !ptype->callbacks.gro_complete)
                        continue;
 
-               err = ptype->gro_complete(skb);
+               err = ptype->callbacks.gro_complete(skb);
                break;
        }
        rcu_read_unlock();
@@ -3507,12 +3605,34 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
+{      /* Set same_flow and clear flush on each skb held in napi->gro_list, relative to skb. */
+       struct sk_buff *p;
+       unsigned int maclen = skb->dev->hard_header_len;
+
+       for (p = napi->gro_list; p; p = p->next) {      /* walk every skb on napi->gro_list */
+               unsigned long diffs;    /* non-zero once p is known to differ from skb */
+
+               diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+               diffs |= p->vlan_tci ^ skb->vlan_tci;
+               if (maclen == ETH_HLEN) /* header length is ETH_HLEN: use compare_ether_header() */
+                       diffs |= compare_ether_header(skb_mac_header(p),
+                                                     skb_gro_mac_header(skb));
+               else if (!diffs)        /* other lengths: memcmp only if dev and vlan_tci matched */
+                       diffs = memcmp(skb_mac_header(p),
+                                      skb_gro_mac_header(skb),
+                                      maclen);
+               NAPI_GRO_CB(p)->same_flow = !diffs;     /* same flow only if nothing differed */
+               NAPI_GRO_CB(p)->flush = 0;
+       }
+}
+
+static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
        struct sk_buff **pp = NULL;
-       struct packet_type *ptype;
+       struct packet_offload *ptype;
        __be16 type = skb->protocol;
-       struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+       struct list_head *head = &offload_base;
        int same_flow;
        int mac_len;
        enum gro_result ret;
@@ -3523,9 +3643,11 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
        if (skb_is_gso(skb) || skb_has_frag_list(skb))
                goto normal;
 
+       gro_list_prepare(napi, skb);
+
        rcu_read_lock();
        list_for_each_entry_rcu(ptype, head, list) {
-               if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+               if (ptype->type != type || !ptype->callbacks.gro_receive)
                        continue;
 
                skb_set_network_header(skb, skb_gro_offset(skb));
@@ -3535,7 +3657,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
                NAPI_GRO_CB(skb)->flush = 0;
                NAPI_GRO_CB(skb)->free = 0;
 
-               pp = ptype->gro_receive(&napi->gro_list, skb);
+               pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
                break;
        }
        rcu_read_unlock();
@@ -3598,34 +3720,9 @@ normal:
        ret = GRO_NORMAL;
        goto pull;
 }
-EXPORT_SYMBOL(dev_gro_receive);
-
-static inline gro_result_t
-__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
-{
-       struct sk_buff *p;
-       unsigned int maclen = skb->dev->hard_header_len;
-
-       for (p = napi->gro_list; p; p = p->next) {
-               unsigned long diffs;
-
-               diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
-               diffs |= p->vlan_tci ^ skb->vlan_tci;
-               if (maclen == ETH_HLEN)
-                       diffs |= compare_ether_header(skb_mac_header(p),
-                                                     skb_gro_mac_header(skb));
-               else if (!diffs)
-                       diffs = memcmp(skb_mac_header(p),
-                                      skb_gro_mac_header(skb),
-                                      maclen);
-               NAPI_GRO_CB(p)->same_flow = !diffs;
-               NAPI_GRO_CB(p)->flush = 0;
-       }
 
-       return dev_gro_receive(napi, skb);
-}
 
-gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
+static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
        switch (ret) {
        case GRO_NORMAL:
@@ -3651,7 +3748,6 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 
        return ret;
 }
-EXPORT_SYMBOL(napi_skb_finish);
 
 static void skb_gro_reset_offset(struct sk_buff *skb)
 {
@@ -3674,7 +3770,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
        skb_gro_reset_offset(skb);
 
-       return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
+       return napi_skb_finish(dev_gro_receive(napi, skb), skb);
 }
 EXPORT_SYMBOL(napi_gro_receive);
 
@@ -3703,7 +3799,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
                               gro_result_t ret)
 {
        switch (ret) {
@@ -3728,7 +3824,6 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
 
        return ret;
 }
-EXPORT_SYMBOL(napi_frags_finish);
 
 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 {
@@ -3773,7 +3868,7 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
        if (!skb)
                return GRO_DROP;
 
-       return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
+       return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
 }
 EXPORT_SYMBOL(napi_gro_frags);
 
@@ -4075,6 +4170,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 {
        struct net_device *dev;
        struct ifreq ifr;
+       unsigned seq;
 
        /*
         *      Fetch the caller's info block.
@@ -4083,6 +4179,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
                return -EFAULT;
 
+retry:
+       seq = read_seqbegin(&devnet_rename_seq);
        rcu_read_lock();
        dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
        if (!dev) {
@@ -4092,6 +4190,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 
        strcpy(ifr.ifr_name, dev->name);
        rcu_read_unlock();
+       if (read_seqretry(&devnet_rename_seq, seq))
+               goto retry;
 
        if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
                return -EFAULT;
@@ -4884,7 +4984,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
        else
                dev->mtu = new_mtu;
 
-       if (!err && dev->flags & IFF_UP)
+       if (!err)
                call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
        return err;
 }
@@ -5204,7 +5304,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
        case SIOCGMIIPHY:
        case SIOCGMIIREG:
        case SIOCSIFNAME:
-               if (!capable(CAP_NET_ADMIN))
+               if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        return -EPERM;
                dev_load(net, ifr.ifr_name);
                rtnl_lock();
@@ -5225,16 +5325,25 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
         *      - require strict serialization.
         *      - do not return a value
         */
+       case SIOCSIFMAP:
+       case SIOCSIFTXQLEN:
+               if (!capable(CAP_NET_ADMIN))
+                       return -EPERM;
+               /* fall through */
+       /*
+        *      These ioctl calls:
+        *      - require local superuser power.
+        *      - require strict serialization.
+        *      - do not return a value
+        */
        case SIOCSIFFLAGS:
        case SIOCSIFMETRIC:
        case SIOCSIFMTU:
-       case SIOCSIFMAP:
        case SIOCSIFHWADDR:
        case SIOCSIFSLAVE:
        case SIOCADDMULTI:
        case SIOCDELMULTI:
        case SIOCSIFHWBROADCAST:
-       case SIOCSIFTXQLEN:
        case SIOCSMIIREG:
        case SIOCBONDENSLAVE:
        case SIOCBONDRELEASE:
@@ -5243,7 +5352,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
        case SIOCBRADDIF:
        case SIOCBRDELIF:
        case SIOCSHWTSTAMP:
-               if (!capable(CAP_NET_ADMIN))
+               if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        return -EPERM;
                /* fall through */
        case SIOCBONDSLAVEINFOQUERY:
@@ -6268,7 +6377,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
                goto out;
 
        /* Ensure the device has been registered */
-       err = -EINVAL;
        if (dev->reg_state != NETREG_REGISTERED)
                goto out;
 
@@ -6323,6 +6431,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
        dev_uc_flush(dev);
        dev_mc_flush(dev);
 
+       /* Send a netdev-removed uevent to the old namespace */
+       kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
+
        /* Actually switch the network namespace */
        dev_net_set(dev, net);
 
@@ -6334,6 +6445,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
                        dev->iflink = dev->ifindex;
        }
 
+       /* Send a netdev-add uevent to the new namespace */
+       kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
+
        /* Fixup kobjects */
        err = device_rename(&dev->dev, dev->name);
        WARN_ON(err);
@@ -6666,6 +6780,8 @@ static int __init net_dev_init(void)
        for (i = 0; i < PTYPE_HASH_SIZE; i++)
                INIT_LIST_HEAD(&ptype_base[i]);
 
+       INIT_LIST_HEAD(&offload_base);
+
        if (register_pernet_subsys(&netdev_net_ops))
                goto out;