From: Michal Sojka Date: Sat, 4 Dec 2010 12:20:25 +0000 (+0100) Subject: Document ENOBUFS problem X-Git-Tag: fix-allnoconfig~253 X-Git-Url: http://rtime.felk.cvut.cz/gitweb/can-benchmark.git/commitdiff_plain/5614c1ec1b674755ca75b46b53389aad715bfd6b Document ENOBUFS problem --- diff --git a/doc/notes.org b/doc/notes.org index 08038b6..254ada5 100644 --- a/doc/notes.org +++ b/doc/notes.org @@ -81,3 +81,231 @@ Call Trace: [c03f3fc0] [c039f918] start_kernel+0x2e4/0x2fc [c03f3ff0] [00003438] 0x3438 Rebooting in 180 seconds.. + +* write returns ENOBUFS +The reason is queuing discipline - the queue is full and +pfifo_fast_enqueue returns failure. This failure is propagated down to +qdisc_enqueue_root(), __dev_xmit_skb(), dev_queue_xmit() and +can_send() + +** See the following trace + +# tracer: function_graph +# +# CPU DURATION FUNCTION CALLS +# | | | | | | | + 1) | can_send() { + 1) | dev_queue_xmit() { + 1) | rt_spin_lock() { + 1) 0.894 us | __might_sleep(); + 1) 2.750 us | } + 1) | pfifo_fast_enqueue() { + 1) | kfree_skb() { + 1) | __kfree_skb() { + 1) | skb_release_head_state() { + 1) | sock_wfree() { + 1) | sock_def_write_space() { + 1) | rt_read_lock() { + 1) | __rt_spin_lock() { + 1) 0.677 us | __might_sleep(); + 1) 2.386 us | } + 1) 4.150 us | } + 1) | rt_read_unlock() { + 1) 0.918 us | __rt_spin_unlock(); + 1) 2.644 us | } + 1) 9.375 us | } + 1) + 10.999 us | } + 1) + 12.708 us | } + 1) | skb_release_data() { + 1) | kfree() { + 1) | _slab_irq_disable() { + 1) | rt_spin_lock() { + 1) 0.849 us | __might_sleep(); + 1) 2.588 us | } + 1) 4.242 us | } + 1) 0.864 us | __cache_free(); + 1) 0.888 us | rt_spin_unlock(); + 1) 9.456 us | } + 1) + 11.185 us | } + 1) | kmem_cache_free() { + 1) | _slab_irq_disable() { + 1) | rt_spin_lock() { + 1) 0.812 us | __might_sleep(); + 1) 2.542 us | } + 1) 4.240 us | } + 1) 0.813 us | __cache_free(); + 1) 0.894 us | rt_spin_unlock(); + 1) 9.329 us | } + 1) + 36.444 us | } + 1) + 38.136 us | } + 1) + 39.974 us | } + 1) | /* 
qdisc_enqueue_root = 1 */ + 1) | __qdisc_run() { + 1) | sch_direct_xmit() { + 1) 0.855 us | rt_spin_unlock(); + 1) | rt_spin_lock() { + 1) 0.827 us | __might_sleep(); + 1) 2.521 us | } + 1) | dev_hard_start_xmit() { + 1) | sja1000_start_xmit() { + 1) 1.646 us | kvaser_pci_write_reg(); + 1) 1.460 us | kvaser_pci_write_reg(); + 1) 1.691 us | kvaser_pci_write_reg(); + 1) 1.450 us | kvaser_pci_write_reg(); + 1) 1.416 us | kvaser_pci_write_reg(); + 1) | can_put_echo_skb() { + 1) | sock_wfree() { + 1) | sock_def_write_space() { + 1) | rt_read_lock() { + 1) | __rt_spin_lock() { + 1) 0.939 us | __might_sleep(); + 1) 2.699 us | } + 1) 4.337 us | } + 1) | rt_read_unlock() { + 1) 0.844 us | __rt_spin_unlock(); + 1) 2.507 us | } + 1) 9.242 us | } + 1) + 11.047 us | } + 1) + 12.791 us | } + 1) | sja1000_write_cmdreg() { + 1) | rt_spin_lock() { + 1) 0.802 us | __might_sleep(); + 1) 2.532 us | } + 1) 1.481 us | kvaser_pci_write_reg(); + 1) 2.155 us | kvaser_pci_read_reg(); + 1) 0.843 us | rt_spin_unlock(); + 1) + 11.100 us | } + 1) + 38.263 us | } + 1) | /* ops->ndo_start_xmit(skb, dev) = 0 */ + 1) + 41.508 us | } + 1) | /* dev_hard_start_xmit(skb, dev) = 0 */ + 1) 0.884 us | rt_spin_unlock(); + 1) | rt_spin_lock() { + 1) 0.817 us | __might_sleep(); + 1) 2.583 us | } + 1) | /* qdisc_len = 9 */ + 1) | /* sch_direct_xmit returns 0 */ + 1) + 57.502 us | } + 1) + 59.288 us | } + 1) 0.877 us | rt_spin_unlock(); + 1) | /* __dev_xmit_skb = 1 */ + 1) ! 111.265 us | } + 1) | /* dev_queue_xmit = 1 */ + 1) | /* net_xmit_errno = -105 */ + 1) 0.956 us | kfree_skb(); + 1) ! 
117.340 us | } + 1) | /* write returned -1 */ + +** The trace was generated by kernel v2.6.33.7-rt29-4-g92487e4 with the following patch applied: + +diff --git a/net/can/af_can.c b/net/can/af_can.c +index 4d8479d7..c91968d 100644 +--- a/net/can/af_can.c ++++ b/net/can/af_can.c +@@ -344,8 +344,11 @@ int can_send(struct sk_buff *skb, int loop) + + /* send to netdevice */ + err = dev_queue_xmit(skb); +- if (err > 0) ++ trace_printk("dev_queue_xmit = %d\n", err); ++ if (err > 0) { + err = net_xmit_errno(err); ++ trace_printk("net_xmit_errno = %d\n", err); ++ } + + if (err) { + #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) +diff --git a/net/core/dev.c b/net/core/dev.c +index 7a5412e..10ca9b4 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1828,6 +1828,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, + skb_dst_drop(skb); + + rc = ops->ndo_start_xmit(skb, dev); ++ trace_printk("ops->ndo_start_xmit(skb, dev) = %d\n", rc); ++ + if (rc == NETDEV_TX_OK) + txq_trans_update(txq); + /* +@@ -1854,6 +1856,7 @@ gso: + skb->next = nskb->next; + nskb->next = NULL; + rc = ops->ndo_start_xmit(nskb, dev); ++ trace_printk("gso: ops->ndo_start_xmit(skb, dev) = %d\n", rc); + if (unlikely(rc != NETDEV_TX_OK)) { + if (rc & ~NETDEV_TX_MASK) + goto out_kfree_gso_skb; +@@ -1949,6 +1952,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, + spin_lock(root_lock); + if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { + kfree_skb(skb); ++ trace_printk("QDISC_STATE_DEACTIVATED\n"); ++ + rc = NET_XMIT_DROP; + } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && + !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) { +@@ -1966,6 +1971,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, + rc = NET_XMIT_SUCCESS; + } else { + rc = qdisc_enqueue_root(skb, q); ++ trace_printk("qdisc_enqueue_root = %d\n", rc); + qdisc_run(q); + } + spin_unlock(root_lock); +@@ -2047,6 +2053,7 @@ gso: + #endif + if 
(q->enqueue) { + rc = __dev_xmit_skb(skb, q, dev, txq); ++ trace_printk("__dev_xmit_skb = %d\n", rc); + goto out; + } + +@@ -2072,6 +2079,8 @@ gso: + + if (!netif_tx_queue_stopped(txq)) { + rc = dev_hard_start_xmit(skb, dev, txq); ++ trace_printk("dev_hard_start_xmit = %d\n", rc); ++ + if (dev_xmit_complete(rc)) { + HARD_TX_UNLOCK(dev, txq); + goto out; +diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c +index bf7f50e..aea0833 100644 +--- a/net/sched/sch_generic.c ++++ b/net/sched/sch_generic.c +@@ -125,6 +125,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, + HARD_TX_LOCK(dev, txq); + if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) + ret = dev_hard_start_xmit(skb, dev, txq); ++ trace_printk("dev_hard_start_xmit(skb, dev) = %d\n", ret); + + HARD_TX_UNLOCK(dev, txq); + +@@ -133,9 +134,11 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, + if (dev_xmit_complete(ret)) { + /* Driver sent out skb successfully or skb was consumed */ + ret = qdisc_qlen(q); ++ trace_printk("qdisc_len = %d\n", ret); + } else if (ret == NETDEV_TX_LOCKED) { + /* Driver try lock failed */ + ret = handle_dev_cpu_collision(skb, txq, q); ++ trace_printk("handle_dev_cpu_collision = %d\n", ret); + } else { + /* Driver returned NETDEV_TX_BUSY - requeue skb */ + if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) +@@ -143,12 +146,13 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, + dev->name, ret, q->q.qlen); + + ret = dev_requeue_skb(skb, q); ++ trace_printk("dev_requeue_skb = %d\n", ret); + } + + if (ret && (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq))) + ret = 0; +- ++ trace_printk("%s returns %d\n", __FUNCTION__, ret); + return ret; + } +