[PATCH 5/6] myri10ge - Second half of the driver

From: Brice Goglin
Date: Wed May 10 2006 - 17:42:23 EST

The second half of the myri10ge driver core.

Signed-off-by: Brice Goglin <brice@xxxxxxxx>
Signed-off-by: Andrew J. Gallatin <gallatin@xxxxxxxx>

myri10ge.c | 1551 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 1551 insertions(+)

--- linux/drivers/net/myri10ge/myri10ge.c.old 2006-05-09 23:00:54.000000000 +0200
+++ linux/drivers/net/myri10ge/myri10ge.c 2006-05-09 23:00:54.000000000 +0200
@@ -1481,3 +1481,1554 @@ static struct ethtool_ops myri10ge_ethto
.get_stats_count = myri10ge_get_stats_count,
.get_ethtool_stats = myri10ge_get_ethtool_stats
};
+
+static int
+myri10ge_open(struct net_device *dev)
+{
+ struct myri10ge_priv *mgp;
+ size_t bytes;
+ myri10ge_cmd_t cmd;
+ int tx_ring_size, rx_ring_size;
+ int tx_ring_entries, rx_ring_entries;
+ int i, status, big_pow2;
+
+ mgp = dev->priv;
+
+ if (mgp->running != MYRI10GE_ETH_STOPPED)
+ return -EBUSY;
+
+ mgp->running = MYRI10GE_ETH_STARTING;
+ status = myri10ge_reset(mgp);
+ if (status != 0) {
+ printk(KERN_ERR "myri10ge: %s: failed reset\n", dev->name);
+ mgp->running = MYRI10GE_ETH_STOPPED;
+ return -ENXIO;
+ }
+
+ /* decide what small buffer size to use. For good TCP rx
+ * performance, it is important to not receive 1514 byte
+ * frames into jumbo buffers, as it confuses the socket buffer
+ * accounting code, leading to drops and erratic performance.
+ */
+
+ if (dev->mtu <= ETH_DATA_LEN) {
+ mgp->small_bytes = 128; /* enough for a TCP header */
+ } else {
+ mgp->small_bytes = ETH_FRAME_LEN; /* enough for an ETH_DATA_LEN frame */
+ }
+ /* Override the small buffer size? */
+ if (myri10ge_small_bytes > 0) {
+ mgp->small_bytes = myri10ge_small_bytes;
+ }
+
+ /* If the user sets an obscenely small MTU, adjust the small
+ * bytes down to nearly nothing */
+ if (mgp->small_bytes >= (dev->mtu + ETH_HLEN))
+ mgp->small_bytes = 64;
+
+ /* get ring sizes */
+ status = myri10ge_send_cmd(mgp, MYRI10GE_MCP_CMD_GET_SEND_RING_SIZE, &cmd);
+ tx_ring_size = cmd.data0;
+ status |= myri10ge_send_cmd(mgp, MYRI10GE_MCP_CMD_GET_RX_RING_SIZE, &cmd);
+ rx_ring_size = cmd.data0;
+
+ /* get the lanai pointers to the send and receive rings */
+
+ status = myri10ge_send_cmd(mgp, MYRI10GE_MCP_CMD_GET_SEND_OFFSET, &cmd);
+ mgp->tx.lanai = (mcp_kreq_ether_send_t __iomem *) (mgp->sram + cmd.data0);
+
+ status |= myri10ge_send_cmd(mgp, MYRI10GE_MCP_CMD_GET_SMALL_RX_OFFSET, &cmd);
+ mgp->rx_small.lanai = (mcp_kreq_ether_recv_t __iomem *) (mgp->sram + cmd.data0);
+
+ status |= myri10ge_send_cmd(mgp, MYRI10GE_MCP_CMD_GET_BIG_RX_OFFSET, &cmd);
+ mgp->rx_big.lanai = (mcp_kreq_ether_recv_t __iomem *) (mgp->sram + cmd.data0);
+
+ if (status != 0) {
+ printk(KERN_ERR "myri10ge: %s: failed to get ring sizes or locations\n",
+ dev->name);
+ mgp->running = MYRI10GE_ETH_STOPPED;
+ return -ENXIO;
+ }
+
+ if (mgp->mtrr >= 0) {
+ mgp->tx.wc_fifo = mgp->sram + 0x200000;
+ mgp->rx_small.wc_fifo = mgp->sram + 0x300000;
+ mgp->rx_big.wc_fifo = mgp->sram + 0x340000;
+ } else {
+ mgp->tx.wc_fifo = NULL;
+ mgp->rx_small.wc_fifo = NULL;
+ mgp->rx_big.wc_fifo = NULL;
+ }
+
+ tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
+ rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t);
+ mgp->tx.mask = tx_ring_entries - 1;
+ mgp->rx_small.mask = mgp->rx_big.mask = rx_ring_entries - 1;
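+ /* the firmware ring entry counts are powers of two, so "index & mask" is used below to wrap ring indices */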
+
+ /* allocate the host shadow rings */
+
+ bytes = 8 + (MYRI10GE_MCP_ETHER_MAX_SEND_DESC_TSO + 4)
+ * sizeof (*mgp->tx.req_list);
+ mgp->tx.req_bytes = kmalloc(bytes, GFP_KERNEL);
+ if (mgp->tx.req_bytes == NULL)
+ goto abort_with_nothing;
+ memset(mgp->tx.req_bytes, 0, bytes);
+
+ /* ensure req_list entries are aligned to 8 bytes */
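+ /* adding 7 then masking with ~7UL rounds the address up to the next multiple of 8 */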
+ mgp->tx.req_list = (mcp_kreq_ether_send_t *)
+ ((unsigned long)(mgp->tx.req_bytes + 7) & ~7UL);
+
+ bytes = rx_ring_entries * sizeof (*mgp->rx_small.shadow);
+ mgp->rx_small.shadow = kmalloc(bytes, GFP_KERNEL);
+ if (mgp->rx_small.shadow == NULL)
+ goto abort_with_tx_req_bytes;
+ memset(mgp->rx_small.shadow, 0, bytes);
+
+ bytes = rx_ring_entries * sizeof (*mgp->rx_big.shadow);
+ mgp->rx_big.shadow = kmalloc(bytes, GFP_KERNEL);
+ if (mgp->rx_big.shadow == NULL)
+ goto abort_with_rx_small_shadow;
+ memset(mgp->rx_big.shadow, 0, bytes);
+
+ /* allocate the host info rings */
+
+ bytes = tx_ring_entries * sizeof (*mgp->tx.info);
+ mgp->tx.info = kmalloc(bytes, GFP_KERNEL);
+ if (mgp->tx.info == NULL)
+ goto abort_with_rx_big_shadow;
+ memset(mgp->tx.info, 0, bytes);
+
+ bytes = rx_ring_entries * sizeof (*mgp->rx_small.info);
+ mgp->rx_small.info = kmalloc(bytes, GFP_KERNEL);
+ if (mgp->rx_small.info == NULL)
+ goto abort_with_tx_info;
+ memset(mgp->rx_small.info, 0, bytes);
+
+ bytes = rx_ring_entries * sizeof (*mgp->rx_big.info);
+ mgp->rx_big.info = kmalloc(bytes, GFP_KERNEL);
+ if (mgp->rx_big.info == NULL)
+ goto abort_with_rx_small_info;
+ memset(mgp->rx_big.info, 0, bytes);
+
+ /* Fill the receive rings */
+ for (i = 0; i <= mgp->rx_small.mask; i++) {
+ status = myri10ge_getbuf(&mgp->rx_small, mgp->pdev,
+ mgp->small_bytes, i);
+ if (status) {
+ printk(KERN_ERR "myri10ge: %s: alloced only %d small bufs\n",
+ dev->name, i);
+ goto abort_with_rx_small_ring;
+ }
+ }
+
+ for (i = 0; i <= mgp->rx_big.mask; i++) {
+ status = myri10ge_getbuf(&mgp->rx_big, mgp->pdev, dev->mtu + ETH_HLEN, i);
+ if (status) {
+ printk(KERN_ERR "myri10ge: %s: alloced only %d big bufs\n",
+ dev->name, i);
+ goto abort_with_rx_big_ring;
+ }
+ }
+
+ /* Firmware needs the big buff size as a power of 2. Lie and
+ * tell him the buffer is larger, because we only use 1
+ * buffer/pkt, and the mtu will prevent overruns.
+ */
+ big_pow2 = dev->mtu + ETH_HLEN + MYRI10GE_MCP_ETHER_PAD;
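+ /* x is a power of two exactly when (x & (x - 1)) == 0; count up until that holds */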
+ while ((big_pow2 & (big_pow2 - 1)) != 0)
+ big_pow2++;
+
+ /* now give firmware buffers sizes, and MTU */
+ cmd.data0 = dev->mtu + ETH_HLEN + VLAN_HLEN;
+ status = myri10ge_send_cmd(mgp, MYRI10GE_MCP_CMD_SET_MTU, &cmd);
+ cmd.data0 = mgp->small_bytes;
+ status |= myri10ge_send_cmd(mgp, MYRI10GE_MCP_CMD_SET_SMALL_BUFFER_SIZE, &cmd);
+ cmd.data0 = big_pow2;
+ status |= myri10ge_send_cmd(mgp, MYRI10GE_MCP_CMD_SET_BIG_BUFFER_SIZE, &cmd);
+ if (status) {
+ printk(KERN_ERR "myri10ge: %s: Couldn't set buffer sizes\n",
+ dev->name);
+ goto abort_with_rx_big_ring;
+ }
+
+ cmd.data0 = MYRI10GE_LOWPART_TO_U32(mgp->fw_stats_bus);
+ cmd.data1 = MYRI10GE_HIGHPART_TO_U32(mgp->fw_stats_bus);
+ status = myri10ge_send_cmd(mgp, MYRI10GE_MCP_CMD_SET_STATS_DMA, &cmd);
+ if (status) {
+ printk(KERN_ERR "myri10ge: %s: Couldn't set stats DMA\n",
+ dev->name);
+ goto abort_with_rx_big_ring;
+ }
+
+ mgp->link_state = -1;
+ mgp->rdma_tags_available = 15;
+
+ if (myri10ge_napi) /* must happen prior to any irq */
+ netif_poll_enable(mgp->dev);
+
+ status = myri10ge_send_cmd(mgp, MYRI10GE_MCP_CMD_ETHERNET_UP, &cmd);
+ if (status) {
+ printk(KERN_ERR "myri10ge: %s: Couldn't bring up link\n",
+ dev->name);
+ goto abort_with_rx_big_ring;
+ }
+
+ mgp->wake_queue = 0;
+ mgp->stop_queue = 0;
+ mgp->running = MYRI10GE_ETH_RUNNING;
+ mgp->watchdog_timer.expires =
+ jiffies + myri10ge_watchdog_timeout * HZ;
+ add_timer(&mgp->watchdog_timer);
+ netif_wake_queue(dev);
+ return 0;
+
+abort_with_rx_big_ring:
+ for (i = 0; i <= mgp->rx_big.mask; i++) {
+ if (mgp->rx_big.info[i].skb != NULL)
+ dev_kfree_skb_any(mgp->rx_big.info[i].skb);
+ if (pci_unmap_len(&mgp->rx_big.info[i], len)) {
+ pci_unmap_single(mgp->pdev,
+ pci_unmap_addr(&mgp->rx_big.info[i], bus),
+ pci_unmap_len(&mgp->rx_big.info[i], len),
+ PCI_DMA_FROMDEVICE);
+ }
+ }
+
+abort_with_rx_small_ring:
+ for (i = 0; i <= mgp->rx_small.mask; i++) {
+ if (mgp->rx_small.info[i].skb != NULL)
+ dev_kfree_skb_any(mgp->rx_small.info[i].skb);
+ if (pci_unmap_len(&mgp->rx_small.info[i], len)) {
+ pci_unmap_single(mgp->pdev,
+ pci_unmap_addr(&mgp->rx_small.info[i], bus),
+ pci_unmap_len(&mgp->rx_small.info[i], len),
+ PCI_DMA_FROMDEVICE);
+ }
+ }
+ kfree(mgp->rx_big.info);
+
+abort_with_rx_small_info:
+ kfree(mgp->rx_small.info);
+
+abort_with_tx_info:
+ kfree(mgp->tx.info);
+
+abort_with_rx_big_shadow:
+ kfree(mgp->rx_big.shadow);
+
+abort_with_rx_small_shadow:
+ kfree(mgp->rx_small.shadow);
+
+abort_with_tx_req_bytes:
+ kfree(mgp->tx.req_bytes);
+ mgp->tx.req_bytes = NULL;
+ mgp->tx.req_list = NULL;
+
+abort_with_nothing:
+ mgp->running = MYRI10GE_ETH_STOPPED;
+ return -ENOMEM;
+
+}
+
+static int
+myri10ge_close(struct net_device *dev)
+{
+ struct myri10ge_priv *mgp;
+ struct sk_buff *skb;
+ myri10ge_tx_buf_t *tx;
+ int status, i, old_down_cnt, len, idx;
+ myri10ge_cmd_t cmd;
+
+ mgp = dev->priv;
+
+ if (mgp->running != MYRI10GE_ETH_RUNNING)
+ return 0;
+
+ if (mgp->tx.req_bytes == NULL)
+ return 0;
+
+ del_timer_sync(&mgp->watchdog_timer);
+ mgp->running = MYRI10GE_ETH_STOPPING;
+ if (myri10ge_napi)
+ netif_poll_disable(mgp->dev);
+ netif_carrier_off(dev);
+ netif_stop_queue(dev);
+ old_down_cnt = mgp->down_cnt;
+ mb();
+ status = myri10ge_send_cmd(mgp, MYRI10GE_MCP_CMD_ETHERNET_DOWN, &cmd);
+ if (status) {
+ printk(KERN_ERR "myri10ge: %s: Couldn't bring down link\n",
+ dev->name);
+ }
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (old_down_cnt == mgp->down_cnt)
+ schedule_timeout(HZ);
+ set_current_state(TASK_RUNNING);
+ if (old_down_cnt == mgp->down_cnt) {
+ printk(KERN_ERR "myri10ge: %s never got down irq\n",
+ dev->name);
+ }
+ netif_tx_disable(dev);
+
+ for (i = 0; i <= mgp->rx_big.mask; i++) {
+ if (mgp->rx_big.info[i].skb != NULL)
+ dev_kfree_skb_any(mgp->rx_big.info[i].skb);
+ if (pci_unmap_len(&mgp->rx_big.info[i], len)) {
+ pci_unmap_single(mgp->pdev,
+ pci_unmap_addr(&mgp->rx_big.info[i], bus),
+ pci_unmap_len(&mgp->rx_big.info[i], len),
+ PCI_DMA_FROMDEVICE);
+ }
+ }
+
+ for (i = 0; i <= mgp->rx_small.mask; i++) {
+ if (mgp->rx_small.info[i].skb != NULL)
+ dev_kfree_skb_any(mgp->rx_small.info[i].skb);
+ if (pci_unmap_len(&mgp->rx_small.info[i], len)) {
+ pci_unmap_single(mgp->pdev,
+ pci_unmap_addr(&mgp->rx_small.info[i], bus),
+ pci_unmap_len(&mgp->rx_small.info[i], len),
+ PCI_DMA_FROMDEVICE);
+ }
+ }
+
+ tx = &mgp->tx;
+ while (tx->done != tx->req) {
+ idx = tx->done & tx->mask;
+ skb = tx->info[idx].skb;
+
+ /* Mark as free */
+ tx->info[idx].skb = NULL;
+ tx->done++;
+ len = pci_unmap_len(&tx->info[idx], len);
+ pci_unmap_len_set(&tx->info[idx], len, 0);
+ if (skb) {
+ mgp->stats.tx_dropped++;
+ dev_kfree_skb_any(skb);
+ if (len)
+ pci_unmap_single(mgp->pdev,
+ pci_unmap_addr(&tx->info[idx], bus),
+ len, PCI_DMA_TODEVICE);
+ } else {
+ if (len)
+ pci_unmap_page(mgp->pdev,
+ pci_unmap_addr(&tx->info[idx], bus),
+ len, PCI_DMA_TODEVICE);
+ }
+ }
+ kfree(mgp->rx_big.info);
+
+ kfree(mgp->rx_small.info);
+
+ kfree(mgp->tx.info);
+
+ kfree(mgp->rx_big.shadow);
+
+ kfree(mgp->rx_small.shadow);
+
+ kfree(mgp->tx.req_bytes);
+ mgp->tx.req_bytes = NULL;
+ mgp->tx.req_list = NULL;
+ mgp->running = MYRI10GE_ETH_STOPPED;
+ return 0;
+}
+
+/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
+ * backwards one at a time and handle ring wraps */
+
+static inline void
+myri10ge_submit_req_backwards(myri10ge_tx_buf_t *tx,
+ mcp_kreq_ether_send_t *src, int cnt)
+{
+ int idx, starting_slot;
+ starting_slot = tx->req;
+ while (cnt > 1) {
+ cnt--;
+ idx = (starting_slot + cnt) & tx->mask;
+ myri10ge_pio_copy(&tx->lanai[idx],
+ &src[cnt], sizeof(*src));
+ mb();
+ }
+}
+
+/*
+ * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
+ * at most 32 bytes at a time, so as to avoid involving the software
+ * pio handler in the nic. We re-write the first segment's flags
+ * to mark them valid only after writing the entire chain.
+ */
+
+static inline void
+myri10ge_submit_req(myri10ge_tx_buf_t *tx, mcp_kreq_ether_send_t *src,
+ int cnt)
+{
+ int idx, i;
+ uint32_t __iomem *dst_ints;
+ uint32_t *src_ints;
+ mcp_kreq_ether_send_t __iomem *dstp, *dst;
+ mcp_kreq_ether_send_t *srcp;
+ uint8_t last_flags;
+
+ idx = tx->req & tx->mask;
+
+ last_flags = src->flags;
+ src->flags = 0;
+ mb();
+ dst = dstp = &tx->lanai[idx];
+ srcp = src;
+
+ if ((idx + cnt) < tx->mask) {
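+ /* no wrap past the end of the ring: copy the requests forward, two at a time (32 bytes per PIO write) */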
+ for (i = 0; i < (cnt - 1); i += 2) {
+ myri10ge_pio_copy(dstp, srcp, 2 * sizeof(*src));
+ mb(); /* force write every 32 bytes */
+ srcp += 2;
+ dstp += 2;
+ }
+ } else {
+ /* submit all but the first request, and ensure
+ that it is submitted below */
+ myri10ge_submit_req_backwards(tx, src, cnt);
+ i = 0;
+ }
+ if (i < cnt) {
+ /* submit the first request */
+ myri10ge_pio_copy(dstp, srcp, sizeof(*src));
+ mb(); /* barrier before setting valid flag */
+ }
+
+ /* re-write the last 32-bits with the valid flags */
+ src->flags = last_flags;
+ src_ints = (uint32_t *) src;
+ src_ints += 3;
+ dst_ints = (uint32_t __iomem *) dst;
+ dst_ints += 3;
+ *(uint32_t __force *) dst_ints = *src_ints;
+ tx->req += cnt;
+ mb();
+}
+
+static inline void
+myri10ge_submit_req_wc(myri10ge_tx_buf_t *tx,
+ mcp_kreq_ether_send_t *src, int cnt)
+{
+ tx->req += cnt;
+ mb();
+ while (cnt >= 4) {
+ myri10ge_pio_copy((void __iomem *) tx->wc_fifo, src, 64);
+ mb();
+ src += 4;
+ cnt -= 4;
+ }
+ if (cnt > 0) {
+ /* pad it to 64 bytes. The src is 64 bytes bigger than it
+ * needs to be so that we don't overrun it */
+ myri10ge_pio_copy((void __iomem *) tx->wc_fifo + (cnt<<18), src, 64);
+ mb();
+ }
+}
+
+#ifdef NETIF_F_TSO
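+/* Return the combined length of the ethernet, IP and TCP headers,
+ * i.e. the offset where the TSO payload begins. Assumes an untagged
+ * IPv4 frame, hence the hardcoded 14 byte ethernet header. */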
+static inline unsigned long
+myri10ge_tcpend(struct sk_buff *skb)
+{
+ struct iphdr *ip;
+ int iphlen, tcplen;
+ struct tcphdr *tcp;
+
+ ip = (struct iphdr *) ((char *) skb->data + 14);
+ iphlen = ip->ihl << 2;
+ tcp = (struct tcphdr *) ((char *) ip + iphlen);
+ tcplen = tcp->doff << 2;
+ return (tcplen + iphlen + 14);
+}
+#endif
+
+static inline void
+myri10ge_csum_fixup(struct sk_buff *skb, int cksum_offset,
+ int pseudo_hdr_offset)
+{
+ int csum;
+ uint16_t *csum_ptr;
+
+ csum = skb_checksum(skb, cksum_offset,
+ skb->len - cksum_offset, 0);
+ /* pull before taking the address of the checksum field:
+ * pskb_may_pull() may reallocate the skb head, which would
+ * leave a previously computed csum_ptr dangling */
+ if (!pskb_may_pull(skb, pseudo_hdr_offset)) {
+ printk(KERN_ERR "myri10ge: can't pull skb %d\n",
+ pseudo_hdr_offset);
+ return;
+ }
+ csum_ptr = (uint16_t *) (skb->h.raw + skb->csum);
+ *csum_ptr = csum_fold(csum);
+ /* need to fixup IPv4 UDP packets according to RFC768 */
+ if (unlikely(*csum_ptr == 0 &&
+ skb->protocol == htons(ETH_P_IP) &&
+ skb->nh.iph->protocol == IPPROTO_UDP)) {
+ *csum_ptr = 0xffff;
+ }
+}
+
+/*
+ * Transmit a packet. We need to split the packet so that a single
+ * segment does not cross mgp->tx.boundary, which makes segment
+ * counting tricky. Rather than try to count segments up front, we
+ * give up if fewer segments are currently available than a
+ * reasonably fragmented packet might need. If we run out of
+ * segments while preparing a packet for DMA, we linearize it
+ * and try again.
+ */
+
+static int
+myri10ge_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct myri10ge_priv *mgp = dev->priv;
+ mcp_kreq_ether_send_t *req;
+ myri10ge_tx_buf_t *tx = &mgp->tx;
+ struct skb_frag_struct *frag;
+ dma_addr_t bus;
+ uint32_t low, high_swapped;
+ unsigned int len;
+ int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
+ uint16_t pseudo_hdr_offset, cksum_offset;
+ int cum_len, seglen, boundary, rdma_count;
+ uint8_t flags, odd_flag;
+
+again:
+ req = tx->req_list;
+ avail = tx->mask - 1 - (tx->req - tx->done);
+
+ mss = 0;
+ max_segments = MYRI10GE_MCP_ETHER_MAX_SEND_DESC;
+
+#ifdef NETIF_F_TSO
+ if (skb->len > (dev->mtu + ETH_HLEN)) {
+ mss = skb_shinfo(skb)->tso_size;
+ if (mss != 0)
+ max_segments = MYRI10GE_MCP_ETHER_MAX_SEND_DESC_TSO;
+ }
+#endif /*NETIF_F_TSO */
+
+ if (unlikely(avail < max_segments)) {
+ /* we are out of transmit resources */
+ mgp->stop_queue++;
+ netif_stop_queue(dev);
+ return 1;
+ }
+
+ /* Setup checksum offloading, if needed */
+ cksum_offset = 0;
+ pseudo_hdr_offset = 0;
+ odd_flag = 0;
+ flags = (MYRI10GE_MCP_ETHER_FLAGS_NO_TSO |
+ MYRI10GE_MCP_ETHER_FLAGS_FIRST);
+ if (likely(skb->ip_summed == CHECKSUM_HW)) {
+ cksum_offset = (skb->h.raw - skb->data);
+ pseudo_hdr_offset = (skb->h.raw + skb->csum) - skb->data;
+ /* If the headers are excessively large, then we must
+ * fall back to a software checksum */
+ if (unlikely(cksum_offset > 255 ||
+ pseudo_hdr_offset > 127)) {
+ myri10ge_csum_fixup(skb, cksum_offset, pseudo_hdr_offset);
+ cksum_offset = 0;
+ pseudo_hdr_offset = 0;
+ } else {
+ pseudo_hdr_offset = htons(pseudo_hdr_offset);
+ odd_flag = MYRI10GE_MCP_ETHER_FLAGS_ALIGN_ODD;
+ flags |= MYRI10GE_MCP_ETHER_FLAGS_CKSUM;
+ }
+ }
+
+ cum_len = 0;
+
+#ifdef NETIF_F_TSO
+ if (mss) { /* TSO */
+ /* this removes any CKSUM flag from before */
+ flags = (MYRI10GE_MCP_ETHER_FLAGS_TSO_HDR |
+ MYRI10GE_MCP_ETHER_FLAGS_FIRST);
+
+ /* negative cum_len signifies to the
+ * send loop that we are still in the
+ * header portion of the TSO packet.
+ * TSO header must be at most 134 bytes long */
+ cum_len = -myri10ge_tcpend(skb);
+
+ /* for TSO, pseudo_hdr_offset holds mss.
+ * The firmware figures out where to put
+ * the checksum by parsing the header. */
+ pseudo_hdr_offset = htons(mss);
+ } else
+#endif /*NETIF_F_TSO */
+ /* Mark small packets, and pad out tiny packets */
+ if (skb->len <= MYRI10GE_MCP_ETHER_SEND_SMALL_SIZE) {
+ flags |= MYRI10GE_MCP_ETHER_FLAGS_SMALL;
+
+ /* pad frames to at least ETH_ZLEN bytes */
+ if (unlikely(skb->len < ETH_ZLEN)) {
+ skb = skb_padto(skb, ETH_ZLEN);
+ if (skb == NULL) {
+ /* The packet is gone, so we must
+ return 0 */
+ mgp->stats.tx_dropped += 1;
+ return 0;
+ }
+ /* adjust the len to account for the zero pad
+ so that the nic can know how long it is */
+ skb->len = ETH_ZLEN;
+ }
+ }
+
+ /* map the skb for DMA */
+ len = skb->len - skb->data_len;
+ idx = tx->req & tx->mask;
+ tx->info[idx].skb = skb;
+ bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
+ pci_unmap_addr_set(&tx->info[idx], bus, bus);
+ pci_unmap_len_set(&tx->info[idx], len, len);
+
+ frag_cnt = skb_shinfo(skb)->nr_frags;
+ frag_idx = 0;
+ count = 0;
+ rdma_count = 0;
+
+ /* "rdma_count" is the number of RDMAs belonging to the
+ * current packet BEFORE the current send request. For
+ * non-TSO packets, this is equal to "count".
+ * For TSO packets, rdma_count needs to be reset
+ * to 0 after a segment cut.
+ *
+ * The rdma_count field of the send request is
+ * the number of RDMAs of the packet starting at
+ * that request. For TSO send requests with one or more cuts
+ * in the middle, this is the number of RDMAs starting
+ * after the last cut in the request. All previous
+ * segments before the last cut implicitly have 1 RDMA.
+ *
+ * Since the number of RDMAs is not known beforehand,
+ * it must be filled-in retroactively - after each
+ * segmentation cut or at the end of the entire packet.
+ */
+
+ while (1) {
+ /* Break the SKB or Fragment up into pieces which
+ do not cross mgp->tx.boundary */
+ low = MYRI10GE_LOWPART_TO_U32(bus);
+ high_swapped = htonl(MYRI10GE_HIGHPART_TO_U32(bus));
+ while (len) {
+ uint8_t flags_next;
+ int cum_len_next;
+
+ if (unlikely(count == max_segments))
+ goto abort_linearize;
+
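+ /* next tx->boundary crossing strictly above "low": the boundary
+ * is a power of two, so the mask rounds down to a multiple of it */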
+ boundary = (low + tx->boundary) & ~(tx->boundary - 1);
+ seglen = boundary - low;
+ if (seglen > len)
+ seglen = len;
+ flags_next = flags & ~MYRI10GE_MCP_ETHER_FLAGS_FIRST;
+ cum_len_next = cum_len + seglen;
+#ifdef NETIF_F_TSO
+ if (mss) { /* TSO */
+ (req-rdma_count)->rdma_count = rdma_count + 1;
+
+ if (likely(cum_len >= 0)) { /* payload */
+ int next_is_first, chop;
+
+ chop = (cum_len_next > mss);
+ cum_len_next = cum_len_next % mss;
+ next_is_first = (cum_len_next == 0);
+ flags |= chop *
+ MYRI10GE_MCP_ETHER_FLAGS_TSO_CHOP;
+ flags_next |= next_is_first *
+ MYRI10GE_MCP_ETHER_FLAGS_FIRST;
+ rdma_count |= -(chop | next_is_first);
+ rdma_count += chop & !next_is_first;
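+ /* the two statements above are a branch-free form of:
+ * if (chop || next_is_first) rdma_count = -1;
+ * if (chop && !next_is_first) rdma_count++;
+ * so the RDMA count restarts after each segment cut */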
+ } else if (likely(cum_len_next >= 0)) { /* header ends */
+ int small;
+
+ rdma_count = -1;
+ cum_len_next = 0;
+ seglen = -cum_len;
+ small = (mss <= MYRI10GE_MCP_ETHER_SEND_SMALL_SIZE);
+ flags_next = MYRI10GE_MCP_ETHER_FLAGS_TSO_PLD |
+ MYRI10GE_MCP_ETHER_FLAGS_FIRST |
+ (small * MYRI10GE_MCP_ETHER_FLAGS_SMALL);
+ }
+ }
+#endif /* NETIF_F_TSO */
+ req->addr_high = high_swapped;
+ req->addr_low = htonl(low);
+ req->pseudo_hdr_offset = pseudo_hdr_offset;
+ req->pad = 0; /* complete solid 16-byte block; does this matter? */
+ req->rdma_count = 1;
+ req->length = htons(seglen);
+ req->cksum_offset = cksum_offset;
+ req->flags = flags | ((cum_len & 1) * odd_flag);
+
+ low += seglen;
+ len -= seglen;
+ cum_len = cum_len_next;
+ flags = flags_next;
+ req++;
+ count++;
+ rdma_count++;
+ if (unlikely(cksum_offset > seglen))
+ cksum_offset -= seglen;
+ else
+ cksum_offset = 0;
+ }
+ if (frag_idx == frag_cnt)
+ break;
+
+ /* map next fragment for DMA */
+ idx = (count + tx->req) & tx->mask;
+ frag = &skb_shinfo(skb)->frags[frag_idx];
+ frag_idx++;
+ len = frag->size;
+ bus = pci_map_page(mgp->pdev, frag->page, frag->page_offset,
+ len, PCI_DMA_TODEVICE);
+ pci_unmap_addr_set(&tx->info[idx], bus, bus);
+ pci_unmap_len_set(&tx->info[idx], len, len);
+ }
+
+ (req-rdma_count)->rdma_count = rdma_count;
+#ifdef NETIF_F_TSO
+ if (mss) {
+ do {
+ req--;
+ req->flags |= MYRI10GE_MCP_ETHER_FLAGS_TSO_LAST;
+ } while (!(req->flags & (MYRI10GE_MCP_ETHER_FLAGS_TSO_CHOP |
+ MYRI10GE_MCP_ETHER_FLAGS_FIRST)));
+ }
+#endif
+ idx = ((count - 1) + tx->req) & tx->mask;
+ tx->info[idx].last = 1;
+ if (tx->wc_fifo == NULL)
+ myri10ge_submit_req(tx, tx->req_list, count);
+ else
+ myri10ge_submit_req_wc(tx, tx->req_list, count);
+ tx->pkt_start++;
+ if ((avail - count) < MYRI10GE_MCP_ETHER_MAX_SEND_DESC) {
+ mgp->stop_queue++;
+ netif_stop_queue(dev);
+ }
+ dev->trans_start = jiffies;
+ return 0;
+
+abort_linearize:
+ /* Free any DMA resources we've alloced and clear out the skb
+ * slot so as to not trip up assertions, and to avoid a
+ * double-free if linearizing fails */
+
+ last_idx = (idx + 1) & tx->mask;
+ idx = tx->req & tx->mask;
+ tx->info[idx].skb = NULL;
+ do {
+ len = pci_unmap_len(&tx->info[idx], len);
+ if (len) {
+ if (tx->info[idx].skb != NULL) {
+ pci_unmap_single(mgp->pdev,
+ pci_unmap_addr(&tx->info[idx], bus),
+ len, PCI_DMA_TODEVICE);
+ } else {
+ pci_unmap_page(mgp->pdev,
+ pci_unmap_addr(&tx->info[idx], bus),
+ len, PCI_DMA_TODEVICE);
+ }
+ pci_unmap_len_set(&tx->info[idx], len, 0);
+ tx->info[idx].skb = NULL;
+ }
+ idx = (idx + 1) & tx->mask;
+ } while (idx != last_idx);
+ if (skb_shinfo(skb)->tso_size) {
+ printk(KERN_ERR "myri10ge: %s: TSO but wanted to linearize?!?!?\n",
+ mgp->dev->name);
+ goto drop;
+ }
+
+ if (skb_linearize(skb, GFP_ATOMIC)) {
+ goto drop;
+ }
+ mgp->tx_linearized++;
+ goto again;
+
+drop:
+ dev_kfree_skb_any(skb);
+ mgp->stats.tx_dropped += 1;
+ return 0;
+
+}
+
+static struct net_device_stats *
+myri10ge_get_stats(struct net_device *dev)
+{
+ struct myri10ge_priv *mgp = dev->priv;
+ return &mgp->stats;
+}
+
+static void
+myri10ge_set_multicast_list(struct net_device *dev)
+{
+ myri10ge_change_promisc(dev->priv, dev->flags & IFF_PROMISC);
+}
+
+static int
+myri10ge_set_mac_address (struct net_device *dev, void *addr)
+{
+ struct sockaddr *sa = (struct sockaddr *) addr;
+ struct myri10ge_priv *mgp = dev->priv;
+ int status;
+
+ if (!is_valid_ether_addr(sa->sa_data))
+ return -EADDRNOTAVAIL;
+
+ status = myri10ge_update_mac_address(mgp, sa->sa_data);
+ if (status != 0) {
+ printk(KERN_ERR "myri10ge: %s: changing mac address failed with %d\n",
+ dev->name, status);
+ return status;
+ }
+
+ /* change the dev structure */
+ memcpy(dev->dev_addr, sa->sa_data, 6);
+ return 0;
+}
+
+static int
+myri10ge_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ return -EOPNOTSUPP;
+}
+
+static int
+myri10ge_init(struct net_device *dev)
+{
+ return 0;
+}
+
+static int
+myri10ge_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct myri10ge_priv *mgp = dev->priv;
+ int error = 0;
+
+ if ((new_mtu < 68) || (ETH_HLEN + new_mtu > MYRI10GE_MAX_ETHER_MTU)) {
+ printk(KERN_ERR "myri10ge: %s: new mtu (%d) is not valid\n",
+ dev->name, new_mtu);
+ return -EINVAL;
+ }
+ printk("%s: changing mtu from %d to %d\n",
+ dev->name, dev->mtu, new_mtu);
+ if (mgp->running) {
+ /* if we change the mtu on an active device, we must
+ * reset the device so the firmware sees the change */
+ myri10ge_close(dev);
+ dev->mtu = new_mtu;
+ myri10ge_open(dev);
+ } else {
+ dev->mtu = new_mtu;
+ }
+ return error;
+}
+
+#if defined(CONFIG_X86) || defined(CONFIG_X86_64)
+
+/*
+ * Enable ECRC to align PCI-E Completion packets. Rather than using
+ * normal pci config space writes, we must map the Nvidia config space
+ * ourselves. This is because on opteron/nvidia class machines the
+ * 0xe000000 mapping is handled by the nvidia chipset, which means
+ * the internal PCI device (the on-chip northbridge), or the amd-8131
+ * bridge, and things behind them are not visible by this method.
+ */
+
+static void
+myri10ge_enable_ecrc(struct myri10ge_priv *mgp)
+{
+ struct pci_dev *bridge = mgp->pdev->bus->self;
+ struct device * dev = &mgp->pdev->dev;
+ unsigned cap;
+ unsigned err_cap;
+ int ret;
+
+ if (!myri10ge_ecrc_enable || !bridge)
+ return;
+
+ cap = pci_find_ext_capability(bridge, PCI_EXT_CAP_ID_ERR);
+ /* nvidia ext cap is not always linked in ext cap chain */
+ if (!cap
+ && bridge->vendor == PCI_VENDOR_ID_NVIDIA
+ && bridge->device == PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_PCIE)
+ cap = 0x160;
+
+ if (!cap)
+ return;
+
+ ret = pci_read_config_dword(bridge, cap + PCI_ERR_CAP, &err_cap);
+ if (ret) {
+ dev_err(dev, "failed reading ext-conf-space of %s\n",
+ pci_name(bridge));
+ dev_err(dev, "\t pci=nommconf in use? "
+ "or buggy/incomplete/absent acpi MCFG attr?\n");
+ return;
+ }
+ if (!(err_cap & PCI_ERR_CAP_ECRC_GENC))
+ return;
+
+ err_cap |= PCI_ERR_CAP_ECRC_GENE;
+ pci_write_config_dword(bridge, cap + PCI_ERR_CAP, err_cap);
+ dev_info(dev,
+ "Enabled ECRC on upstream bridge %s\n",
+ pci_name(bridge));
+ mgp->tx.boundary = 4096;
+ mgp->fw_name = myri10ge_fw_aligned;
+}
+#endif /* defined(CONFIG_X86) || defined(CONFIG_X86_64) */
+
+/*
+ * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
+ * when the PCI-E Completion packets are aligned on an 8-byte
+ * boundary. Some PCI-E chip sets always align Completion packets; on
+ * the ones that do not, the alignment can be enforced by enabling
+ * ECRC generation (if supported).
+ *
+ * When PCI-E Completion packets are not aligned, it is actually more
+ * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
+ *
+ * If the driver can neither enable ECRC nor verify that it has
+ * already been enabled, then it must use a firmware image which works
+ * around unaligned completion packets (myri10ge_ethp_z8e.dat), and it
+ * should also ensure that it never gives the device a Read-DMA which is
+ * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is
+ * enabled, then the driver should use the aligned (myri10ge_eth_z8e.dat)
+ * firmware image, and set tx.boundary to 4KB.
+ */
+
+static void
+myri10ge_select_firmware(struct myri10ge_priv *mgp)
+{
+ struct pci_dev *bridge = mgp->pdev->bus->self;
+
+ mgp->tx.boundary = 2048;
+ mgp->fw_name = myri10ge_fw_unaligned;
+
+ if (myri10ge_force_firmware == 0) {
+#if defined(CONFIG_X86) || defined(CONFIG_X86_64)
+ myri10ge_enable_ecrc(mgp);
+#endif
+ /* Check to see if the upstream bridge is known to
+ * provide aligned completions */
+ if (bridge
+ /* ServerWorks HT2000/HT1000 */
+ && bridge->vendor == PCI_VENDOR_ID_SERVERWORKS
+ && bridge->device == PCI_DEVICE_ID_SERVERWORKS_HT2000_PCIE) {
+ dev_info(&mgp->pdev->dev,
+ "Assuming aligned completions (0x%x:0x%x)\n",
+ bridge->vendor, bridge->device);
+ mgp->tx.boundary = 4096;
+ mgp->fw_name = myri10ge_fw_aligned;
+ }
+ } else {
+ if (myri10ge_force_firmware == 1) {
+ dev_info(&mgp->pdev->dev,
+ "Assuming aligned completions (forced)\n");
+ mgp->tx.boundary = 4096;
+ mgp->fw_name = myri10ge_fw_aligned;
+ } else {
+ dev_info(&mgp->pdev->dev,
+ "Assuming unaligned completions (forced)\n");
+ mgp->tx.boundary = 2048;
+ mgp->fw_name = myri10ge_fw_unaligned;
+ }
+ }
+ if (myri10ge_fw_name != NULL) {
+ dev_info(&mgp->pdev->dev, "overriding firmware to %s\n",
+ myri10ge_fw_name);
+ mgp->fw_name = myri10ge_fw_name;
+ }
+}
+
+static void
+myri10ge_save_state(struct myri10ge_priv *mgp)
+{
+ struct pci_dev *pdev = mgp->pdev;
+ int cap;
+
+ pci_save_state(pdev);
+ /* now save PCIe and MSI state that Linux will not
+ save for us */
+ cap = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+ pci_read_config_dword(pdev, cap + PCI_EXP_DEVCTL, &mgp->devctl);
+ cap = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+ pci_read_config_word(pdev, cap + PCI_MSI_FLAGS, &mgp->msi_flags);
+ pci_read_config_dword(pdev, cap + PCI_MSI_ADDRESS_LO,
+ &mgp->msi_addr_low);
+ pci_read_config_dword(pdev, cap + PCI_MSI_ADDRESS_HI,
+ &mgp->msi_addr_high);
+ pci_read_config_word(pdev, cap + PCI_MSI_DATA_32,
+ &mgp->msi_data_32);
+ pci_read_config_word(pdev, cap + PCI_MSI_DATA_64,
+ &mgp->msi_data_64);
+}
+
+static void
+myri10ge_restore_state(struct myri10ge_priv *mgp)
+{
+ struct pci_dev *pdev = mgp->pdev;
+ int cap;
+
+ pci_restore_state(pdev);
+ /* restore PCIe and MSI state that linux will not */
+ cap = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+ pci_write_config_dword(pdev, cap + PCI_EXP_DEVCTL, mgp->devctl);
+ cap = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+ pci_write_config_word(pdev, cap + PCI_MSI_FLAGS, mgp->msi_flags);
+ pci_write_config_dword(pdev, cap + PCI_MSI_ADDRESS_LO,
+ mgp->msi_addr_low);
+ pci_write_config_dword(pdev, cap + PCI_MSI_ADDRESS_HI,
+ mgp->msi_addr_high);
+ pci_write_config_word(pdev, cap + PCI_MSI_DATA_32,
+ mgp->msi_data_32);
+ pci_write_config_word(pdev, cap + PCI_MSI_DATA_64,
+ mgp->msi_data_64);
+}
+
+#ifdef CONFIG_PM
+
+static int
+myri10ge_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ struct myri10ge_priv *mgp;
+ struct net_device *netdev;
+
+ mgp = (struct myri10ge_priv *) pci_get_drvdata(pdev);
+ if (mgp == NULL)
+ return -EINVAL;
+ netdev = mgp->dev;
+
+ if (netif_running(netdev)) {
+ printk("myri10ge: closing %s\n", netdev->name);
+ myri10ge_close(netdev);
+ }
+ myri10ge_dummy_rdma(mgp, 0);
+ free_irq(pdev->irq, mgp);
+#ifdef CONFIG_PCI_MSI
+ if (mgp->msi_enabled)
+ pci_disable_msi(pdev);
+#endif
+ netif_device_detach(netdev);
+ myri10ge_save_state(mgp);
+ pci_disable_device(pdev);
+ pci_set_power_state(pdev, pci_choose_state(pdev, state));
+ return 0;
+}
+
+static int
+myri10ge_resume(struct pci_dev *pdev)
+{
+ struct myri10ge_priv *mgp;
+ struct net_device *netdev;
+ int status;
+
+ mgp = (struct myri10ge_priv *) pci_get_drvdata(pdev);
+ if (mgp == NULL)
+ return -EINVAL;
+ netdev = mgp->dev;
+ pci_set_power_state(pdev, 0); /* zeros conf space as a side effect */
+ udelay(5000); /* give card time to respond */
+ myri10ge_restore_state(mgp);
+ pci_enable_device(pdev);
+ pci_set_master(pdev);
+
+#ifdef CONFIG_PCI_MSI
+ if (myri10ge_use_msi(pdev)) {
+ status = pci_enable_msi(pdev);
+ if (status != 0) {
+ dev_err(&pdev->dev,
+ "Error %d setting up MSI; falling back to xPIC\n",
+ status);
+
+ } else {
+ mgp->msi_enabled = 1;
+ }
+ }
+#endif
+ if (myri10ge_napi) {
+ status = request_irq(pdev->irq, myri10ge_napi_intr, SA_SHIRQ,
+ netdev->name, mgp);
+ } else {
+
+ status = request_irq(pdev->irq, myri10ge_intr, SA_SHIRQ,
+ netdev->name, mgp);
+ }
+ if (status != 0) {
+ dev_err(&pdev->dev, "failed to allocate IRQ\n");
+ goto abort_with_msi;
+ }
+
+ myri10ge_reset(mgp);
+ myri10ge_dummy_rdma(mgp, mgp->tx.boundary != 4096);
+
+ /* Save configuration space to be restored if the
+ nic resets due to a parity error */
+ myri10ge_save_state(mgp);
+
+ netif_device_attach(netdev);
+ if (netif_running(netdev))
+ myri10ge_open(netdev);
+ return 0;
+
+abort_with_msi:
+#ifdef CONFIG_PCI_MSI
+ if (mgp->msi_enabled)
+ pci_disable_msi(pdev);
+#endif
+ return -EIO;
+
+}
+
+#endif /* CONFIG_PM */
+
+static uint32_t
+myri10ge_read_reboot(struct myri10ge_priv *mgp)
+{
+ struct pci_dev *pdev = mgp->pdev;
+ int vs = mgp->vendor_specific_offset;
+ uint32_t reboot;
+
+ /*enter read32 mode */
+ pci_write_config_byte(pdev, vs + 0x10, 0x3);
+
+ /* read REBOOT_STATUS (0xfffffff0) */
+ pci_write_config_dword(pdev, vs + 0x18, 0xfffffff0);
+ pci_read_config_dword(pdev, vs + 0x14, &reboot);
+ return reboot;
+}
+
+static void
+myri10ge_watchdog(void *arg)
+{
+ struct myri10ge_priv *mgp = arg;
+ uint32_t reboot;
+ int status;
+ uint16_t cmd, vendor;
+
+ mgp->watchdog_resets++;
+ pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd);
+ if ((cmd & PCI_COMMAND_MASTER) == 0) {
+ /* Bus master DMA disabled? Check to see
+ * if the card rebooted due to a parity error.
+ * For now, just report it. */
+ reboot = myri10ge_read_reboot(mgp);
+ printk(KERN_ERR "myri10ge: %s: NIC rebooted (0x%x), resetting\n",
+ mgp->dev->name, reboot);
+ /*
+ * A rebooted nic will come back with config space as
+ * it was after power was applied to PCIe bus.
+ * Attempt to restore config space which was saved
+ * when the driver was loaded, or the last time the
+ * nic was resumed from power saving mode.
+ */
+ myri10ge_restore_state(mgp);
+ } else {
+ /* if we get back -1's from our slot, perhaps somebody
+ powered off our card. Don't try to reset it in
+ this case */
+ if (cmd == 0xffff) {
+ pci_read_config_word(mgp->pdev, PCI_VENDOR_ID, &vendor);
+ if (vendor == 0xffff) {
+ printk(KERN_ERR "myri10ge: %s: device disappeared!\n",
+ mgp->dev->name);
+ return;
+ }
+ }
+ /* Perhaps it is a software error. Try to reset */
+
+ printk(KERN_ERR "myri10ge: %s: device timeout, resetting\n",
+ mgp->dev->name);
+ printk("myri10ge: %s: %d %d %d %d %d\n", mgp->dev->name,
+ mgp->tx.req, mgp->tx.done, mgp->tx.pkt_start,
+ mgp->tx.pkt_done,
+ (int)ntohl(mgp->fw_stats->send_done_count));
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(HZ*2);
+ set_current_state(TASK_RUNNING);
+ printk("myri10ge: %s: %d %d %d %d %d\n", mgp->dev->name,
+ mgp->tx.req, mgp->tx.done, mgp->tx.pkt_start,
+ mgp->tx.pkt_done,
+ (int)ntohl(mgp->fw_stats->send_done_count));
+ }
+ myri10ge_close(mgp->dev);
+ status = myri10ge_load_firmware(mgp);
+ if (status != 0) {
+ printk(KERN_ERR "myri10ge: %s: failed to load firmware\n",
+ mgp->dev->name);
+ return;
+ }
+ myri10ge_open(mgp->dev);
+}
+
+static void
+myri10ge_watchdog_timer(unsigned long arg)
+{
+ struct myri10ge_priv *mgp;
+
+ mgp = (struct myri10ge_priv *) arg;
+ if (mgp->tx.req != mgp->tx.done &&
+ mgp->tx.done == mgp->watchdog_tx_done) {
+ /* nic seems like it might be stuck.. */
+ schedule_work(&mgp->watchdog_work);
+ } else {
+ /* rearm timer */
+ mod_timer(&mgp->watchdog_timer,
+ jiffies + myri10ge_watchdog_timeout * HZ);
+ }
+ mgp->watchdog_tx_done = mgp->tx.done;
+}
+
+static int
+myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct net_device *netdev;
+ struct myri10ge_priv *mgp;
+ struct device *dev = &pdev->dev;
+ size_t bytes;
+ int i;
+ int status = -ENXIO;
+ int cap;
+ u16 val;
+
+ netdev = alloc_etherdev(sizeof(*mgp));
+ if (netdev == NULL) {
+ dev_err(dev, "Could not allocate ethernet device\n");
+ return -ENOMEM;
+ }
+
+ mgp = netdev_priv(netdev);
+ memset(mgp, 0, sizeof (*mgp));
+ mgp->dev = netdev;
+ mgp->pdev = pdev;
+ mgp->csum_flag = MYRI10GE_MCP_ETHER_FLAGS_CKSUM;
+ mgp->pause = myri10ge_flow_control;
+ mgp->intr_coal_delay = myri10ge_intr_coal_delay;
+
+ spin_lock_init(&mgp->cmd_lock);
+ if (pci_enable_device(pdev)) {
+ dev_err(&pdev->dev, "pci_enable_device call failed\n");
+ status = -ENODEV;
+ goto abort_with_netdev;
+ }
+ myri10ge_select_firmware(mgp);
+
+ /* Find the vendor-specific cap so we can check
+ the reboot register later on */
+ mgp->vendor_specific_offset
+ = pci_find_capability(pdev, PCI_CAP_ID_VNDR);
+
+ /* Set our max read request to 4KB */
+ cap = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+ if (cap < 64) {
+ dev_err(&pdev->dev,"Bad PCI_CAP_ID_EXP location %d\n", cap);
+ goto abort_with_netdev;
+ }
+ status = pci_read_config_word(pdev, cap + PCI_EXP_DEVCTL, &val);
+ if (status != 0) {
+ dev_err(&pdev->dev, "Error %d reading PCI_EXP_DEVCTL\n", status);
+ goto abort_with_netdev;
+ }
+ val = (val & ~PCI_EXP_DEVCTL_READRQ) | (5 << 12);
+ status = pci_write_config_word(pdev, cap + PCI_EXP_DEVCTL, val);
+ if (status != 0) {
+ dev_err(&pdev->dev, "Error %d writing PCI_EXP_DEVCTL\n", status);
+ goto abort_with_netdev;
+ }
+
+ pci_set_master(pdev);
+ status = pci_set_dma_mask(pdev, (dma_addr_t)~0ULL);
+ if (status != 0) {
+ dev_err(&pdev->dev, "64-bit pci address mask was refused, trying 32-bit\n");
+ status = pci_set_dma_mask(pdev, (dma_addr_t)0xffffffffULL);
+ }
+ if (status != 0) {
+ dev_err(&pdev->dev, "Error %d setting DMA mask\n", status);
+ goto abort_with_netdev;
+ }
+ mgp->cmd = (mcp_cmd_response_t *)
+ pci_alloc_consistent(pdev, sizeof (*mgp->cmd), &mgp->cmd_bus);
+ if (mgp->cmd == NULL) {
+ goto abort_with_netdev;
+ }
+
+ mgp->fw_stats = (mcp_irq_data_t *)
+ pci_alloc_consistent(pdev, sizeof (*mgp->fw_stats),
+ &mgp->fw_stats_bus);
+ if (mgp->fw_stats == NULL) {
+ goto abort_with_cmd;
+ }
+
+ strcpy(netdev->name, "eth%d");
+ mgp->board_span = pci_resource_len(pdev, 0);
+ mgp->iomem_base = pci_resource_start(pdev, 0);
+ mgp->mtrr = -1;
+#ifdef CONFIG_MTRR
+ mgp->mtrr = mtrr_add(mgp->iomem_base, mgp->board_span,
+ MTRR_TYPE_WRCOMB, 1);
+#endif
+ /* Hack. need to get rid of these magic numbers */
+ mgp->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
+ if (mgp->sram_size > mgp->board_span) {
+ dev_err(&pdev->dev, "board span %ld bytes too small\n",
+ mgp->board_span);
+ goto abort_with_wc;
+ }
+ mgp->sram = ioremap(mgp->iomem_base, mgp->board_span);
+ if (mgp->sram == NULL) {
+ dev_err(&pdev->dev, "ioremap failed for %ld bytes at 0x%lx\n",
+ mgp->board_span, mgp->iomem_base);
+ status = -ENXIO;
+ goto abort_with_wc;
+ }
+ memcpy_fromio(mgp->eeprom_strings,
+ mgp->sram + mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE,
+ MYRI10GE_EEPROM_STRINGS_SIZE);
+ memset(mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2);
+ status = myri10ge_read_mac_addr(mgp);
+ if (status) {
+ goto abort_with_ioremap;
+ }
+ for (i = 0; i < 6; i++) {
+ netdev->dev_addr[i] = mgp->mac_addr[i];
+ }
+ /* allocate rx done ring */
+ bytes = myri10ge_max_intr_slots * sizeof (*mgp->rx_done.entry);
+ mgp->rx_done.entry = (mcp_slot_t *)
+ pci_alloc_consistent(pdev, bytes, &mgp->rx_done.bus);
+ if (mgp->rx_done.entry == NULL)
+ goto abort_with_ioremap;
+ memset(mgp->rx_done.entry, 0, bytes);
+
+ status = myri10ge_load_firmware(mgp);
+ if (status != 0) {
+ dev_err(&pdev->dev, "failed to load firmware\n");
+ goto abort_with_rx_done;
+ }
+
+ status = myri10ge_reset(mgp);
+ if (status != 0) {
+ dev_err(&pdev->dev, "failed reset\n");
+ goto abort_with_firmware;
+ }
+
+#ifdef CONFIG_PCI_MSI
+ if (myri10ge_use_msi(pdev)) {
+ status = pci_enable_msi(pdev);
+ if (status != 0) {
+ dev_err(&pdev->dev,
+ "Error %d setting up MSI; falling back to xPIC\n",
+ status);
+ } else {
+ mgp->msi_enabled = 1;
+ }
+ }
+#endif
+
+ if (myri10ge_napi) {
+ status = request_irq(pdev->irq, myri10ge_napi_intr, SA_SHIRQ,
+ netdev->name, mgp);
+ } else {
+ status = request_irq(pdev->irq, myri10ge_intr, SA_SHIRQ,
+ netdev->name, mgp);
+ }
+ if (status != 0) {
+ dev_err(&pdev->dev, "failed to allocate IRQ\n");
+ goto abort_with_firmware;
+ }
+
+ pci_set_drvdata(pdev, mgp);
+ if ((myri10ge_initial_mtu + ETH_HLEN) > MYRI10GE_MAX_ETHER_MTU)
+ myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN;
+ if ((myri10ge_initial_mtu + ETH_HLEN) < 68)
+ myri10ge_initial_mtu = 68;
+ netdev->mtu = myri10ge_initial_mtu;
+ netdev->open = myri10ge_open;
+ netdev->stop = myri10ge_close;
+ netdev->hard_start_xmit = myri10ge_xmit;
+ netdev->get_stats = myri10ge_get_stats;
+ netdev->base_addr = mgp->iomem_base;
+ netdev->irq = pdev->irq;
+ netdev->init = myri10ge_init;
+ netdev->change_mtu = myri10ge_change_mtu;
+ netdev->set_multicast_list = myri10ge_set_multicast_list;
+ netdev->set_mac_address = myri10ge_set_mac_address;
+ netdev->do_ioctl = myri10ge_ioctl;
+ netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA;
+#if 0
+ /* TSO can be enabled via ethtool -K eth1 tso on */
+#ifdef NETIF_F_TSO
+ netdev->features |= NETIF_F_TSO;
+#endif
+#endif
+ if (myri10ge_napi) {
+ netdev->poll = myri10ge_poll;
+ netdev->weight = myri10ge_napi_weight;
+ }
+
+ /* Save configuration space to be restored if the
+ * nic resets due to a parity error */
+ myri10ge_save_state(mgp);
+
+ /* Setup the watchdog timer */
+ init_timer(&mgp->watchdog_timer);
+ mgp->watchdog_timer.data = (unsigned long)mgp;
+ mgp->watchdog_timer.function = myri10ge_watchdog_timer;
+
+ SET_ETHTOOL_OPS(netdev, &myri10ge_ethtool_ops);
+ INIT_WORK(&mgp->watchdog_work, myri10ge_watchdog, mgp);
+ status = register_netdev(netdev);
+ if (status != 0) {
+ dev_err(&pdev->dev, "register_netdev failed: %d\n", status);
+ goto abort_with_irq;
+ }
+
+ printk("myri10ge: %s: %s IRQ %d, tx bndry %d, fw %s, WC %s\n",
+ netdev->name, (mgp->msi_enabled ? "MSI" : "xPIC"),
+ pdev->irq, mgp->tx.boundary, mgp->fw_name,
+ (mgp->mtrr >= 0 ? "Enabled" : "Disabled"));
+
+ return 0;
+
+abort_with_irq:
+ free_irq(pdev->irq, mgp);
+#ifdef CONFIG_PCI_MSI
+ if (mgp->msi_enabled)
+ pci_disable_msi(pdev);
+#endif
+
+abort_with_firmware:
+ myri10ge_dummy_rdma(mgp, 0);
+
+abort_with_rx_done:
+ bytes = myri10ge_max_intr_slots * sizeof (*mgp->rx_done.entry);
+ pci_free_consistent(pdev, bytes, mgp->rx_done.entry, mgp->rx_done.bus);
+
+abort_with_ioremap:
+ iounmap((void __iomem *) mgp->sram);
+
+abort_with_wc:
+#ifdef CONFIG_MTRR
+ if (mgp->mtrr >= 0)
+ mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span);
+#endif
+ pci_free_consistent(pdev, sizeof (*mgp->fw_stats),
+ mgp->fw_stats, mgp->fw_stats_bus);
+
+abort_with_cmd:
+ pci_free_consistent(pdev, sizeof (*mgp->cmd), mgp->cmd, mgp->cmd_bus);
+
+abort_with_netdev:
+
+ free_netdev(netdev);
+ return status;
+}
+
+/****************************************************************
+ * myri10ge_remove
+ *
+ * Does what is necessary to shutdown one Myrinet device. Called
+ * once for each Myrinet card by the kernel when a module is
+ * unloaded.
+ ****************************************************************/
+
+static void
+myri10ge_remove(struct pci_dev *pdev)
+{
+ struct myri10ge_priv *mgp;
+ struct net_device *netdev;
+ size_t bytes;
+
+ mgp = (struct myri10ge_priv *) pci_get_drvdata(pdev);
+ if (mgp == NULL)
+ return;
+
+ flush_scheduled_work();
+ netdev = mgp->dev;
+ unregister_netdev(netdev);
+ free_irq(pdev->irq, mgp);
+#ifdef CONFIG_PCI_MSI
+ if (mgp->msi_enabled)
+ pci_disable_msi(pdev);
+#endif
+
+ myri10ge_dummy_rdma(mgp, 0);
+
+ bytes = myri10ge_max_intr_slots * sizeof (*mgp->rx_done.entry);
+ pci_free_consistent(pdev, bytes, mgp->rx_done.entry, mgp->rx_done.bus);
+
+ iounmap((void __iomem *) mgp->sram);
+
+#ifdef CONFIG_MTRR
+ if (mgp->mtrr >= 0)
+ mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span);
+#endif
+ pci_free_consistent(pdev, sizeof (*mgp->fw_stats),
+ mgp->fw_stats, mgp->fw_stats_bus);
+
+ pci_free_consistent(pdev, sizeof (*mgp->cmd), mgp->cmd, mgp->cmd_bus);
+
+ free_netdev(netdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+
+#define MYRI10GE_PCI_VENDOR_MYRICOM 0x14c1
+#define MYRI10GE_PCI_DEVICE_Z8E 0x0008
+static struct pci_device_id myri10ge_pci_tbl[] = {
+ {MYRI10GE_PCI_VENDOR_MYRICOM, MYRI10GE_PCI_DEVICE_Z8E,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0,},
+};
+
+static struct pci_driver myri10ge_driver = {
+ .name = "myri10ge",
+ .probe = myri10ge_probe,
+ .remove = myri10ge_remove,
+ .id_table = myri10ge_pci_tbl,
+#ifdef CONFIG_PM
+ .suspend = myri10ge_suspend,
+ .resume = myri10ge_resume,
+#endif
+};
+
+static int
+myri10ge_init_module(void)
+{
+ int rc;
+ printk("%s: Version %s\n", myri10ge_driver.name,
+ MYRI10GE_VERSION_STR);
+ rc = pci_register_driver(&myri10ge_driver);
+ return rc < 0 ? rc : 0;
+}
+
+static void
+myri10ge_cleanup_module(void)
+{
+ pci_unregister_driver(&myri10ge_driver);
+}
+
+module_init(myri10ge_init_module);
+module_exit(myri10ge_cleanup_module);
+