[PATCH][v5][9/24] Add Mellanox HCA low-level driver
From: Roland Dreier
Date: Tue Dec 28 2004 - 01:31:59 EST
Add a low-level driver for Mellanox MT23108 and MT25208 HCAs. The
MT25208 is only fully supported when in MT23108 compatibility mode;
only the very beginnings of support for native MT25208 mode (required
for HCAs without local memory) is present.
(As a side note, I believe this driver would be the first in-tree
consumer of the PCI MSI/MSI-X API)
Signed-off-by: Roland Dreier <roland@xxxxxxxxxxx>
--- linux-bk.orig/drivers/infiniband/Kconfig 2004-12-27 21:48:18.185289416 -0800
+++ linux-bk/drivers/infiniband/Kconfig 2004-12-27 21:48:21.258837002 -0800
@@ -7,4 +7,6 @@
any protocols you wish to use as well as drivers for your
InfiniBand hardware.
+source "drivers/infiniband/hw/mthca/Kconfig"
+
endmenu
--- linux-bk.orig/drivers/infiniband/Makefile 2004-12-27 21:48:18.216284854 -0800
+++ linux-bk/drivers/infiniband/Makefile 2004-12-27 21:48:21.219842741 -0800
@@ -1 +1,2 @@
obj-$(CONFIG_INFINIBAND) += core/
+obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-bk/drivers/infiniband/hw/mthca/Kconfig 2004-12-27 21:48:21.318828171 -0800
@@ -0,0 +1,26 @@
+config INFINIBAND_MTHCA
+ tristate "Mellanox HCA support"
+ depends on PCI && INFINIBAND
+ ---help---
+ This is a low-level driver for Mellanox InfiniHost host
+ channel adapters (HCAs), including the MT23108 PCI-X HCA
+ ("Tavor") and the MT25208 PCI Express HCA ("Arbel").
+
+config INFINIBAND_MTHCA_DEBUG
+ bool "Verbose debugging output"
+ depends on INFINIBAND_MTHCA
+ default n
+ ---help---
+ This option causes the mthca driver produce a bunch of debug
+ messages. Select this is you are developing the driver or
+ trying to diagnose a problem.
+
+config INFINIBAND_MTHCA_SSE_DOORBELL
+ bool "SSE doorbell code"
+ depends on INFINIBAND_MTHCA && X86 && !X86_64
+ default n
+ ---help---
+ This option will have the mthca driver use SSE instructions
+ to ring hardware doorbell registers. This may improve
+ performance for some workloads, but the driver will not run
+ on processors without SSE instructions.
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-bk/drivers/infiniband/hw/mthca/Makefile 2004-12-27 21:48:21.366821107 -0800
@@ -0,0 +1,12 @@
+EXTRA_CFLAGS += -Idrivers/infiniband/include
+
+ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
+EXTRA_CFLAGS += -DDEBUG
+endif
+
+obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
+
+ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
+ mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \
+ mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \
+ mthca_provider.o
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_allocator.c 2004-12-27 21:48:21.428811982 -0800
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_allocator.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+
+#include "mthca_dev.h"
+
+/* Trivial bitmap-based allocator */
+u32 mthca_alloc(struct mthca_alloc *alloc)
+{
+ u32 obj;
+
+ spin_lock(&alloc->lock);
+ obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last);
+ if (obj >= alloc->max) {
+ alloc->top = (alloc->top + alloc->max) & alloc->mask;
+ obj = find_first_zero_bit(alloc->table, alloc->max);
+ }
+
+ if (obj < alloc->max) {
+ set_bit(obj, alloc->table);
+ obj |= alloc->top;
+ } else
+ obj = -1;
+
+ spin_unlock(&alloc->lock);
+
+ return obj;
+}
+
+void mthca_free(struct mthca_alloc *alloc, u32 obj)
+{
+ obj &= alloc->max - 1;
+ spin_lock(&alloc->lock);
+ clear_bit(obj, alloc->table);
+ alloc->last = min(alloc->last, obj);
+ alloc->top = (alloc->top + alloc->max) & alloc->mask;
+ spin_unlock(&alloc->lock);
+}
+
+int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
+ u32 reserved)
+{
+ int i;
+
+ /* num must be a power of 2 */
+ if (num != 1 << (ffs(num) - 1))
+ return -EINVAL;
+
+ alloc->last = 0;
+ alloc->top = 0;
+ alloc->max = num;
+ alloc->mask = mask;
+ spin_lock_init(&alloc->lock);
+ alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof (long),
+ GFP_KERNEL);
+ if (!alloc->table)
+ return -ENOMEM;
+
+ bitmap_zero(alloc->table, num);
+ for (i = 0; i < reserved; ++i)
+ set_bit(i, alloc->table);
+
+ return 0;
+}
+
+void mthca_alloc_cleanup(struct mthca_alloc *alloc)
+{
+ kfree(alloc->table);
+}
+
+/*
+ * Array of pointers with lazy allocation of leaf pages. Callers of
+ * _get, _set and _clear methods must use a lock or otherwise
+ * serialize access to the array.
+ */
+
+void *mthca_array_get(struct mthca_array *array, int index)
+{
+ int p = (index * sizeof (void *)) >> PAGE_SHIFT;
+
+ if (array->page_list[p].page) {
+ int i = index & (PAGE_SIZE / sizeof (void *) - 1);
+ return array->page_list[p].page[i];
+ } else
+ return NULL;
+}
+
+int mthca_array_set(struct mthca_array *array, int index, void *value)
+{
+ int p = (index * sizeof (void *)) >> PAGE_SHIFT;
+
+ /* Allocate with GFP_ATOMIC because we'll be called with locks held. */
+ if (!array->page_list[p].page)
+ array->page_list[p].page = (void **) get_zeroed_page(GFP_ATOMIC);
+
+ if (!array->page_list[p].page)
+ return -ENOMEM;
+
+ array->page_list[p].page[index & (PAGE_SIZE / sizeof (void *) - 1)] =
+ value;
+ ++array->page_list[p].used;
+
+ return 0;
+}
+
+void mthca_array_clear(struct mthca_array *array, int index)
+{
+ int p = (index * sizeof (void *)) >> PAGE_SHIFT;
+
+ if (--array->page_list[p].used == 0) {
+ free_page((unsigned long) array->page_list[p].page);
+ array->page_list[p].page = NULL;
+ }
+
+ if (array->page_list[p].used < 0)
+ pr_debug("Array %p index %d page %d with ref count %d < 0\n",
+ array, index, p, array->page_list[p].used);
+}
+
+int mthca_array_init(struct mthca_array *array, int nent)
+{
+ int npage = (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE;
+ int i;
+
+ array->page_list = kmalloc(npage * sizeof *array->page_list, GFP_KERNEL);
+ if (!array->page_list)
+ return -ENOMEM;
+
+ for (i = 0; i < npage; ++i) {
+ array->page_list[i].page = NULL;
+ array->page_list[i].used = 0;
+ }
+
+ return 0;
+}
+
+void mthca_array_cleanup(struct mthca_array *array, int nent)
+{
+ int i;
+
+ for (i = 0; i < (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
+ free_page((unsigned long) array->page_list[i].page);
+
+ kfree(array->page_list);
+}
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_config_reg.h 2004-12-27 21:48:21.473805359 -0800
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_config_reg.h 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#ifndef MTHCA_CONFIG_REG_H
+#define MTHCA_CONFIG_REG_H
+
+#include <asm/page.h>
+
+#define MTHCA_HCR_BASE 0x80680
+#define MTHCA_HCR_SIZE 0x0001c
+#define MTHCA_ECR_BASE 0x80700
+#define MTHCA_ECR_SIZE 0x00008
+#define MTHCA_ECR_CLR_BASE 0x80708
+#define MTHCA_ECR_CLR_SIZE 0x00008
+#define MTHCA_ECR_OFFSET (MTHCA_ECR_BASE - MTHCA_HCR_BASE)
+#define MTHCA_ECR_CLR_OFFSET (MTHCA_ECR_CLR_BASE - MTHCA_HCR_BASE)
+#define MTHCA_CLR_INT_BASE 0xf00d8
+#define MTHCA_CLR_INT_SIZE 0x00008
+
+#define MTHCA_MAP_HCR_SIZE (MTHCA_ECR_CLR_BASE + \
+ MTHCA_ECR_CLR_SIZE - \
+ MTHCA_HCR_BASE)
+
+#endif /* MTHCA_CONFIG_REG_H */
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_dev.h 2004-12-27 21:48:21.522798147 -0800
@@ -0,0 +1,391 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_dev.h 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#ifndef MTHCA_DEV_H
+#define MTHCA_DEV_H
+
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <asm/semaphore.h>
+#include <asm/scatterlist.h>
+
+#include "mthca_provider.h"
+#include "mthca_doorbell.h"
+
+#define DRV_NAME "ib_mthca"
+#define PFX DRV_NAME ": "
+#define DRV_VERSION "0.06-pre"
+#define DRV_RELDATE "November 8, 2004"
+
+/* Types of supported HCA */
+enum {
+ TAVOR, /* MT23108 */
+ ARBEL_COMPAT, /* MT25208 in Tavor compat mode */
+ ARBEL_NATIVE /* MT25208 with extended features */
+};
+
+enum {
+ MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
+ MTHCA_FLAG_SRQ = 1 << 2,
+ MTHCA_FLAG_MSI = 1 << 3,
+ MTHCA_FLAG_MSI_X = 1 << 4,
+ MTHCA_FLAG_NO_LAM = 1 << 5
+};
+
+enum {
+ MTHCA_KAR_PAGE = 1,
+ MTHCA_MAX_PORTS = 2
+};
+
+enum {
+ MTHCA_MPT_ENTRY_SIZE = 0x40,
+ MTHCA_EQ_CONTEXT_SIZE = 0x40,
+ MTHCA_CQ_CONTEXT_SIZE = 0x40,
+ MTHCA_QP_CONTEXT_SIZE = 0x200,
+ MTHCA_AV_SIZE = 0x20,
+ MTHCA_MGM_ENTRY_SIZE = 0x40
+};
+
+enum {
+ MTHCA_EQ_CMD,
+ MTHCA_EQ_ASYNC,
+ MTHCA_EQ_COMP,
+ MTHCA_NUM_EQ
+};
+
+struct mthca_cmd {
+ int use_events;
+ struct semaphore hcr_sem;
+ struct semaphore poll_sem;
+ struct semaphore event_sem;
+ int max_cmds;
+ spinlock_t context_lock;
+ int free_head;
+ struct mthca_cmd_context *context;
+ u16 token_mask;
+};
+
+struct mthca_limits {
+ int num_ports;
+ int vl_cap;
+ int mtu_cap;
+ int gid_table_len;
+ int pkey_table_len;
+ int local_ca_ack_delay;
+ int max_sg;
+ int num_qps;
+ int reserved_qps;
+ int num_srqs;
+ int reserved_srqs;
+ int num_eecs;
+ int reserved_eecs;
+ int num_cqs;
+ int reserved_cqs;
+ int num_eqs;
+ int reserved_eqs;
+ int num_mpts;
+ int num_mtt_segs;
+ int mtt_seg_size;
+ int reserved_mtts;
+ int reserved_mrws;
+ int num_rdbs;
+ int reserved_uars;
+ int num_mgms;
+ int num_amgms;
+ int reserved_mcgs;
+ int num_pds;
+ int reserved_pds;
+};
+
+struct mthca_alloc {
+ u32 last;
+ u32 top;
+ u32 max;
+ u32 mask;
+ spinlock_t lock;
+ unsigned long *table;
+};
+
+struct mthca_array {
+ struct {
+ void **page;
+ int used;
+ } *page_list;
+};
+
+struct mthca_pd_table {
+ struct mthca_alloc alloc;
+};
+
+struct mthca_mr_table {
+ struct mthca_alloc mpt_alloc;
+ int max_mtt_order;
+ unsigned long **mtt_buddy;
+ u64 mtt_base;
+};
+
+struct mthca_eq_table {
+ struct mthca_alloc alloc;
+ void __iomem *clr_int;
+ u32 clr_mask;
+ struct mthca_eq eq[MTHCA_NUM_EQ];
+ int have_irq;
+ u8 inta_pin;
+};
+
+struct mthca_cq_table {
+ struct mthca_alloc alloc;
+ spinlock_t lock;
+ struct mthca_array cq;
+};
+
+struct mthca_qp_table {
+ struct mthca_alloc alloc;
+ int sqp_start;
+ spinlock_t lock;
+ struct mthca_array qp;
+};
+
+struct mthca_av_table {
+ struct pci_pool *pool;
+ int num_ddr_avs;
+ u64 ddr_av_base;
+ void __iomem *av_map;
+ struct mthca_alloc alloc;
+};
+
+struct mthca_mcg_table {
+ struct semaphore sem;
+ struct mthca_alloc alloc;
+};
+
+struct mthca_dev {
+ struct ib_device ib_dev;
+ struct pci_dev *pdev;
+
+ int hca_type;
+ unsigned long mthca_flags;
+
+ u32 rev_id;
+
+ /* firmware info */
+ u64 fw_ver;
+ union {
+ struct {
+ u64 fw_start;
+ u64 fw_end;
+ } tavor;
+ struct {
+ u64 clr_int_base;
+ u64 eq_arm_base;
+ u64 eq_set_ci_base;
+ struct scatterlist *mem;
+ u16 fw_pages;
+ } arbel;
+ } fw;
+
+ u64 ddr_start;
+ u64 ddr_end;
+
+ MTHCA_DECLARE_DOORBELL_LOCK(doorbell_lock)
+
+ void __iomem *hcr;
+ void __iomem *clr_base;
+ void __iomem *kar;
+
+ struct mthca_cmd cmd;
+ struct mthca_limits limits;
+
+ struct mthca_pd_table pd_table;
+ struct mthca_mr_table mr_table;
+ struct mthca_eq_table eq_table;
+ struct mthca_cq_table cq_table;
+ struct mthca_qp_table qp_table;
+ struct mthca_av_table av_table;
+ struct mthca_mcg_table mcg_table;
+
+ struct mthca_pd driver_pd;
+ struct mthca_mr driver_mr;
+
+ struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2];
+ struct ib_ah *sm_ah[MTHCA_MAX_PORTS];
+ spinlock_t sm_lock;
+};
+
+#define mthca_dbg(mdev, format, arg...) \
+ dev_dbg(&mdev->pdev->dev, format, ## arg)
+#define mthca_err(mdev, format, arg...) \
+ dev_err(&mdev->pdev->dev, format, ## arg)
+#define mthca_info(mdev, format, arg...) \
+ dev_info(&mdev->pdev->dev, format, ## arg)
+#define mthca_warn(mdev, format, arg...) \
+ dev_warn(&mdev->pdev->dev, format, ## arg)
+
+extern void __buggy_use_of_MTHCA_GET(void);
+extern void __buggy_use_of_MTHCA_PUT(void);
+
+#define MTHCA_GET(dest, source, offset) \
+ do { \
+ void *__p = (char *) (source) + (offset); \
+ switch (sizeof (dest)) { \
+ case 1: (dest) = *(u8 *) __p; break; \
+ case 2: (dest) = be16_to_cpup(__p); break; \
+ case 4: (dest) = be32_to_cpup(__p); break; \
+ case 8: (dest) = be64_to_cpup(__p); break; \
+ default: __buggy_use_of_MTHCA_GET(); \
+ } \
+ } while (0)
+
+#define MTHCA_PUT(dest, source, offset) \
+ do { \
+ __typeof__(source) *__p = \
+ (__typeof__(source) *) ((char *) (dest) + (offset)); \
+ switch (sizeof(source)) { \
+ case 1: *__p = (source); break; \
+ case 2: *__p = cpu_to_be16(source); break; \
+ case 4: *__p = cpu_to_be32(source); break; \
+ case 8: *__p = cpu_to_be64(source); break; \
+ default: __buggy_use_of_MTHCA_PUT(); \
+ } \
+ } while (0)
+
+int mthca_reset(struct mthca_dev *mdev);
+
+u32 mthca_alloc(struct mthca_alloc *alloc);
+void mthca_free(struct mthca_alloc *alloc, u32 obj);
+int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
+ u32 reserved);
+void mthca_alloc_cleanup(struct mthca_alloc *alloc);
+void *mthca_array_get(struct mthca_array *array, int index);
+int mthca_array_set(struct mthca_array *array, int index, void *value);
+void mthca_array_clear(struct mthca_array *array, int index);
+int mthca_array_init(struct mthca_array *array, int nent);
+void mthca_array_cleanup(struct mthca_array *array, int nent);
+
+int mthca_init_pd_table(struct mthca_dev *dev);
+int mthca_init_mr_table(struct mthca_dev *dev);
+int mthca_init_eq_table(struct mthca_dev *dev);
+int mthca_init_cq_table(struct mthca_dev *dev);
+int mthca_init_qp_table(struct mthca_dev *dev);
+int mthca_init_av_table(struct mthca_dev *dev);
+int mthca_init_mcg_table(struct mthca_dev *dev);
+
+void mthca_cleanup_pd_table(struct mthca_dev *dev);
+void mthca_cleanup_mr_table(struct mthca_dev *dev);
+void mthca_cleanup_eq_table(struct mthca_dev *dev);
+void mthca_cleanup_cq_table(struct mthca_dev *dev);
+void mthca_cleanup_qp_table(struct mthca_dev *dev);
+void mthca_cleanup_av_table(struct mthca_dev *dev);
+void mthca_cleanup_mcg_table(struct mthca_dev *dev);
+
+int mthca_register_device(struct mthca_dev *dev);
+void mthca_unregister_device(struct mthca_dev *dev);
+
+int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd);
+void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
+
+int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
+ u32 access, struct mthca_mr *mr);
+int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
+ u64 *buffer_list, int buffer_size_shift,
+ int list_len, u64 iova, u64 total_size,
+ u32 access, struct mthca_mr *mr);
+void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
+
+int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *entry);
+void mthca_arm_cq(struct mthca_dev *dev, struct mthca_cq *cq,
+ int solicited);
+int mthca_init_cq(struct mthca_dev *dev, int nent,
+ struct mthca_cq *cq);
+void mthca_free_cq(struct mthca_dev *dev,
+ struct mthca_cq *cq);
+void mthca_cq_event(struct mthca_dev *dev, u32 cqn);
+void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn);
+
+void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
+ enum ib_event_type event_type);
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
+int mthca_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int mthca_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,
+ int index, int *dbd, u32 *new_wqe);
+int mthca_alloc_qp(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_cq *send_cq,
+ struct mthca_cq *recv_cq,
+ enum ib_qp_type type,
+ enum ib_sig_type send_policy,
+ enum ib_sig_type recv_policy,
+ struct mthca_qp *qp);
+int mthca_alloc_sqp(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_cq *send_cq,
+ struct mthca_cq *recv_cq,
+ enum ib_sig_type send_policy,
+ enum ib_sig_type recv_policy,
+ int qpn,
+ int port,
+ struct mthca_sqp *sqp);
+void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
+int mthca_create_ah(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct ib_ah_attr *ah_attr,
+ struct mthca_ah *ah);
+int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah);
+int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
+ struct ib_ud_header *header);
+
+int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+
+int mthca_process_mad(struct ib_device *ibdev,
+ int mad_flags,
+ u8 port_num,
+ u16 slid,
+ struct ib_mad *in_mad,
+ struct ib_mad *out_mad);
+int mthca_create_agents(struct mthca_dev *dev);
+void mthca_free_agents(struct mthca_dev *dev);
+
+static inline struct mthca_dev *to_mdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct mthca_dev, ib_dev);
+}
+
+#endif /* MTHCA_DEV_H */
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_doorbell.h 2004-12-27 21:48:21.567791525 -0800
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/preempt.h>
+
+#define MTHCA_RD_DOORBELL 0x00
+#define MTHCA_SEND_DOORBELL 0x10
+#define MTHCA_RECEIVE_DOORBELL 0x18
+#define MTHCA_CQ_DOORBELL 0x20
+#define MTHCA_EQ_DOORBELL 0x28
+
+#if BITS_PER_LONG == 64
+/*
+ * Assume that we can just write a 64-bit doorbell atomically. s390
+ * actually doesn't have writeq() but S/390 systems don't even have
+ * PCI so we won't worry about it.
+ */
+
+#define MTHCA_DECLARE_DOORBELL_LOCK(name)
+#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0)
+#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL)
+
+static inline void mthca_write64(u32 val[2], void __iomem *dest,
+ spinlock_t *doorbell_lock)
+{
+ __raw_writeq(*(u64 *) val, dest);
+}
+
+#elif defined(CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL)
+/* Use SSE to write 64 bits atomically without a lock. */
+
+#define MTHCA_DECLARE_DOORBELL_LOCK(name)
+#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0)
+#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL)
+
+static inline unsigned long mthca_get_fpu(void)
+{
+ unsigned long cr0;
+
+ preempt_disable();
+ asm volatile("mov %%cr0,%0; clts" : "=r" (cr0));
+ return cr0;
+}
+
+static inline void mthca_put_fpu(unsigned long cr0)
+{
+ asm volatile("mov %0,%%cr0" : : "r" (cr0));
+ preempt_enable();
+}
+
+static inline void mthca_write64(u32 val[2], void __iomem *dest,
+ spinlock_t *doorbell_lock)
+{
+ /* i386 stack is aligned to 8 bytes, so this should be OK: */
+ u8 xmmsave[8] __attribute__((aligned(8)));
+ unsigned long cr0;
+
+ cr0 = mthca_get_fpu();
+
+ asm volatile (
+ "movlps %%xmm0,(%0); \n\t"
+ "movlps (%1),%%xmm0; \n\t"
+ "movlps %%xmm0,(%2); \n\t"
+ "movlps (%0),%%xmm0; \n\t"
+ :
+ : "r" (xmmsave), "r" (val), "r" (dest)
+ : "memory" );
+
+ mthca_put_fpu(cr0);
+}
+
+#else
+/* Just fall back to a spinlock to protect the doorbell */
+
+#define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
+#define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
+#define MTHCA_GET_DOORBELL_LOCK(ptr) (ptr)
+
+static inline void mthca_write64(u32 val[2], void __iomem *dest,
+ spinlock_t *doorbell_lock)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(doorbell_lock, flags);
+ __raw_writel(val[0], dest);
+ __raw_writel(val[1], dest + 4);
+ spin_unlock_irqrestore(doorbell_lock, flags);
+}
+
+#endif
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_main.c 2004-12-27 21:48:21.623783283 -0800
@@ -0,0 +1,936 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_main.c 1396 2004-12-28 04:10:27Z roland $
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+
+#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL
+#include <asm/cpufeature.h>
+#endif
+
+#include "mthca_dev.h"
+#include "mthca_config_reg.h"
+#include "mthca_cmd.h"
+#include "mthca_profile.h"
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+#ifdef CONFIG_PCI_MSI
+
+static int msi_x = 0;
+module_param(msi_x, int, 0444);
+MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
+
+static int msi = 0;
+module_param(msi, int, 0444);
+MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero");
+
+#else /* CONFIG_PCI_MSI */
+
+#define msi_x (0)
+#define msi (0)
+
+#endif /* CONFIG_PCI_MSI */
+
+static const char mthca_version[] __devinitdata =
+ "ib_mthca: Mellanox InfiniBand HCA driver v"
+ DRV_VERSION " (" DRV_RELDATE ")\n";
+
+static int __devinit mthca_tune_pci(struct mthca_dev *mdev)
+{
+ int cap;
+ u16 val;
+
+ /* First try to max out Read Byte Count */
+ cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
+ if (cap) {
+ if (pci_read_config_word(mdev->pdev, cap + PCI_X_CMD, &val)) {
+ mthca_err(mdev, "Couldn't read PCI-X command register, "
+ "aborting.\n");
+ return -ENODEV;
+ }
+ val = (val & ~PCI_X_CMD_MAX_READ) | (3 << 2);
+ if (pci_write_config_word(mdev->pdev, cap + PCI_X_CMD, val)) {
+ mthca_err(mdev, "Couldn't write PCI-X command register, "
+ "aborting.\n");
+ return -ENODEV;
+ }
+ } else if (mdev->hca_type == TAVOR)
+ mthca_info(mdev, "No PCI-X capability, not setting RBC.\n");
+
+ cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP);
+ if (cap) {
+ if (pci_read_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, &val)) {
+ mthca_err(mdev, "Couldn't read PCI Express device control "
+ "register, aborting.\n");
+ return -ENODEV;
+ }
+ val = (val & ~PCI_EXP_DEVCTL_READRQ) | (5 << 12);
+ if (pci_write_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, val)) {
+ mthca_err(mdev, "Couldn't write PCI Express device control "
+ "register, aborting.\n");
+ return -ENODEV;
+ }
+ } else if (mdev->hca_type == ARBEL_NATIVE ||
+ mdev->hca_type == ARBEL_COMPAT)
+ mthca_info(mdev, "No PCI Express capability, "
+ "not setting Max Read Request Size.\n");
+
+ return 0;
+}
+
+static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
+{
+ int err;
+ u8 status;
+
+ err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
+ return err;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_DEV_LIM returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+ if (dev_lim->min_page_sz > PAGE_SIZE) {
+ mthca_err(mdev, "HCA minimum page size of %d bigger than "
+ "kernel PAGE_SIZE of %ld, aborting.\n",
+ dev_lim->min_page_sz, PAGE_SIZE);
+ return -ENODEV;
+ }
+ if (dev_lim->num_ports > MTHCA_MAX_PORTS) {
+ mthca_err(mdev, "HCA has %d ports, but we only support %d, "
+ "aborting.\n",
+ dev_lim->num_ports, MTHCA_MAX_PORTS);
+ return -ENODEV;
+ }
+
+ mdev->limits.num_ports = dev_lim->num_ports;
+ mdev->limits.vl_cap = dev_lim->max_vl;
+ mdev->limits.mtu_cap = dev_lim->max_mtu;
+ mdev->limits.gid_table_len = dev_lim->max_gids;
+ mdev->limits.pkey_table_len = dev_lim->max_pkeys;
+ mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
+ mdev->limits.max_sg = dev_lim->max_sg;
+ mdev->limits.reserved_qps = dev_lim->reserved_qps;
+ mdev->limits.reserved_srqs = dev_lim->reserved_srqs;
+ mdev->limits.reserved_eecs = dev_lim->reserved_eecs;
+ mdev->limits.reserved_cqs = dev_lim->reserved_cqs;
+ mdev->limits.reserved_eqs = dev_lim->reserved_eqs;
+ mdev->limits.reserved_mtts = dev_lim->reserved_mtts;
+ mdev->limits.reserved_mrws = dev_lim->reserved_mrws;
+ mdev->limits.reserved_uars = dev_lim->reserved_uars;
+ mdev->limits.reserved_pds = dev_lim->reserved_pds;
+
+ if (dev_lim->flags & DEV_LIM_FLAG_SRQ)
+ mdev->mthca_flags |= MTHCA_FLAG_SRQ;
+
+ return 0;
+}
+
+static int __devinit mthca_init_tavor(struct mthca_dev *mdev)
+{
+ u8 status;
+ int err;
+ struct mthca_dev_lim dev_lim;
+ struct mthca_init_hca_param init_hca;
+ struct mthca_adapter adapter;
+
+ err = mthca_SYS_EN(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "SYS_EN command failed, aborting.\n");
+ return err;
+ }
+ if (status) {
+ mthca_err(mdev, "SYS_EN returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+
+ err = mthca_QUERY_FW(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
+ goto err_out_disable;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_out_disable;
+ }
+ err = mthca_QUERY_DDR(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_DDR command failed, aborting.\n");
+ goto err_out_disable;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_DDR returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_out_disable;
+ }
+
+ err = mthca_dev_lim(mdev, &dev_lim);
+
+ err = mthca_make_profile(mdev, &dev_lim, &init_hca);
+ if (err)
+ goto err_out_disable;
+
+ err = mthca_INIT_HCA(mdev, &init_hca, &status);
+ if (err) {
+ mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
+ goto err_out_disable;
+ }
+ if (status) {
+ mthca_err(mdev, "INIT_HCA returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_out_disable;
+ }
+
+ err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
+ goto err_out_disable;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_out_close;
+ }
+
+ mdev->eq_table.inta_pin = adapter.inta_pin;
+ mdev->rev_id = adapter.revision_id;
+
+ return 0;
+
+err_out_close:
+ mthca_CLOSE_HCA(mdev, 0, &status);
+
+err_out_disable:
+ mthca_SYS_DIS(mdev, &status);
+
+ return err;
+}
+
+static int __devinit mthca_load_fw(struct mthca_dev *mdev)
+{
+ u8 status;
+ int err;
+ int num_ent, num_sg, fw_pages, cur_order;
+ int i;
+
+ /* FIXME: use HCA-attached memory for FW if present */
+
+ mdev->fw.arbel.mem = kmalloc(sizeof *mdev->fw.arbel.mem *
+ mdev->fw.arbel.fw_pages,
+ GFP_KERNEL);
+ if (!mdev->fw.arbel.mem) {
+ mthca_err(mdev, "Couldn't allocate FW area, aborting.\n");
+ return -ENOMEM;
+ }
+
+ memset(mdev->fw.arbel.mem, 0,
+ sizeof *mdev->fw.arbel.mem * mdev->fw.arbel.fw_pages);
+
+ fw_pages = mdev->fw.arbel.fw_pages;
+ num_ent = 0;
+
+ /*
+ * We allocate in as big chunks as we can, up to a maximum of
+ * 256 KB per chunk.
+ */
+ cur_order = get_order(1 << 18);
+
+ while (fw_pages > 0) {
+ while (1 << cur_order > fw_pages)
+ --cur_order;
+
+ /*
+ * We allocate with GFP_HIGHUSER because only the
+ * firmware is going to touch these pages, so there's
+ * no need for a kernel virtual address. We use
+ * __GFP_NOWARN because we'll deal with any allocation
+ * failures ourselves.
+ */
+ mdev->fw.arbel.mem[num_ent].page = alloc_pages(GFP_HIGHUSER | __GFP_NOWARN,
+ cur_order);
+ mdev->fw.arbel.mem[num_ent].length = PAGE_SIZE << cur_order;
+ if (!mdev->fw.arbel.mem[num_ent].page) {
+ --cur_order;
+ if (cur_order < 0) {
+ mthca_err(mdev, "Couldn't allocate FW area, aborting.\n");
+ err = -ENOMEM;
+ goto err_free;
+ }
+ } else {
+ ++num_ent;
+ fw_pages -= 1 << cur_order;
+ }
+ }
+
+ num_sg = pci_map_sg(mdev->pdev, mdev->fw.arbel.mem, num_ent,
+ PCI_DMA_BIDIRECTIONAL);
+ if (num_sg <= 0) {
+ mthca_err(mdev, "Couldn't allocate FW area, aborting.\n");
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ err = mthca_MAP_FA(mdev, num_sg, mdev->fw.arbel.mem, &status);
+ if (err) {
+ mthca_err(mdev, "MAP_FA command failed, aborting.\n");
+ goto err_unmap;
+ }
+ if (status) {
+ mthca_err(mdev, "MAP_FA returned status 0x%02x, aborting.\n", status);
+ err = -EINVAL;
+ goto err_unmap;
+ }
+ err = mthca_RUN_FW(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "RUN_FW command failed, aborting.\n");
+ goto err_unmap_fa;
+ }
+ if (status) {
+ mthca_err(mdev, "RUN_FW returned status 0x%02x, aborting.\n", status);
+ err = -EINVAL;
+ goto err_unmap_fa;
+ }
+
+ return 0;
+
+err_unmap_fa:
+ mthca_UNMAP_FA(mdev, &status);
+
+err_unmap:
+ pci_unmap_sg(mdev->pdev, mdev->fw.arbel.mem,
+ mdev->fw.arbel.fw_pages, PCI_DMA_BIDIRECTIONAL);
+err_free:
+ for (i = 0; i < mdev->fw.arbel.fw_pages; ++i)
+ if (mdev->fw.arbel.mem[i].page)
+ __free_pages(mdev->fw.arbel.mem[i].page,
+ get_order(mdev->fw.arbel.mem[i].length));
+ kfree(mdev->fw.arbel.mem);
+ return err;
+}
+
+static int __devinit mthca_init_arbel(struct mthca_dev *mdev)
+{
+ struct mthca_dev_lim dev_lim;
+ u8 status;
+ int err;
+
+ err = mthca_QUERY_FW(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
+ return err;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+
+ err = mthca_ENABLE_LAM(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "ENABLE_LAM command failed, aborting.\n");
+ return err;
+ }
+ if (status == MTHCA_CMD_STAT_LAM_NOT_PRE) {
+ mthca_dbg(mdev, "No HCA-attached memory (running in MemFree mode)\n");
+ mdev->mthca_flags |= MTHCA_FLAG_NO_LAM;
+ } else if (status) {
+ mthca_err(mdev, "ENABLE_LAM returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+
+ err = mthca_load_fw(mdev);
+ if (err) {
+ mthca_err(mdev, "Failed to start FW, aborting.\n");
+ goto err_out_disable;
+ }
+
+ err = mthca_dev_lim(mdev, &dev_lim);
+ if (err) {
+ mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
+ goto err_out_disable;
+ }
+
+ mthca_warn(mdev, "Sorry, native MT25208 mode support is not done, "
+ "aborting.\n");
+ err = -ENODEV;
+
+err_out_disable:
+ if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
+ mthca_DISABLE_LAM(mdev, &status);
+ return err;
+}
+
+static int __devinit mthca_init_hca(struct mthca_dev *mdev)
+{
+ if (mdev->hca_type == ARBEL_NATIVE)
+ return mthca_init_arbel(mdev);
+ else
+ return mthca_init_tavor(mdev);
+}
+
+static int __devinit mthca_setup_hca(struct mthca_dev *dev)
+{
+ int err;
+
+ MTHCA_INIT_DOORBELL_LOCK(&dev->doorbell_lock);
+
+ err = mthca_init_pd_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "protection domain table, aborting.\n");
+ return err;
+ }
+
+ err = mthca_init_mr_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "memory region table, aborting.\n");
+ goto err_out_pd_table_free;
+ }
+
+ err = mthca_pd_alloc(dev, &dev->driver_pd);
+ if (err) {
+ mthca_err(dev, "Failed to create driver PD, "
+ "aborting.\n");
+ goto err_out_mr_table_free;
+ }
+
+ err = mthca_init_eq_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "event queue table, aborting.\n");
+ goto err_out_pd_free;
+ }
+
+ err = mthca_cmd_use_events(dev);
+ if (err) {
+ mthca_err(dev, "Failed to switch to event-driven "
+ "firmware commands, aborting.\n");
+ goto err_out_eq_table_free;
+ }
+
+ err = mthca_init_cq_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "completion queue table, aborting.\n");
+ goto err_out_cmd_poll;
+ }
+
+ err = mthca_init_qp_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "queue pair table, aborting.\n");
+ goto err_out_cq_table_free;
+ }
+
+ err = mthca_init_av_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "address vector table, aborting.\n");
+ goto err_out_qp_table_free;
+ }
+
+ err = mthca_init_mcg_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "multicast group table, aborting.\n");
+ goto err_out_av_table_free;
+ }
+
+ return 0;
+
+err_out_av_table_free:
+ mthca_cleanup_av_table(dev);
+
+err_out_qp_table_free:
+ mthca_cleanup_qp_table(dev);
+
+err_out_cq_table_free:
+ mthca_cleanup_cq_table(dev);
+
+err_out_cmd_poll:
+ mthca_cmd_use_polling(dev);
+
+err_out_eq_table_free:
+ mthca_cleanup_eq_table(dev);
+
+err_out_pd_free:
+ mthca_pd_free(dev, &dev->driver_pd);
+
+err_out_mr_table_free:
+ mthca_cleanup_mr_table(dev);
+
+err_out_pd_table_free:
+ mthca_cleanup_pd_table(dev);
+ return err;
+}
+
+static int __devinit mthca_request_regions(struct pci_dev *pdev,
+ int ddr_hidden)
+{
+ int err;
+
+ /*
+ * We request our first BAR in two chunks, since the MSI-X
+ * vector table is right in the middle.
+ *
+ * This is why we can't just use pci_request_regions() -- if
+ * we did then setting up MSI-X would fail, since the PCI core
+ * wants to do request_mem_region on the MSI-X vector table.
+ */
+ if (!request_mem_region(pci_resource_start(pdev, 0) +
+ MTHCA_HCR_BASE,
+ MTHCA_MAP_HCR_SIZE,
+ DRV_NAME))
+ return -EBUSY;
+
+ if (!request_mem_region(pci_resource_start(pdev, 0) +
+ MTHCA_CLR_INT_BASE,
+ MTHCA_CLR_INT_SIZE,
+ DRV_NAME)) {
+ err = -EBUSY;
+ goto err_out_bar0_beg;
+ }
+
+ err = pci_request_region(pdev, 2, DRV_NAME);
+ if (err)
+ goto err_out_bar0_end;
+
+ if (!ddr_hidden) {
+ err = pci_request_region(pdev, 4, DRV_NAME);
+ if (err)
+ goto err_out_bar2;
+ }
+
+ return 0;
+
+err_out_bar0_beg:
+ release_mem_region(pci_resource_start(pdev, 0) +
+ MTHCA_HCR_BASE,
+ MTHCA_MAP_HCR_SIZE);
+
+err_out_bar0_end:
+ release_mem_region(pci_resource_start(pdev, 0) +
+ MTHCA_CLR_INT_BASE,
+ MTHCA_CLR_INT_SIZE);
+
+err_out_bar2:
+ pci_release_region(pdev, 2);
+ return err;
+}
+
+static void mthca_release_regions(struct pci_dev *pdev,
+ int ddr_hidden)
+{
+ release_mem_region(pci_resource_start(pdev, 0) +
+ MTHCA_HCR_BASE,
+ MTHCA_MAP_HCR_SIZE);
+ release_mem_region(pci_resource_start(pdev, 0) +
+ MTHCA_CLR_INT_BASE,
+ MTHCA_CLR_INT_SIZE);
+ pci_release_region(pdev, 2);
+ if (!ddr_hidden)
+ pci_release_region(pdev, 4);
+}
+
+static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev)
+{
+ struct msix_entry entries[3];
+ int err;
+
+ entries[0].entry = 0;
+ entries[1].entry = 1;
+ entries[2].entry = 2;
+
+ err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries));
+ if (err) {
+ if (err > 0)
+ mthca_info(mdev, "Only %d MSI-X vectors available, "
+ "not using MSI-X\n", err);
+ return err;
+ }
+
+ mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector;
+ mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector;
+ mdev->eq_table.eq[MTHCA_EQ_CMD ].msi_x_vector = entries[2].vector;
+
+ return 0;
+}
+
+static void mthca_close_hca(struct mthca_dev *mdev)
+{
+ u8 status;
+ int i;
+
+ mthca_CLOSE_HCA(mdev, 0, &status);
+
+ if (mdev->hca_type == ARBEL_NATIVE) {
+ mthca_UNMAP_FA(mdev, &status);
+
+ pci_unmap_sg(mdev->pdev, mdev->fw.arbel.mem,
+ mdev->fw.arbel.fw_pages, PCI_DMA_BIDIRECTIONAL);
+
+ for (i = 0; i < mdev->fw.arbel.fw_pages; ++i)
+ if (mdev->fw.arbel.mem[i].page)
+ __free_pages(mdev->fw.arbel.mem[i].page,
+ get_order(mdev->fw.arbel.mem[i].length));
+ kfree(mdev->fw.arbel.mem);
+
+ if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
+ mthca_DISABLE_LAM(mdev, &status);
+ } else
+ mthca_SYS_DIS(mdev, &status);
+}
+
+static int __devinit mthca_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ static int mthca_version_printed = 0;
+ int ddr_hidden = 0;
+ int err;
+ unsigned long mthca_base;
+ struct mthca_dev *mdev;
+
+ if (!mthca_version_printed) {
+ printk(KERN_INFO "%s", mthca_version);
+ ++mthca_version_printed;
+ }
+
+ printk(KERN_INFO PFX "Initializing %s (%s)\n",
+ pci_pretty_name(pdev), pci_name(pdev));
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Cannot enable PCI device, "
+ "aborting.\n");
+ return err;
+ }
+
+ /*
+ * Check for BARs. We expect 0: 1MB, 2: 8MB, 4: DDR (may not
+ * be present)
+ */
+ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
+ pci_resource_len(pdev, 0) != 1 << 20) {
+ dev_err(&pdev->dev, "Missing DCS, aborting.");
+ err = -ENODEV;
+ goto err_out_disable_pdev;
+ }
+ if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) ||
+ pci_resource_len(pdev, 2) != 1 << 23) {
+ dev_err(&pdev->dev, "Missing UAR, aborting.");
+ err = -ENODEV;
+ goto err_out_disable_pdev;
+ }
+ if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM))
+ ddr_hidden = 1;
+
+ err = mthca_request_regions(pdev, ddr_hidden);
+ if (err) {
+ dev_err(&pdev->dev, "Cannot obtain PCI resources, "
+ "aborting.\n");
+ goto err_out_disable_pdev;
+ }
+
+ pci_set_master(pdev);
+
+ err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+ if (err) {
+ dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
+ err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+ if (err) {
+ dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
+ goto err_out_free_res;
+ }
+ }
+ err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+ if (err) {
+ dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
+ "consistent PCI DMA mask.\n");
+ err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+ if (err) {
+ dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
+ "aborting.\n");
+ goto err_out_free_res;
+ }
+ }
+
+ mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev);
+ if (!mdev) {
+ dev_err(&pdev->dev, "Device struct alloc failed, "
+ "aborting.\n");
+ err = -ENOMEM;
+ goto err_out_free_res;
+ }
+
+ mdev->pdev = pdev;
+ mdev->hca_type = id->driver_data;
+
+ if (ddr_hidden)
+ mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
+
+ /*
+ * Now reset the HCA before we touch the PCI capabilities or
+ * attempt a firmware command, since a boot ROM may have left
+ * the HCA in an undefined state.
+ */
+ err = mthca_reset(mdev);
+ if (err) {
+ mthca_err(mdev, "Failed to reset HCA, aborting.\n");
+ goto err_out_free_dev;
+ }
+
+ if (msi_x && !mthca_enable_msi_x(mdev))
+ mdev->mthca_flags |= MTHCA_FLAG_MSI_X;
+ if (msi && !(mdev->mthca_flags & MTHCA_FLAG_MSI_X) &&
+ !pci_enable_msi(pdev))
+ mdev->mthca_flags |= MTHCA_FLAG_MSI;
+
+ sema_init(&mdev->cmd.hcr_sem, 1);
+ sema_init(&mdev->cmd.poll_sem, 1);
+ mdev->cmd.use_events = 0;
+
+ mthca_base = pci_resource_start(pdev, 0);
+ mdev->hcr = ioremap(mthca_base + MTHCA_HCR_BASE, MTHCA_MAP_HCR_SIZE);
+ if (!mdev->hcr) {
+ mthca_err(mdev, "Couldn't map command register, "
+ "aborting.\n");
+ err = -ENOMEM;
+ goto err_out_free_dev;
+ }
+ mdev->clr_base = ioremap(mthca_base + MTHCA_CLR_INT_BASE,
+ MTHCA_CLR_INT_SIZE);
+ if (!mdev->clr_base) {
+ mthca_err(mdev, "Couldn't map command register, "
+ "aborting.\n");
+ err = -ENOMEM;
+ goto err_out_iounmap;
+ }
+
+ mthca_base = pci_resource_start(pdev, 2);
+ mdev->kar = ioremap(mthca_base + PAGE_SIZE * MTHCA_KAR_PAGE, PAGE_SIZE);
+ if (!mdev->kar) {
+ mthca_err(mdev, "Couldn't map kernel access region, "
+ "aborting.\n");
+ err = -ENOMEM;
+ goto err_out_iounmap_clr;
+ }
+
+ err = mthca_tune_pci(mdev);
+ if (err)
+ goto err_out_iounmap_kar;
+
+ err = mthca_init_hca(mdev);
+ if (err)
+ goto err_out_iounmap_kar;
+
+ err = mthca_setup_hca(mdev);
+ if (err)
+ goto err_out_close;
+
+ err = mthca_register_device(mdev);
+ if (err)
+ goto err_out_cleanup;
+
+ err = mthca_create_agents(mdev);
+ if (err)
+ goto err_out_unregister;
+
+ pci_set_drvdata(pdev, mdev);
+
+ return 0;
+
+err_out_unregister:
+ mthca_unregister_device(mdev);
+
+err_out_cleanup:
+ mthca_cleanup_mcg_table(mdev);
+ mthca_cleanup_av_table(mdev);
+ mthca_cleanup_qp_table(mdev);
+ mthca_cleanup_cq_table(mdev);
+ mthca_cmd_use_polling(mdev);
+ mthca_cleanup_eq_table(mdev);
+
+ mthca_pd_free(mdev, &mdev->driver_pd);
+
+ mthca_cleanup_mr_table(mdev);
+ mthca_cleanup_pd_table(mdev);
+
+err_out_close:
+ mthca_close_hca(mdev);
+
+err_out_iounmap_kar:
+ iounmap(mdev->kar);
+
+err_out_iounmap_clr:
+ iounmap(mdev->clr_base);
+
+err_out_iounmap:
+ iounmap(mdev->hcr);
+
+err_out_free_dev:
+ if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
+ pci_disable_msix(pdev);
+ if (mdev->mthca_flags & MTHCA_FLAG_MSI)
+ pci_disable_msi(pdev);
+
+ ib_dealloc_device(&mdev->ib_dev);
+
+err_out_free_res:
+ mthca_release_regions(pdev, ddr_hidden);
+
+err_out_disable_pdev:
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ return err;
+}
+
+static void __devexit mthca_remove_one(struct pci_dev *pdev)
+{
+ struct mthca_dev *mdev = pci_get_drvdata(pdev);
+ u8 status;
+ int p;
+
+ if (mdev) {
+ mthca_free_agents(mdev);
+ mthca_unregister_device(mdev);
+
+ for (p = 1; p <= mdev->limits.num_ports; ++p)
+ mthca_CLOSE_IB(mdev, p, &status);
+
+ mthca_cleanup_mcg_table(mdev);
+ mthca_cleanup_av_table(mdev);
+ mthca_cleanup_qp_table(mdev);
+ mthca_cleanup_cq_table(mdev);
+ mthca_cmd_use_polling(mdev);
+ mthca_cleanup_eq_table(mdev);
+
+ mthca_pd_free(mdev, &mdev->driver_pd);
+
+ mthca_cleanup_mr_table(mdev);
+ mthca_cleanup_pd_table(mdev);
+
+ mthca_close_hca(mdev);
+
+ iounmap(mdev->hcr);
+ iounmap(mdev->clr_base);
+
+ if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
+ pci_disable_msix(pdev);
+ if (mdev->mthca_flags & MTHCA_FLAG_MSI)
+ pci_disable_msi(pdev);
+
+ ib_dealloc_device(&mdev->ib_dev);
+ mthca_release_regions(pdev, mdev->mthca_flags &
+ MTHCA_FLAG_DDR_HIDDEN);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ }
+}
+
+static struct pci_device_id mthca_pci_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR),
+ .driver_data = TAVOR },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_TAVOR),
+ .driver_data = TAVOR },
+ { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT),
+ .driver_data = ARBEL_COMPAT },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT),
+ .driver_data = ARBEL_COMPAT },
+ { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL),
+ .driver_data = ARBEL_NATIVE },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL),
+ .driver_data = ARBEL_NATIVE },
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, mthca_pci_table);
+
+static struct pci_driver mthca_driver = {
+ .name = "ib_mthca",
+ .id_table = mthca_pci_table,
+ .probe = mthca_init_one,
+ .remove = __devexit_p(mthca_remove_one)
+};
+
+static int __init mthca_init(void)
+{
+ int ret;
+
+ /*
+ * TODO: measure whether dynamically choosing doorbell code at
+ * runtime affects our performance. Is there a "magic" way to
+ * choose without having to follow a function pointer every
+ * time we ring a doorbell?
+ */
+#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL
+ if (!cpu_has_xmm) {
+ printk(KERN_ERR PFX "mthca was compiled with SSE doorbell code, but\n");
+ printk(KERN_ERR PFX "the current CPU does not support SSE.\n");
+ printk(KERN_ERR PFX "Turn off CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL "
+ "and recompile.\n");
+ return -ENODEV;
+ }
+#endif
+
+ ret = pci_register_driver(&mthca_driver);
+ return ret < 0 ? ret : 0;
+}
+
+static void __exit mthca_cleanup(void)
+{
+ pci_unregister_driver(&mthca_driver);
+}
+
+module_init(mthca_init);
+module_exit(mthca_cleanup);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/