aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRoland Dreier <rolandd@cisco.com>2007-04-09 00:49:42 -0700
committerRoland Dreier <rolandd@cisco.com>2007-04-09 00:49:42 -0700
commitd049a1279b829c61576c0d17a6b29489ad5e9383 (patch)
tree345167534148263fe1d4529a8f6be0cf4c9cef0f
downloadlibmlx4-d049a1279b829c61576c0d17a6b29489ad5e9383.tar.gz
Initial import of libmlx4 repository
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--.gitignore17
-rw-r--r--AUTHORS1
-rw-r--r--COPYING378
-rw-r--r--Makefile.am26
-rw-r--r--README58
-rwxr-xr-xautogen.sh8
-rw-r--r--config/.gitignore8
-rw-r--r--configure.in76
-rw-r--r--debian/changelog5
-rw-r--r--debian/compat1
-rw-r--r--debian/control52
-rw-r--r--debian/copyright43
-rw-r--r--debian/libmlx4-1.install1
-rw-r--r--debian/libmlx4-dev.install1
-rwxr-xr-xdebian/rules8
-rw-r--r--libmlx4.spec.in55
-rw-r--r--mlx4.driver1
-rw-r--r--src/.gitignore3
-rw-r--r--src/ah.c60
-rw-r--r--src/buf.c82
-rw-r--r--src/cq.c342
-rw-r--r--src/dbrec.c125
-rw-r--r--src/doorbell.h63
-rw-r--r--src/mlx4-abi.h87
-rw-r--r--src/mlx4.c262
-rw-r--r--src/mlx4.h338
-rw-r--r--src/mlx4.map6
-rw-r--r--src/qp.c442
-rw-r--r--src/srq.c163
-rw-r--r--src/verbs.c580
-rw-r--r--src/wqe.h120
31 files changed, 3412 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4c45b09
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,17 @@
+*.o
+*.lo
+configure
+Makefile.in
+autom4te.cache
+aclocal.m4
+stamp-h.in
+config.h.in
+config.log
+config.h
+.libs
+.deps
+libmlx4.spec
+Makefile
+config.status
+stamp-h1
+libtool
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..ffe1800
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1 @@
+Roland Dreier <rolandd@cisco.com>
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..add3d19
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,378 @@
+This software is available to you under a choice of one of two
+licenses. You may choose to be licensed under the terms of the
+OpenIB.org BSD license or the GNU General Public License (GPL) Version
+2, both included below.
+
+Copyright (c) 2007 Cisco, Inc. All rights reserved.
+
+==================================================================
+
+ OpenIB.org BSD license
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+==================================================================
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..a7afb14
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,26 @@
+AM_CFLAGS = -g -Wall -D_GNU_SOURCE
+
+mlx4_version_script = @MLX4_VERSION_SCRIPT@
+
+MLX4_SOURCES = src/buf.c src/cq.c src/dbrec.c src/mlx4.c src/qp.c \
+ src/srq.c src/verbs.c
+
+if HAVE_IBV_DEVICE_LIBRARY_EXTENSION
+ lib_LTLIBRARIES = src/libmlx4.la
+ src_libmlx4_la_SOURCES = $(MLX4_SOURCES)
+ src_libmlx4_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \
+ $(mlx4_version_script)
+ mlx4confdir = $(sysconfdir)/libibverbs.d
+ mlx4conf_DATA = mlx4.driver
+else
+ mlx4libdir = $(libdir)/infiniband
+ mlx4lib_LTLIBRARIES = src/mlx4.la
+ src_mlx4_la_SOURCES = $(MLX4_SOURCES)
+ src_mlx4_la_LDFLAGS = -avoid-version -module $(mlx4_version_script)
+endif
+
+EXTRA_DIST = src/doorbell.h src/mlx4.h src/mlx4-abi.h src/wqe.h \
+ src/mlx4.map libmlx4.spec.in mlx4.driver
+
+dist-hook: libmlx4.spec
+ cp libmlx4.spec $(distdir)
diff --git a/README b/README
new file mode 100644
index 0000000..b66909c
--- /dev/null
+++ b/README
@@ -0,0 +1,58 @@
+Introduction
+============
+
+libmlx4 is a userspace driver for Mellanox ConnectX InfiniBand HCAs.
+It works as a plug-in module for libibverbs that allows programs to
+use Mellanox hardware directly from userspace. See the libibverbs
+package for more information.
+
+Using libmlx4
+==============
+
+libmlx4 will be loaded and used automatically by programs linked with
+libibverbs. The mlx4_ib kernel module must be loaded for HCA devices
+to be detected and used.
+
+Supported Hardware
+==================
+
+libmlx4 currently supports HCAs based on the following Mellanox chip:
+
+ MT25408 ConnectX (PCI Express)
+
+These HCAs use the mlx4_ib kernel driver. Support for other Mellanox
+HCAs, which use the ib_mthca kernel driver, is provided by the
+libmthca userspace driver.
+
+Valgrind Support
+================
+
+When running applications that use libibverbs under the Valgrind
+memory-checking debugger, Valgrind will falsely report "read from
+uninitialized" for memory that was initialized by the kernel drivers
+or HCA hardware. Specifically, Valgrind cannot see when kernel
+drivers or HCA hardware write to userspace memory, so when the process
+reads from that memory, Valgrind incorrectly assumes that the memory
+contents are uninitialized, and therefore raises a warning.
+
+libmlx4 can be built with specific support for the Valgrind
+memory-checking debugger by specifying the --with-valgrind command
+line argument to configure. This flag enables code in libibverbs to
+tell Valgrind "this memory may look uninitialized, but it's really
+OK," which therefore suppresses the incorrect "read from
+uninitialized" warnings. This code adds trivial overhead to the
+critical performance path, so it is disabled by default. The intent
+is that production users can use a "normal" build of libmlx4 and
+developers can use the "valgrind debug" build by simply switching
+their OPENIB_DRIVER_PATH environment variables.
+
+Libmlx4 needs some header files from Valgrind in order to compile this
+support; it is important to use the header files from the same version
+of Valgrind that will be used at run time. You may need to specify
+the directory where Valgrind's header files are installed as an
+argument to --with-valgrind. For example
+
+ ./configure --with-valgrind=/opt/valgrind
+
+will make the libmlx4 build look for Valgrind headers in
+/opt/valgrind/include.
diff --git a/autogen.sh b/autogen.sh
new file mode 100755
index 0000000..fd47839
--- /dev/null
+++ b/autogen.sh
@@ -0,0 +1,8 @@
+#! /bin/sh
+
+set -x
+aclocal -I config
+libtoolize --force --copy
+autoheader
+automake --foreign --add-missing --copy
+autoconf
diff --git a/config/.gitignore b/config/.gitignore
new file mode 100644
index 0000000..4d4c7b1
--- /dev/null
+++ b/config/.gitignore
@@ -0,0 +1,8 @@
+mkinstalldirs
+depcomp
+compile
+missing
+config.guess
+config.sub
+ltmain.sh
+install-sh
diff --git a/configure.in b/configure.in
new file mode 100644
index 0000000..b7d274b
--- /dev/null
+++ b/configure.in
@@ -0,0 +1,76 @@
+dnl Process this file with autoconf to produce a configure script.
+
+AC_PREREQ(2.57)
+AC_INIT(libmlx4, 0.1, general@lists.openfabrics.org)
+AC_CONFIG_SRCDIR([src/mlx4.h])
+AC_CONFIG_AUX_DIR(config)
+AM_CONFIG_HEADER(config.h)
+AM_INIT_AUTOMAKE(libmlx4, 0.1)
+AM_PROG_LIBTOOL
+
+AC_ARG_WITH([valgrind],
+ AC_HELP_STRING([--with-valgrind],
+ [Enable Valgrind annotations (small runtime overhead, default NO)]))
+if test x$with_valgrind = x || test x$with_valgrind = xno; then
+ want_valgrind=no
+ AC_DEFINE([NVALGRIND], 1, [disable Valgrind annotations])
+else
+ want_valgrind=yes
+ if test -d $with_valgrind; then
+ CPPFLAGS="$CPPFLAGS -I$with_valgrind/include"
+ fi
+fi
+
+dnl Checks for programs
+AC_PROG_CC
+
+dnl Checks for libraries
+AC_CHECK_LIB(ibverbs, ibv_get_device_list, [],
+ AC_MSG_ERROR([ibv_get_device_list() not found. libmlx4 requires libibverbs.]))
+
+dnl Checks for header files.
+AC_CHECK_HEADER(infiniband/driver.h, [],
+ AC_MSG_ERROR([<infiniband/driver.h> not found. libmlx4 requires libibverbs.]))
+AC_HEADER_STDC
+AC_CHECK_HEADER(valgrind/memcheck.h, memcheck_ok=yes, memcheck_ok=no)
+
+if test $want_valgrind = yes && test $memcheck_ok = no; then
+ AC_MSG_ERROR([Valgrind memcheck support requested, but <valgrind/memcheck.h> not found.])
+fi
+
+dnl Checks for typedefs, structures, and compiler characteristics.
+AC_C_CONST
+AC_CHECK_SIZEOF(long)
+
+dnl Checks for library functions
+AC_CHECK_FUNCS(ibv_read_sysfs_file ibv_dontfork_range ibv_dofork_range \
+ ibv_register_driver)
+
+dnl Now check for libibverbs 1.0 vs 1.1
+dummy=if$$
+cat <<IBV_VERSION > $dummy.c
+#include <infiniband/driver.h>
+IBV_DEVICE_LIBRARY_EXTENSION
+IBV_VERSION
+IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1`
+rm -f $dummy.c
+AM_CONDITIONAL(HAVE_IBV_DEVICE_LIBRARY_EXTENSION,
+ test $IBV_DEVICE_LIBRARY_EXTENSION != IBV_DEVICE_LIBRARY_EXTENSION)
+AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION)
+
+AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script,
+ [if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then
+ ac_cv_version_script=yes
+ else
+ ac_cv_version_script=no
+ fi])
+
+if test $ac_cv_version_script = yes; then
+ MLX4_VERSION_SCRIPT='-Wl,--version-script=$(srcdir)/src/mlx4.map'
+else
+ MLX4_VERSION_SCRIPT=
+fi
+AC_SUBST(MLX4_VERSION_SCRIPT)
+
+AC_CONFIG_FILES([Makefile libmlx4.spec])
+AC_OUTPUT
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..ba2961f
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,5 @@
+libmlx4 (0.1-1) unstable; urgency=low
+
+ * Initial release.
+
+ -- Roland Dreier <rolandd@cisco.com> Fri, 6 Apr 2007 10:04:57 -0700
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..7ed6ff8
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+5
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..356a47a
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,52 @@
+Source: libmlx4
+Priority: extra
+Maintainer: Roland Dreier <rolandd@cisco.com>
+Build-Depends: cdbs (>= 0.4.25-1), debhelper (>= 5), libibverbs-dev (>= 1.0), autotools-dev
+Standards-Version: 3.7.2
+Section: libs
+
+Package: libmlx4-1
+Section: libs
+Architecture: any
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Description: A userspace driver for Mellanox ConnectX InfiniBand HCAs
+ libmlx4 is a device-specific driver for Mellanox ConnectX InfiniBand
+ host channel adapters (HCAs) for the libibverbs library. This allows
+ userspace processes to access Mellanox HCA hardware directly with
+ low latency and low overhead.
+ .
+ This package contains the loadable plug-in.
+ .
+ Homepage: http://www.openfabrics.org/
+
+Package: libmlx4-dev
+Section: libdevel
+Architecture: any
+Depends: ${misc:Depends}, libmlx4-1 (= ${Source-Version})
+Description: Development files for the libmlx4 driver
+ libmlx4 is a device-specific driver for Mellanox ConnectX InfiniBand
+ host channel adapters (HCAs) for the libibverbs library. This allows
+ userspace processes to access Mellanox HCA hardware directly with
+ low latency and low overhead.
+ .
+ This package contains static versions of libmlx4 that may be linked
+ directly to an application, which may be useful for debugging.
+ .
+ Homepage: http://www.openfabrics.org/
+
+Package: libmlx4-1-dbg
+Section: libdevel
+Priority: extra
+Architecture: any
+Depends: ${misc:Depends}, libmlx4-1 (= ${Source-Version})
+Description: Debugging symbols for the libmlx4 driver
+ libmlx4 is a device-specific driver for Mellanox ConnectX InfiniBand
+ host channel adapters (HCAs) for the libibverbs library. This allows
+ userspace processes to access Mellanox HCA hardware directly with
+ low latency and low overhead.
+ .
+ This package contains the debugging symbols associated with
+ libmlx4-1. They will automatically be used by gdb for debugging
+ libmlx4-related issues.
+ .
+ Homepage: http://www.openfabrics.org/
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..91942cc
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,43 @@
+Initial Debianization:
+This package was debianized by Roland Dreier <rolandd@cisco.com> on
+Fri, 6 Apr 2007 10:04:57 -0700
+
+Source:
+It was downloaded from the OpenFabrics web site at
+<https://openfabrics.org/downloads.html>
+
+Authors:
+ Roland Dreier <rolandd@cisco.com>
+
+Portions are copyrighted by:
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+
+libmlx4 is licensed under a choice of one of two licenses. You may
+choose to be licensed under the terms of the GNU General Public
+License (GPL) Version 2, available from the file
+/usr/share/common-licenses/GPL-2 on your Debian system, or the
+OpenIB.org BSD license below:
+
+ Redistribution and use in source and binary forms, with or
+ without modification, are permitted provided that the following
+ conditions are met:
+
+ - Redistributions of source code must retain the above
+ copyright notice, this list of conditions and the following
+ disclaimer.
+
+ - Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials
+ provided with the distribution.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/debian/libmlx4-1.install b/debian/libmlx4-1.install
new file mode 100644
index 0000000..4fe7596
--- /dev/null
+++ b/debian/libmlx4-1.install
@@ -0,0 +1 @@
+usr/lib/infiniband/mlx4.so
diff --git a/debian/libmlx4-dev.install b/debian/libmlx4-dev.install
new file mode 100644
index 0000000..77ea9e1
--- /dev/null
+++ b/debian/libmlx4-dev.install
@@ -0,0 +1 @@
+usr/lib/infiniband/mlx4.{a,la}
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..15721aa
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,8 @@
+#!/usr/bin/make -f
+# -*- mode: makefile; coding: utf-8 -*-
+
+DEB_DH_INSTALL_SOURCEDIR := debian/tmp
+DEB_DH_STRIP_ARGS := --dbg-package=libmlx4-1-dbg
+
+include /usr/share/cdbs/1/rules/debhelper.mk
+include /usr/share/cdbs/1/class/autotools.mk
diff --git a/libmlx4.spec.in b/libmlx4.spec.in
new file mode 100644
index 0000000..52d6c52
--- /dev/null
+++ b/libmlx4.spec.in
@@ -0,0 +1,55 @@
+Name: libmlx4
+Version: 0.1
+Release: 1%{?dist}
+Summary: Mellanox InfiniBand HCA Userspace Driver
+
+Group: System Environment/Libraries
+License: GPL/BSD
+Url: http://openib.org/
+Source: http://openib.org/downloads/libmlx4-0.1.tar.gz
+BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
+
+BuildRequires: libibverbs-devel >= 1.1-0.1.rc2
+
+%description
+libmlx4 provides a device-specific userspace driver for Mellanox
+ConnectX HCAs for use with the libibverbs library.
+
+%package devel-static
+Summary: Development files for the libmlx4 driver
+Group: System Environment/Libraries
+Requires: %{name} = %{version}-%{release}
+
+%description devel-static
+Static version of libmlx4 that may be linked directly to an
+application, which may be useful for debugging.
+
+%prep
+%setup -q -n %{name}-@VERSION@
+
+%build
+%configure
+make %{?_smp_mflags}
+
+%install
+rm -rf $RPM_BUILD_ROOT
+make DESTDIR=%{buildroot} install
+# remove unpackaged files from the buildroot
+rm -f $RPM_BUILD_ROOT%{_libdir}/*.la $RPM_BUILD_ROOT%{_libdir}/libmlx4.so
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%files
+%defattr(-,root,root,-)
+%{_libdir}/libmlx4-rdmav2.so
+%{_sysconfdir}/libibverbs.d/mlx4.driver
+%doc AUTHORS COPYING ChangeLog README
+
+%files devel-static
+%defattr(-,root,root,-)
+%{_libdir}/libmlx4.a
+
+%changelog
+* Fri Apr 6 2007 Roland Dreier <rdreier@cisco.com> - 0.1-1
+- Initial Fedora spec file
diff --git a/mlx4.driver b/mlx4.driver
new file mode 100644
index 0000000..4d29fa8
--- /dev/null
+++ b/mlx4.driver
@@ -0,0 +1 @@
+driver mlx4
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 0000000..7297cbb
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,3 @@
+*.la
+.dirstamp
+.libs
diff --git a/src/ah.c b/src/ah.c
new file mode 100644
index 0000000..dd4bf88
--- /dev/null
+++ b/src/ah.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+#include <netinet/in.h>
+#include <pthread.h>
+#include <string.h>
+
+#include "mlx4.h"
+
+int mlx4_alloc_av(struct mlx4_pd *pd, struct ibv_ah_attr *attr,
+ struct mlx4_ah *ah)
+{
+ /*
+ * Attach a zero-filled address vector to ah. Neither pd nor
+ * attr is consulted. Returns 0 on success, -1 if out of memory.
+ */
+ ah->av = calloc(1, sizeof *ah->av);
+ if (ah->av == NULL)
+ return -1;
+ return 0;
+}
+
+void mlx4_free_av(struct mlx4_ah *ah)
+{
+ free(ah->av); /* frees the address vector set up by mlx4_alloc_av(); ah->av itself is not cleared */
+}
diff --git a/src/buf.c b/src/buf.c
new file mode 100644
index 0000000..0e5f9b6
--- /dev/null
+++ b/src/buf.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+
+#include "mlx4.h"
+
+#if !(defined(HAVE_IBV_DONTFORK_RANGE) && defined(HAVE_IBV_DOFORK_RANGE))
+
+/*
+ * If libibverbs isn't exporting these functions, then there's no
+ * point in doing it here, because the rest of libibverbs isn't going
+ * to be fork-safe anyway.
+ */
+static int ibv_dontfork_range(void *base, size_t size)
+{
+ return 0; /* no-op fallback: reports success without touching [base, base + size) */
+}
+
+static int ibv_dofork_range(void *base, size_t size)
+{
+ return 0; /* no-op fallback: reports success without touching [base, base + size) */
+}
+
+#endif /* HAVE_IBV_DONTFORK_RANGE && HAVE_IBV_DOFORK_RANGE */
+
+int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size)
+{
+ /* page_size-aligned block of align(size, page_size) bytes. */
+ int rc = posix_memalign(&buf->buf, page_size, align(size, page_size));
+
+ if (rc != 0)
+ return rc;
+
+ /* Only the first size bytes are handed to ibv_dontfork_range(). */
+ rc = ibv_dontfork_range(buf->buf, size);
+ if (rc != 0) {
+ free(buf->buf);
+ return rc;
+ }
+ buf->length = size;
+ return 0;
+}
+
+void mlx4_free_buf(struct mlx4_buf *buf)
+{
+ ibv_dofork_range(buf->buf, buf->length); /* undo the ibv_dontfork_range() call made in mlx4_alloc_buf() */
+ free(buf->buf); /* buf->buf and buf->length are left as they were */
+}
diff --git a/src/cq.c b/src/cq.c
new file mode 100644
index 0000000..aae5c37
--- /dev/null
+++ b/src/cq.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <netinet/in.h>
+#include <string.h>
+
+#include <infiniband/opcode.h>
+
+#include "mlx4.h"
+#include "doorbell.h"
+
+enum {
+ MLX4_CQ_DOORBELL = 0x20 /* offset passed to mlx4_write64() by mlx4_arm_cq() */
+};
+
+enum { /* result codes of mlx4_poll_one() */
+ CQ_OK = 0, /* one completion was consumed */
+ CQ_EMPTY = -1, /* next_cqe_sw() found no software-owned CQE */
+ CQ_POLL_ERR = -2 /* CQE names a QP that mlx4_find_qp() cannot resolve */
+};
+
+#define MLX4_CQ_DB_REQ_NOT_SOL (1 << 24) /* arm command chosen when mlx4_arm_cq() is called with solicited != 0 */
+#define MLX4_CQ_DB_REQ_NOT (2 << 24) /* arm command chosen when solicited == 0 */
+
+enum { /* bit fields of the owner_sr_opcode byte of a CQE */
+ MLX4_CQE_OWNER_MASK = 0x80, /* ownership bit tested by next_cqe_sw() */
+ MLX4_CQE_IS_SEND_MASK = 0x40, /* non-zero selects the send-queue path in mlx4_poll_one() */
+ MLX4_CQE_OPCODE_MASK = 0x1f /* low five bits: opcode */
+};
+
+enum { /* presumably the values of mlx4_err_cqe.syndrome; not consumed yet because handle_error_cqe() is a stub */
+ SYNDROME_LOCAL_LENGTH_ERR = 0x01,
+ SYNDROME_LOCAL_QP_OP_ERR = 0x02,
+ SYNDROME_LOCAL_EEC_OP_ERR = 0x03,
+ SYNDROME_LOCAL_PROT_ERR = 0x04,
+ SYNDROME_WR_FLUSH_ERR = 0x05,
+ SYNDROME_MW_BIND_ERR = 0x06,
+ SYNDROME_BAD_RESP_ERR = 0x10,
+ SYNDROME_LOCAL_ACCESS_ERR = 0x11,
+ SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12,
+ SYNDROME_REMOTE_ACCESS_ERR = 0x13,
+ SYNDROME_REMOTE_OP_ERR = 0x14,
+ SYNDROME_RETRY_EXC_ERR = 0x15,
+ SYNDROME_RNR_RETRY_EXC_ERR = 0x16,
+ SYNDROME_LOCAL_RDD_VIOL_ERR = 0x20,
+ SYNDROME_REMOTE_INVAL_RD_REQ_ERR = 0x21,
+ SYNDROME_REMOTE_ABORTED_ERR = 0x22,
+ SYNDROME_INVAL_EECN_ERR = 0x23,
+ SYNDROME_INVAL_EEC_STATE_ERR = 0x24
+};
+
+struct mlx4_cqe { /* completion queue entry as decoded by mlx4_poll_one(); multi-byte fields go through ntohl()/ntohs() there */
+ uint32_t my_qpn; /* low 24 bits: QP number */
+ uint32_t immed_rss_invalid; /* copied to wc->imm_data for *_IMM receives; top 16 bits feed wc->pkey_index */
+ uint32_t g_mlpath_rqpn; /* masked as: bits 0-23 source QP, bits 24-30 DLID path bits, bit 31 GRH flag */
+ uint8_t sl; /* wc->sl is taken from the upper nibble */
+ uint8_t reserved1;
+ uint16_t rlid; /* becomes wc->slid */
+ uint32_t reserved2;
+ uint32_t byte_cnt; /* becomes wc->byte_len */
+ uint16_t wqe_index; /* read for send and SRQ completions only */
+ uint16_t checksum;
+ uint8_t reserved3[3];
+ uint8_t owner_sr_opcode; /* split with the MLX4_CQE_*_MASK constants */
+};
+
+struct mlx4_err_cqe { /* alternate view of a CQE; mlx4_poll_one() casts to it when the opcode equals MLX4_CQE_OPCODE_ERROR */
+ uint32_t my_qpn;
+ uint32_t reserved1[5];
+ uint16_t wqe_index;
+ uint8_t vendor_err;
+ uint8_t syndrome; /* presumably one of the SYNDROME_* values — not read anywhere yet */
+ uint8_t reserved2[3];
+ uint8_t owner_sr_opcode;
+};
+
+static inline struct mlx4_cqe *get_cqe(struct mlx4_cq *cq, int entry)
+{
+ return cq->buf.buf + entry * MLX4_CQ_ENTRY_SIZE; /* address of slot 'entry' in the CQ buffer; no bounds check */
+}
+
+static inline struct mlx4_cqe *next_cqe_sw(struct mlx4_cq *cq)
+{ /* Returns the CQE at the consumer index, or NULL when its owner bit disagrees with the wrap bit of cons_index. */
+ struct mlx4_cqe *cqe = get_cqe(cq, cq->cons_index & cq->ibv_cq.cqe); /* ibv_cq.cqe is used as an index mask — assumes it is 2^n - 1, TODO confirm */
+
+ return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+ !!(cq->cons_index & (cq->ibv_cq.cqe + 1))) ? NULL : cqe; /* the (cqe + 1) bit of cons_index serves as the expected owner value */
+}
+
+static void update_cons_index(struct mlx4_cq *cq)
+{
+ *cq->set_ci_db = htonl(cq->cons_index & 0xffffff); /* store the low 24 bits of the consumer index, big-endian, in the set_ci_db word */
+}
+
+static int handle_error_cqe(struct mlx4_cq *cq, struct mlx4_qp *qp,
+ int wqe_index, int is_send,
+ struct mlx4_err_cqe *cqe,
+ struct ibv_wc *wc)
+{
+ /* XXX handle error CQE: nothing is decoded yet, so wc->status is never written here */
+ return 0; /* same value as CQ_OK, so the caller counts the entry as polled */
+}
+
+static int mlx4_poll_one(struct mlx4_cq *cq, /* CQ whose next software-owned entry is consumed */
+ struct mlx4_qp **cur_qp, /* in/out: QP cached from the previous CQE, looked up again when the QPN changes */
+ struct ibv_wc *wc) /* out: work completion filled from the CQE */
+{ /* Returns CQ_OK, CQ_EMPTY, CQ_POLL_ERR (unknown QPN), or handle_error_cqe()'s result for error CQEs. */
+ struct mlx4_wq *wq;
+ struct mlx4_cqe *cqe;
+ struct mlx4_srq *srq;
+ uint32_t qpn;
+ uint16_t wqe_index = 0; /* plain RQ completions never load it, but it is still passed to handle_error_cqe() */
+ int is_error;
+ int is_send;
+ int err = 0;
+
+ cqe = next_cqe_sw(cq);
+ if (!cqe)
+ return CQ_EMPTY;
+
+ ++cq->cons_index;
+
+ VALGRIND_MAKE_MEM_DEFINED(cqe, sizeof *cqe);
+
+ /*
+ * Make sure we read CQ entry contents after we've checked the
+ * ownership bit.
+ */
+ rmb();
+
+ qpn = ntohl(cqe->my_qpn) & 0xffffff; /* only the low 24 bits identify the QP */
+
+ is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
+ is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
+ MLX4_CQE_OPCODE_ERROR;
+
+ if (!*cur_qp ||
+ qpn != (*cur_qp)->ibv_qp.qp_num) {
+ /*
+ * We do not have to take the QP table lock here,
+ * because CQs will be locked while QPs are removed
+ * from the table.
+ */
+ *cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context),
+ qpn);
+ if (!*cur_qp)
+ return CQ_POLL_ERR;
+ }
+
+ wc->qp_num = (*cur_qp)->ibv_qp.qp_num;
+
+ if (is_send) {
+ wq = &(*cur_qp)->sq;
+ wqe_index = ntohs(cqe->wqe_index);
+ wq->tail += wqe_index - (uint16_t) wq->tail; /* jump the tail forward to the WQE this CQE reports */
+ wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)];
+ ++wq->tail;
+ } else if ((*cur_qp)->ibv_qp.srq) {
+ srq = to_msrq((*cur_qp)->ibv_qp.srq);
+ wqe_index = ntohs(cqe->wqe_index); /* was htons(): same bytes, but this is a wire-to-host conversion */
+ wc->wr_id = srq->wrid[wqe_index];
+ mlx4_free_srq_wqe(srq, wqe_index);
+ } else {
+ wq = &(*cur_qp)->rq;
+ wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)];
+ ++wq->tail;
+ }
+
+ if (is_error) {
+ err = handle_error_cqe(cq, *cur_qp, wqe_index, is_send,
+ (struct mlx4_err_cqe *) cqe, wc);
+ return err;
+ }
+
+ wc->status = IBV_WC_SUCCESS;
+
+ if (is_send) {
+ wc->wc_flags = 0;
+ switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
+ case MLX4_OPCODE_RDMA_WRITE_IMM:
+ wc->wc_flags |= IBV_WC_WITH_IMM; /* fall through */
+ case MLX4_OPCODE_RDMA_WRITE:
+ wc->opcode = IBV_WC_RDMA_WRITE;
+ break;
+ case MLX4_OPCODE_SEND_IMM:
+ wc->wc_flags |= IBV_WC_WITH_IMM; /* fall through */
+ case MLX4_OPCODE_SEND:
+ wc->opcode = IBV_WC_SEND;
+ break;
+ case MLX4_OPCODE_RDMA_READ:
+ wc->opcode = IBV_WC_RDMA_READ;
+ wc->byte_len = ntohl(cqe->byte_cnt);
+ break;
+ case MLX4_OPCODE_ATOMIC_CS:
+ wc->opcode = IBV_WC_COMP_SWAP;
+ /* XXX byte_len? */
+ break;
+ case MLX4_OPCODE_ATOMIC_FA:
+ wc->opcode = IBV_WC_FETCH_ADD;
+ /* XXX byte_len? */
+ break;
+ case MLX4_OPCODE_BIND_MW:
+ wc->opcode = IBV_WC_BIND_MW;
+ break;
+ default:
+ /* assume it's a send completion */
+ wc->opcode = IBV_WC_SEND;
+ break;
+ }
+ } else {
+ wc->byte_len = ntohl(cqe->byte_cnt);
+ wc->wc_flags = 0; /* the switch has no default case; keep the |= below from reading an unset value */
+ switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
+ case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
+ wc->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
+ wc->wc_flags = IBV_WC_WITH_IMM;
+ wc->imm_data = cqe->immed_rss_invalid;
+ break;
+ case MLX4_RECV_OPCODE_SEND:
+ wc->opcode = IBV_WC_RECV;
+ wc->wc_flags = 0;
+ break;
+ case MLX4_RECV_OPCODE_SEND_IMM:
+ wc->opcode = IBV_WC_RECV;
+ wc->wc_flags = IBV_WC_WITH_IMM;
+ wc->imm_data = cqe->immed_rss_invalid;
+ break;
+ }
+
+ wc->slid = ntohs(cqe->rlid);
+ wc->sl = cqe->sl >> 4;
+ wc->src_qp = ntohl(cqe->g_mlpath_rqpn) & 0xffffff;
+ wc->dlid_path_bits = (ntohl(cqe->g_mlpath_rqpn) >> 24) & 0x7f;
+ wc->pkey_index = ntohl(cqe->immed_rss_invalid) >> 16;
+ wc->wc_flags |= ntohl(cqe->g_mlpath_rqpn) & 0x80000000 ? /* 32-bit field: ntohs() kept 16 bits, so bit 31 never matched */
+ IBV_WC_GRH : 0;
+ }
+
+ return 0;
+}
+
+int mlx4_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
+{
+ struct mlx4_cq *cq = to_mcq(ibcq);
+ struct mlx4_qp *qp = NULL;
+ int status = CQ_OK;
+ int done = 0;
+
+ /* Drain up to ne entries under the CQ lock; stop at the first non-OK result. */
+ pthread_spin_lock(&cq->lock);
+ while (done < ne) {
+ status = mlx4_poll_one(cq, &qp, &wc[done]);
+ if (status != CQ_OK)
+ break;
+ ++done;
+ }
+
+ /* Publish the consumer index only if at least one entry was consumed. */
+ if (done > 0)
+ update_cons_index(cq);
+ pthread_spin_unlock(&cq->lock);
+ return (status == CQ_POLL_ERR) ? status : done;
+}
+
+int mlx4_arm_cq(struct ibv_cq *ibvcq, int solicited)
+{ /* req_notify_cq handler: writes the arm doorbell record, then the CQ doorbell register. Always returns 0. */
+ struct mlx4_cq *cq = to_mcq(ibvcq);
+ uint32_t doorbell[2];
+ uint32_t sn;
+ uint32_t ci;
+ uint32_t cmd;
+
+ sn = cq->arm_sn & 3; /* two-bit sequence number, advanced by mlx4_cq_event() */
+ ci = cq->cons_index & 0xffffff; /* 24-bit consumer index */
+ cmd = solicited ? MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT;
+
+ *cq->arm_db = htonl(sn << 28 | cmd | ci);
+
+ /*
+ * Make sure that the doorbell record in host memory is
+ * written before ringing the doorbell via PCI MMIO.
+ */
+ wmb();
+
+ doorbell[0] = htonl(sn << 28 | cmd | cq->cqn); /* same layout as the record, with the CQ number in place of the index */
+ doorbell[1] = htonl(ci);
+
+ mlx4_write64(doorbell, to_mctx(ibvcq->context), MLX4_CQ_DOORBELL);
+
+ return 0;
+}
+
+void mlx4_cq_event(struct ibv_cq *cq)
+{
+ to_mcq(cq)->arm_sn++; /* mlx4_arm_cq() folds the low two bits of arm_sn into its doorbell words */
+}
+
+void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
+{ /* empty stub: all arguments are ignored and the CQ is left untouched */
+}
+
+void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int old_cqe)
+{ /* empty stub: all arguments are ignored and nothing is copied into buf */
+}
diff --git a/src/dbrec.c b/src/dbrec.c
new file mode 100644
index 0000000..9cff0d8
--- /dev/null
+++ b/src/dbrec.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+#include <netinet/in.h>
+#include <pthread.h>
+#include <string.h>
+
+#include "mlx4.h"
+
+struct mlx4_db_page { /* one page of doorbell records plus its allocation bitmap */
+ struct mlx4_db_page *prev, *next; /* links in context->db_list[type] */
+ struct mlx4_buf buf; /* backing memory obtained with mlx4_alloc_buf() */
+ int num_db; /* total record slots in the page */
+ int use_cnt; /* slots handed out so far by mlx4_alloc_db() */
+ unsigned free[0]; /* trailing bitmap, one bit per slot; a set bit means the slot is free */
+};
+
+static const int db_size[] = { /* bytes per doorbell record, indexed by enum mlx4_db_type */
+ [MLX4_DB_TYPE_CQ] = 8,
+ [MLX4_DB_TYPE_RQ] = 4,
+};
+
+static struct mlx4_db_page *__add_page(struct mlx4_context *context,
+ enum mlx4_db_type type)
+{
+ int page_size = to_mdev(context->ibv_ctx.device)->page_size;
+ int pp = page_size / db_size[type];
+ struct mlx4_db_page *page;
+ int i;
+
+ /* Header plus one bitmap bit for each of the pp record slots. */
+ page = malloc(sizeof *page + pp / 8);
+ if (!page)
+ return NULL;
+
+ if (mlx4_alloc_buf(&page->buf, page_size, page_size)) {
+ free(page);
+ return NULL;
+ }
+
+ /* Every slot starts out free (bit set). */
+ for (i = 0; i < pp / (sizeof (int) * 8); ++i)
+ page->free[i] = ~0;
+ page->num_db = pp;
+ page->use_cnt = 0;
+
+ /* Push the page onto the head of the per-type list. */
+ page->next = context->db_list[type];
+ page->prev = NULL;
+ if (page->next)
+ page->next->prev = page;
+ context->db_list[type] = page;
+ return page;
+}
+
+uint32_t *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type)
+{ /* Hands out one free doorbell record of the given type, or NULL when a new page cannot be allocated. */
+ struct mlx4_db_page *page;
+ uint32_t *db = NULL;
+ int i, j;
+
+ pthread_mutex_lock(&context->db_list_mutex);
+
+ for (page = context->db_list[type]; page; page = page->next)
+ if (page->use_cnt < page->num_db) /* first page with a spare slot wins */
+ goto found;
+
+ page = __add_page(context, type);
+ if (!page)
+ goto out;
+
+found:
+ ++page->use_cnt;
+
+ for (i = 0; !page->free[i]; ++i) /* relies on use_cnt < num_db implying that some bitmap word is non-zero */
+ /* nothing */;
+
+ j = ffs(page->free[i]); /* 1-based index of the lowest set (free) bit */
+ page->free[i] &= ~(1U << (j - 1)); /* unsigned shift: j can be 32, and 1 << 31 on a signed int is undefined */
+ db = page->buf.buf + (i * 8 * sizeof (int) + (j - 1)) * db_size[type];
+
+out:
+ pthread_mutex_unlock(&context->db_list_mutex);
+
+ return db;
+}
+
+void mlx4_free_db(struct mlx4_context *context, uint32_t *db)
+{
+ /*XXX nothing for now: records handed out by mlx4_alloc_db() are never returned to the bitmap*/
+}
diff --git a/src/doorbell.h b/src/doorbell.h
new file mode 100644
index 0000000..3171e76
--- /dev/null
+++ b/src/doorbell.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2007 Cisco, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef DOORBELL_H
+#define DOORBELL_H
+
+#if SIZEOF_LONG == 8
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN /* pack val[0], val[1] so a 64-bit store puts val[0] at the lower address */
+# define MLX4_PAIR_TO_64(val) ((uint64_t) (val)[1] << 32 | (val)[0]) /* argument parenthesised so any pointer expression is safe */
+#elif __BYTE_ORDER == __BIG_ENDIAN
+# define MLX4_PAIR_TO_64(val) ((uint64_t) (val)[0] << 32 | (val)[1]) /* argument parenthesised so any pointer expression is safe */
+#else
+# error __BYTE_ORDER not defined
+#endif
+
+static inline void mlx4_write64(uint32_t val[2], struct mlx4_context *ctx, int offset)
+{
+ *(volatile uint64_t *) (ctx->uar + offset) = MLX4_PAIR_TO_64(val); /* one 64-bit store into the mapped UAR page; no lock is taken in this variant */
+}
+
+#else
+
+static inline void mlx4_write64(uint32_t val[2], struct mlx4_context *ctx, int offset)
+{
+ pthread_spin_lock(&ctx->uar_lock); /* the two 32-bit stores below are issued as a pair under uar_lock */
+ *(volatile uint32_t *) (ctx->uar + offset) = val[0];
+ *(volatile uint32_t *) (ctx->uar + offset + 4) = val[1];
+ pthread_spin_unlock(&ctx->uar_lock);
+}
+
+#endif
+
+#endif /* DOORBELL_H */
diff --git a/src/mlx4-abi.h b/src/mlx4-abi.h
new file mode 100644
index 0000000..2a392cb
--- /dev/null
+++ b/src/mlx4-abi.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2007 Cisco, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX4_ABI_H
+#define MLX4_ABI_H
+
+#include <infiniband/kern-abi.h>
+
+#define MLX4_UVERBS_ABI_VERSION 1
+
+struct mlx4_alloc_ucontext_resp { /* response buffer handed to ibv_cmd_get_context() by mlx4_alloc_context() */
+ struct ibv_get_context_resp ibv_resp;
+ __u32 qp_tab_size; /* copied into context->num_qps */
+ __u32 bf_reg_size; /* not read by mlx4_alloc_context() */
+};
+
+struct mlx4_alloc_pd_resp { /* generic ibv_alloc_pd_resp followed by mlx4-specific fields */
+ struct ibv_alloc_pd_resp ibv_resp;
+ __u32 pdn; /* presumably the protection domain number — confirm against the kernel driver */
+ __u32 reserved; /* presumably padding to a 64-bit multiple — TODO confirm */
+};
+
+struct mlx4_create_cq { /* generic ibv_create_cq command followed by mlx4-specific fields */
+ struct ibv_create_cq ibv_cmd;
+ __u64 buf_addr; /* presumably the user address of the CQ buffer — TODO confirm */
+ __u64 db_addr; /* presumably the user address of the CQ doorbell record — TODO confirm */
+};
+
+struct mlx4_create_cq_resp { /* generic ibv_create_cq_resp followed by mlx4-specific fields */
+ struct ibv_create_cq_resp ibv_resp;
+ __u32 cqn; /* presumably the source of cq->cqn used by mlx4_arm_cq() — TODO confirm */
+ __u32 reserved; /* presumably padding to a 64-bit multiple — TODO confirm */
+};
+
+struct mlx4_resize_cq { /* generic ibv_resize_cq command followed by mlx4-specific fields */
+ struct ibv_resize_cq ibv_cmd;
+ __u64 buf_addr; /* presumably the user address of the replacement CQ buffer — TODO confirm */
+};
+
+struct mlx4_create_srq { /* generic ibv_create_srq command followed by mlx4-specific fields */
+ struct ibv_create_srq ibv_cmd;
+ __u64 buf_addr; /* presumably the user address of the SRQ buffer — TODO confirm */
+ __u64 db_addr; /* presumably the user address of the SRQ doorbell record — TODO confirm */
+};
+
+struct mlx4_create_srq_resp { /* generic ibv_create_srq_resp followed by mlx4-specific fields */
+ struct ibv_create_srq_resp ibv_resp;
+ __u32 srqn; /* presumably the SRQ number assigned by the kernel — TODO confirm */
+ __u32 reserved; /* presumably padding to a 64-bit multiple — TODO confirm */
+};
+
+struct mlx4_create_qp { /* generic ibv_create_qp command followed by mlx4-specific fields */
+ struct ibv_create_qp ibv_cmd;
+ __u64 buf_addr; /* presumably the user address of the QP work-queue buffer — TODO confirm */
+ __u64 db_addr; /* presumably the user address of the QP doorbell record — TODO confirm */
+};
+
+#endif /* MLX4_ABI_H */
diff --git a/src/mlx4.c b/src/mlx4.c
new file mode 100644
index 0000000..95f30d0
--- /dev/null
+++ b/src/mlx4.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2007 Cisco, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <string.h>
+
+#ifndef HAVE_IBV_REGISTER_DRIVER
+#include <sysfs/libsysfs.h>
+#endif
+
+#ifndef HAVE_IBV_READ_SYSFS_FILE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#endif
+
+#include "mlx4.h"
+#include "mlx4-abi.h"
+
+#ifndef PCI_VENDOR_ID_MELLANOX
+#define PCI_VENDOR_ID_MELLANOX 0x15b3 /* fallback, used only when the macro is not already defined */
+#endif
+
+#ifndef PCI_DEVICE_ID_MELLANOX_HERMON
+#define PCI_DEVICE_ID_MELLANOX_HERMON 0x6340 /* the single device ID listed in hca_table */
+#endif
+
+#define HCA(v, d) /* one hca_table entry; the device ID is always looked up under the MELLANOX prefix, whatever v is */ \
+ { .vendor = PCI_VENDOR_ID_##v, \
+ .device = PCI_DEVICE_ID_MELLANOX_##d }
+
+struct { /* NOTE(review): neither static nor const, so hca_table has external linkage and is writable */
+ unsigned vendor;
+ unsigned device;
+} hca_table[] = { /* PCI vendor/device pairs that mlx4_driver_init() accepts */
+ HCA(MELLANOX, HERMON),
+};
+
+static struct ibv_context_ops mlx4_ctx_ops = { /* verbs dispatch table; copied by value into context->ibv_ctx.ops in mlx4_alloc_context() */
+ .query_device = mlx4_query_device,
+ .query_port = mlx4_query_port,
+ .alloc_pd = mlx4_alloc_pd,
+ .dealloc_pd = mlx4_free_pd,
+ .reg_mr = mlx4_reg_mr,
+ .dereg_mr = mlx4_dereg_mr,
+ .create_cq = mlx4_create_cq,
+ .poll_cq = mlx4_poll_cq,
+ .req_notify_cq = mlx4_arm_cq,
+ .cq_event = mlx4_cq_event,
+ .resize_cq = mlx4_resize_cq,
+ .destroy_cq = mlx4_destroy_cq,
+ .create_srq = mlx4_create_srq,
+ .modify_srq = mlx4_modify_srq,
+ .query_srq = mlx4_query_srq,
+ .destroy_srq = mlx4_destroy_srq,
+ .post_srq_recv = mlx4_post_srq_recv,
+ .create_qp = mlx4_create_qp,
+ .query_qp = mlx4_query_qp,
+ .modify_qp = mlx4_modify_qp,
+ .destroy_qp = mlx4_destroy_qp,
+ .post_send = mlx4_post_send,
+ .post_recv = mlx4_post_recv,
+ .create_ah = mlx4_create_ah,
+ .destroy_ah = mlx4_destroy_ah,
+ .attach_mcast = mlx4_attach_mcast,
+ .detach_mcast = mlx4_detach_mcast
+};
+
+static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd)
+{ /* Builds the device context; returns NULL if malloc(), ibv_cmd_get_context() or the UAR mmap() fails. */
+ struct mlx4_context *context;
+ struct ibv_get_context cmd;
+ struct mlx4_alloc_ucontext_resp resp;
+ int i;
+
+ context = malloc(sizeof *context); /* not zeroed: only the fields assigned below are initialised here */
+ if (!context)
+ return NULL;
+
+ context->ibv_ctx.cmd_fd = cmd_fd;
+
+ if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
+ &resp.ibv_resp, sizeof resp))
+ goto err_free;
+
+ context->num_qps = resp.qp_tab_size;
+ context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS; /* ffs() - 1 equals log2 only for a power of two — presumably guaranteed, TODO confirm */
+ context->qp_table_mask = (1 << context->qp_table_shift) - 1;
+
+ pthread_mutex_init(&context->qp_table_mutex, NULL);
+ for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i)
+ context->qp_table[i].refcnt = 0;
+
+ for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
+ context->db_list[i] = NULL; /* doorbell pages are added on demand by mlx4_alloc_db() */
+
+ pthread_mutex_init(&context->db_list_mutex, NULL);
+
+ context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE,
+ MAP_SHARED, cmd_fd, 0); /* one write-only page at offset 0 of the command fd */
+ if (context->uar == MAP_FAILED)
+ goto err_free; /* NOTE(review): the two mutexes initialised above are not destroyed on this path */
+
+ pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
+
+ context->ibv_ctx.ops = mlx4_ctx_ops; /* struct copy of the dispatch table */
+
+ return &context->ibv_ctx;
+
+err_free:
+ free(context);
+ return NULL;
+}
+
+static void mlx4_free_context(struct ibv_context *ibctx)
+{
+ struct mlx4_context *context = to_mctx(ibctx);
+
+ munmap(context->uar, to_mdev(ibctx->device)->page_size); /* same length as the mmap() in mlx4_alloc_context() */
+ free(context); /* NOTE(review): the mutexes, uar_lock and any db_list pages are not torn down here */
+}
+
+static struct ibv_device_ops mlx4_dev_ops = { /* copied into dev->ibv_dev.ops by mlx4_driver_init() */
+ .alloc_context = mlx4_alloc_context,
+ .free_context = mlx4_free_context
+};
+
+/*
+ * Keep a private implementation of ibv_read_sysfs_file() to handle
+ * old versions of libibverbs that didn't implement it. This can be
+ * removed when libibverbs 1.0.3 or newer is available "everywhere."
+ */
+#ifndef HAVE_IBV_READ_SYSFS_FILE
+static int ibv_read_sysfs_file(const char *dir, const char *file,
+ char *buf, size_t size)
+{ /* Reads dir/file into buf as a NUL-terminated string; returns its length, or a negative value on failure. */
+ char path[256];
+ int fd;
+ int len;
+
+ snprintf(path, sizeof path, "%s/%s", dir, file);
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ len = read(fd, buf, size);
+ close(fd);
+ if (len > 0 && buf[len - 1] == '\n')
+ buf[--len] = '\0'; /* strip one trailing newline */
+ else if (len >= 0 && (size_t) len < size)
+ buf[len] = '\0'; /* callers hand buf to sscanf()/strtol(), so it must be terminated */
+ else if (len > 0)
+ return -1; /* buffer completely full: no room for a terminator without truncating */
+ return len;
+}
+#endif /* HAVE_IBV_READ_SYSFS_FILE */
+
+static struct ibv_device *mlx4_driver_init(const char *uverbs_sys_path,
+ int abi_version)
+{ /* Returns a new ibv_device if the PCI IDs under uverbs_sys_path are in hca_table and the ABI is supported, else NULL. */
+ char value[8];
+ struct mlx4_device *dev;
+ unsigned vendor = 0, device = 0; /* zero matches no hca_table entry, so an unparsable sysfs value is rejected instead of read uninitialised */
+ int i;
+
+ if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
+ value, sizeof value) < 0)
+ return NULL;
+ sscanf(value, "%i", &vendor); /* %i accepts the 0x-prefixed form */
+
+ if (ibv_read_sysfs_file(uverbs_sys_path, "device/device",
+ value, sizeof value) < 0)
+ return NULL;
+ sscanf(value, "%i", &device);
+
+ for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i)
+ if (vendor == hca_table[i].vendor &&
+ device == hca_table[i].device)
+ goto found;
+
+ return NULL; /* not a device this driver handles */
+
+found:
+ if (abi_version > MLX4_UVERBS_ABI_VERSION) {
+ fprintf(stderr, PFX "Fatal: ABI version %d of %s is too new (expected %d)\n",
+ abi_version, uverbs_sys_path, MLX4_UVERBS_ABI_VERSION);
+ return NULL;
+ }
+
+ dev = malloc(sizeof *dev);
+ if (!dev) {
+ fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n",
+ uverbs_sys_path);
+ return NULL;
+ }
+
+ dev->ibv_dev.ops = mlx4_dev_ops;
+ dev->page_size = sysconf(_SC_PAGESIZE); /* used later to size the UAR mapping and doorbell pages */
+
+ return &dev->ibv_dev;
+}
+
+#ifdef HAVE_IBV_REGISTER_DRIVER
+static __attribute__((constructor)) void mlx4_register_driver(void)
+{
+ ibv_register_driver("mlx4", mlx4_driver_init); /* registers mlx4_driver_init under the name "mlx4"; run via the constructor attribute */
+}
+#else
+/*
+ * Export the old libsysfs sysfs_class_device-based driver entry point
+ * if libibverbs does not export an ibv_register_driver() function.
+ */
+struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev)
+{
+ int abi_ver = 0; /* used as-is when abi_version cannot be read */
+ char value[8];
+
+ if (ibv_read_sysfs_file(sysdev->path, "abi_version",
+ value, sizeof value) > 0)
+ abi_ver = strtol(value, NULL, 10); /* no check for trailing garbage or range */
+
+ return mlx4_driver_init(sysdev->path, abi_ver);
+}
+#endif /* HAVE_IBV_REGISTER_DRIVER */
diff --git a/src/mlx4.h b/src/mlx4.h
new file mode 100644
index 0000000..8b4dc20
--- /dev/null
+++ b/src/mlx4.h
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX4_H
+#define MLX4_H
+
+#include <stddef.h>
+
+#include <infiniband/driver.h>
+#include <infiniband/arch.h>
+
+/*
+ * Optional Valgrind support: when HAVE_VALGRIND_MEMCHECK_H is defined,
+ * include the memcheck header and warn at compile time if it does not
+ * provide the two annotation macros that get fallbacks below.
+ */
+#ifdef HAVE_VALGRIND_MEMCHECK_H
+
+# include <valgrind/memcheck.h>
+
+# if !defined(VALGRIND_MAKE_MEM_DEFINED) || !defined(VALGRIND_MAKE_MEM_UNDEFINED)
+# warning "Valgrind support requested, but VALGRIND_MAKE_MEM_(UN)DEFINED not available"
+# endif
+
+#endif /* HAVE_VALGRIND_MEMCHECK_H */
+
+/* If the annotation macros are not available they expand to nothing. */
+#ifndef VALGRIND_MAKE_MEM_DEFINED
+# define VALGRIND_MAKE_MEM_DEFINED(addr,len)
+#endif
+
+#ifndef VALGRIND_MAKE_MEM_UNDEFINED
+# define VALGRIND_MAKE_MEM_UNDEFINED(addr,len)
+#endif
+
+/* Fall back to mb() when rmb()/wmb() have not been defined already. */
+#ifndef rmb
+# define rmb() mb()
+#endif
+
+#ifndef wmb
+# define wmb() mb()
+#endif
+
+/* Gives a symbol hidden ELF visibility. */
+#define HIDDEN __attribute__((visibility ("hidden")))
+
+/* Prefix used for the diagnostics this driver prints to stderr. */
+#define PFX "mlx4: "
+
+/*
+ * Size in bytes of one completion queue entry; mlx4_create_cq()
+ * multiplies the entry count by this to size the CQ buffer.
+ */
+enum {
+ MLX4_CQ_ENTRY_SIZE = 0x20
+};
+
+/* NOTE(review): not referenced in this part of the driver -- presumably
+ * an offset applied to the static rate in an address vector; confirm. */
+enum {
+ MLX4_STAT_RATE_OFFSET = 5
+};
+
+/*
+ * Geometry of the first level of the QP lookup table in
+ * struct mlx4_context (qp_table[] has MLX4_QP_TABLE_SIZE slots).
+ */
+enum {
+ MLX4_QP_TABLE_BITS = 8,
+ MLX4_QP_TABLE_SIZE = 1 << MLX4_QP_TABLE_BITS,
+ MLX4_QP_TABLE_MASK = MLX4_QP_TABLE_SIZE - 1
+};
+
+/*
+ * Doorbell record types passed to mlx4_alloc_db(): CQ is requested by
+ * mlx4_create_cq(), RQ by mlx4_create_srq() and mlx4_create_qp().
+ * MLX4_NUM_DB_TYPE sizes mlx4_context.db_list[].
+ */
+enum mlx4_db_type {
+ MLX4_DB_TYPE_CQ,
+ MLX4_DB_TYPE_RQ,
+ MLX4_NUM_DB_TYPE
+};
+
+/*
+ * Opcode values.  The MLX4_OPCODE_* values are what mlx4_post_send()
+ * writes into a send WQE's owner_opcode field (via the mlx4_ib_opcode[]
+ * table in qp.c).
+ * NOTE(review): the MLX4_RECV_OPCODE_* and MLX4_CQE_OPCODE_* values are
+ * presumably opcodes reported in completion entries -- confirm in cq.c.
+ */
+enum {
+ MLX4_OPCODE_NOP = 0x00,
+ MLX4_OPCODE_SEND_INVAL = 0x01,
+ MLX4_OPCODE_RDMA_WRITE = 0x08,
+ MLX4_OPCODE_RDMA_WRITE_IMM = 0x09,
+ MLX4_OPCODE_SEND = 0x0a,
+ MLX4_OPCODE_SEND_IMM = 0x0b,
+ MLX4_OPCODE_LSO = 0x0e,
+ MLX4_OPCODE_RDMA_READ = 0x10,
+ MLX4_OPCODE_ATOMIC_CS = 0x11,
+ MLX4_OPCODE_ATOMIC_FA = 0x12,
+ MLX4_OPCODE_ATOMIC_MASK_CS = 0x14,
+ MLX4_OPCODE_ATOMIC_MASK_FA = 0x15,
+ MLX4_OPCODE_BIND_MW = 0x18,
+ MLX4_OPCODE_FMR = 0x19,
+ MLX4_OPCODE_LOCAL_INVAL = 0x1b,
+ MLX4_OPCODE_CONFIG_CMD = 0x1f,
+
+ MLX4_RECV_OPCODE_RDMA_WRITE_IMM = 0x00,
+ MLX4_RECV_OPCODE_SEND = 0x01,
+ MLX4_RECV_OPCODE_SEND_IMM = 0x02,
+ MLX4_RECV_OPCODE_SEND_INVAL = 0x03,
+
+ MLX4_CQE_OPCODE_ERROR = 0x1e,
+ MLX4_CQE_OPCODE_RESIZE = 0x16,
+};
+
+/*
+ * Driver-private device object.  It embeds the libibverbs device;
+ * to_mdev() maps a struct ibv_device pointer back to this structure.
+ */
+struct mlx4_device {
+ struct ibv_device ibv_dev;
+ /* System page size, set from sysconf(_SC_PAGESIZE) in mlx4_driver_init(). */
+ int page_size;
+};
+
+struct mlx4_db_page;
+
+/*
+ * Driver-private context.  It embeds the libibverbs context; to_mctx()
+ * maps a struct ibv_context pointer back to this structure.
+ */
+struct mlx4_context {
+ struct ibv_context ibv_ctx;
+
+ /* Base address; mlx4_post_send() writes the send doorbell at
+ * uar + MLX4_SEND_DOORBELL. */
+ void *uar;
+ pthread_spinlock_t uar_lock;
+
+ /*
+ * Two-level QP lookup table indexed by QP number.  mlx4_store_qp()
+ * allocates a second-level table when refcnt is zero, refcnt counts
+ * the QPs stored in it, and mlx4_clear_qp() frees the table when
+ * refcnt drops back to zero.
+ */
+ struct {
+ struct mlx4_qp **table;
+ int refcnt;
+ } qp_table[MLX4_QP_TABLE_SIZE];
+ /* Held by mlx4_store_qp() and mlx4_clear_qp() while they modify qp_table. */
+ pthread_mutex_t qp_table_mutex;
+ /* The first-level index is (qpn & (num_qps - 1)) >> qp_table_shift and
+ * the second-level index is qpn & qp_table_mask. */
+ int num_qps;
+ int qp_table_shift;
+ int qp_table_mask;
+
+ /* One list head per doorbell record type.
+ * NOTE(review): presumably maintained by mlx4_alloc_db()/mlx4_free_db()
+ * under db_list_mutex -- confirm in dbrec.c. */
+ struct mlx4_db_page *db_list[MLX4_NUM_DB_TYPE];
+ pthread_mutex_t db_list_mutex;
+};
+
+/*
+ * A memory buffer and its length, as handed to mlx4_alloc_buf() and
+ * mlx4_free_buf().  The CQ, SRQ and QP objects each hold one.
+ */
+struct mlx4_buf {
+ void *buf;
+ size_t length;
+};
+
+/*
+ * Driver-private protection domain; embeds the libibverbs PD
+ * (see to_mpd()).
+ */
+struct mlx4_pd {
+ struct ibv_pd ibv_pd;
+ /* Copied from the alloc-PD command response in mlx4_alloc_pd(). */
+ uint32_t pdn;
+};
+
+/*
+ * Driver-private completion queue; embeds the libibverbs CQ
+ * (see to_mcq()).
+ */
+struct mlx4_cq {
+ struct ibv_cq ibv_cq;
+ /* Entry buffer, nent * MLX4_CQ_ENTRY_SIZE bytes, zeroed in mlx4_create_cq(). */
+ struct mlx4_buf buf;
+ /* Also taken by wq_overflow() in qp.c when it re-reads a work queue's indices. */
+ pthread_spinlock_t lock;
+ /* Copied from the create-CQ command response. */
+ uint32_t cqn;
+ /* Starts at 0 in mlx4_create_cq(). */
+ uint32_t cons_index;
+ /* Doorbell record from mlx4_alloc_db(MLX4_DB_TYPE_CQ); its address is
+ * passed to the kernel as db_addr. */
+ uint32_t *set_ci_db;
+ /* Always set_ci_db + 1. */
+ uint32_t *arm_db;
+ /* Starts at 1 in mlx4_create_cq(). */
+ int arm_sn;
+};
+
+/*
+ * Driver-private shared receive queue; embeds the libibverbs SRQ
+ * (see to_msrq()).
+ */
+struct mlx4_srq {
+ struct ibv_srq ibv_srq;
+ /* WQE buffer of max << wqe_shift bytes (mlx4_alloc_srq_buf()). */
+ struct mlx4_buf buf;
+ /* Held while posting receives and while returning a WQE to the free list. */
+ pthread_spinlock_t lock;
+ /* Work request ID of each WQE, indexed by WQE number; max entries. */
+ uint64_t *wrid;
+ /* Copied from the create-SRQ command response. */
+ uint32_t srqn;
+ /* Number of WQEs in the buffer. */
+ int max;
+ /* Maximum scatter entries per WQE. */
+ int max_gs;
+ /* log2 of the WQE stride in bytes. */
+ int wqe_shift;
+ /* Index of the next free WQE to hand out (mlx4_post_srq_recv()). */
+ int head;
+ /* Index of the last WQE on the free list (mlx4_free_srq_wqe()). */
+ int tail;
+ /* Doorbell record; receives htonl(counter) after each post. */
+ uint32_t *db;
+ /* Running count of posted receives. */
+ uint16_t counter;
+};
+
+/* State of one work queue (the send or the receive side) of a QP. */
+struct mlx4_wq {
+ /* Work request ID of each WQE, indexed by WQE number; max entries. */
+ uint64_t *wrid;
+ /* Held for the duration of a post on this queue. */
+ pthread_spinlock_t lock;
+ /* Number of WQEs; the post functions mask indices with max - 1. */
+ int max;
+ /* Advanced by the number of WQEs posted; head - tail is what
+ * wq_overflow() treats as the number of outstanding WQEs. */
+ unsigned head;
+ unsigned tail;
+ /* Maximum scatter/gather entries per WQE. */
+ int max_gs;
+ /* log2 of the WQE stride in bytes. */
+ int wqe_shift;
+ /* Byte offset of this queue inside the QP buffer. */
+ int offset;
+};
+
+/*
+ * Driver-private queue pair; embeds the libibverbs QP (see to_mqp()).
+ */
+struct mlx4_qp {
+ struct ibv_qp ibv_qp;
+ /* One buffer holding both the send and the receive WQEs. */
+ struct mlx4_buf buf;
+ int max_inline_data;
+ /* Bytes used by the two queues, before page alignment. */
+ int buf_size;
+
+ /* Value that mlx4_post_send() writes to the send doorbell. */
+ uint32_t doorbell_qpn;
+ /* Bits OR-ed into srcrb_flags of every send WQE. */
+ uint32_t sq_signal_bits;
+ struct mlx4_wq sq;
+
+ /* Receive doorbell record; mlx4_post_recv() stores the low 16 bits
+ * of rq.head here in network byte order. */
+ uint32_t *db;
+ struct mlx4_wq rq;
+};
+
+/*
+ * Address vector.  mlx4_post_send() copies this structure byte for byte
+ * into the datagram segment of a UD send WQE, so the field order and
+ * sizes must not change.
+ * NOTE(review): multi-byte fields are presumably stored in network byte
+ * order by mlx4_alloc_av() -- confirm in ah.c.
+ */
+struct mlx4_av {
+ uint32_t port_pd;
+ uint8_t reserved1;
+ uint8_t g_slid;
+ uint16_t dlid;
+ uint8_t reserved2;
+ uint8_t gid_index;
+ uint8_t stat_rate;
+ uint8_t hop_limit;
+ uint32_t sl_tclass_flowlabel;
+ uint8_t dgid[16];
+};
+
+/*
+ * Driver-private address handle; embeds the libibverbs AH (see to_mah())
+ * together with the address vector that UD sends copy into their WQE.
+ */
+struct mlx4_ah {
+ struct ibv_ah ibv_ah;
+ struct mlx4_av av;
+};
+
+/*
+ * Round val up to the next multiple of align.  The mask arithmetic is
+ * only meaningful when align is a power of two.
+ */
+static inline unsigned long align(unsigned long val, unsigned long align)
+{
+	unsigned long mask = align - 1;
+
+	return (val + mask) & ~mask;
+}
+
+/*
+ * Map a pointer named ib<xxx>, which points at the ibv_<xxx> member of a
+ * struct mlx4_<type>, back to the enclosing structure.  The subtraction
+ * is done on a char pointer because arithmetic on void pointers is a
+ * GNU extension rather than standard C.
+ */
+#define to_mxxx(xxx, type)						\
+	((struct mlx4_##type *)						\
+	 ((char *) ib##xxx - offsetof(struct mlx4_##type, ibv_##xxx)))
+
+/*
+ * Typed wrappers around to_mxxx(): each one converts a pointer to a
+ * libibverbs object into a pointer to the mlx4 structure that embeds it.
+ * The argument must point at the ibv_* member of such a structure.
+ */
+
+/* struct ibv_device -> enclosing struct mlx4_device */
+static inline struct mlx4_device *to_mdev(struct ibv_device *ibdev)
+{
+ return to_mxxx(dev, device);
+}
+
+/* struct ibv_context -> enclosing struct mlx4_context */
+static inline struct mlx4_context *to_mctx(struct ibv_context *ibctx)
+{
+ return to_mxxx(ctx, context);
+}
+
+/* struct ibv_pd -> enclosing struct mlx4_pd */
+static inline struct mlx4_pd *to_mpd(struct ibv_pd *ibpd)
+{
+ return to_mxxx(pd, pd);
+}
+
+/* struct ibv_cq -> enclosing struct mlx4_cq */
+static inline struct mlx4_cq *to_mcq(struct ibv_cq *ibcq)
+{
+ return to_mxxx(cq, cq);
+}
+
+/* struct ibv_srq -> enclosing struct mlx4_srq */
+static inline struct mlx4_srq *to_msrq(struct ibv_srq *ibsrq)
+{
+ return to_mxxx(srq, srq);
+}
+
+/* struct ibv_qp -> enclosing struct mlx4_qp */
+static inline struct mlx4_qp *to_mqp(struct ibv_qp *ibqp)
+{
+ return to_mxxx(qp, qp);
+}
+
+/* struct ibv_ah -> enclosing struct mlx4_ah */
+static inline struct mlx4_ah *to_mah(struct ibv_ah *ibah)
+{
+ return to_mxxx(ah, ah);
+}
+
+int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size);
+void mlx4_free_buf(struct mlx4_buf *buf);
+
+uint32_t *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type);
+void mlx4_free_db(struct mlx4_context *context, uint32_t *db);
+
+int mlx4_query_device(struct ibv_context *context,
+ struct ibv_device_attr *attr);
+int mlx4_query_port(struct ibv_context *context, uint8_t port,
+ struct ibv_port_attr *attr);
+
+struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context);
+int mlx4_free_pd(struct ibv_pd *pd);
+
+struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr,
+ size_t length, enum ibv_access_flags access);
+int mlx4_dereg_mr(struct ibv_mr *mr);
+
+struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
+ struct ibv_comp_channel *channel,
+ int comp_vector);
+int mlx4_resize_cq(struct ibv_cq *cq, int cqe);
+int mlx4_destroy_cq(struct ibv_cq *cq);
+int mlx4_poll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc);
+int mlx4_arm_cq(struct ibv_cq *cq, int solicited);
+void mlx4_cq_event(struct ibv_cq *cq);
+void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn,
+ struct mlx4_srq *srq);
+void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int new_cqe);
+
+struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
+ struct ibv_srq_init_attr *attr);
+int mlx4_modify_srq(struct ibv_srq *srq,
+ struct ibv_srq_attr *attr,
+ enum ibv_srq_attr_mask mask);
+int mlx4_query_srq(struct ibv_srq *srq,
+ struct ibv_srq_attr *attr);
+int mlx4_destroy_srq(struct ibv_srq *srq);
+int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
+ struct mlx4_srq *srq);
+void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind);
+int mlx4_post_srq_recv(struct ibv_srq *ibsrq,
+ struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr);
+
+struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
+int mlx4_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+ enum ibv_qp_attr_mask attr_mask,
+ struct ibv_qp_init_attr *init_attr);
+int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+ enum ibv_qp_attr_mask attr_mask);
+int mlx4_destroy_qp(struct ibv_qp *qp);
+void mlx4_init_qp_indices(struct mlx4_qp *qp);
+int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+ struct ibv_send_wr **bad_wr);
+int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr);
+int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
+ enum ibv_qp_type type, struct mlx4_qp *qp);
+struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn);
+int mlx4_store_qp(struct mlx4_context *ctx, uint32_t qpn, struct mlx4_qp *qp);
+void mlx4_clear_qp(struct mlx4_context *ctx, uint32_t qpn);
+struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr);
+int mlx4_destroy_ah(struct ibv_ah *ah);
+int mlx4_alloc_av(struct mlx4_pd *pd, struct ibv_ah_attr *attr,
+ struct mlx4_ah *ah);
+void mlx4_free_av(struct mlx4_ah *ah);
+int mlx4_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
+int mlx4_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
+
+#endif /* MLX4_H */
diff --git a/src/mlx4.map b/src/mlx4.map
new file mode 100644
index 0000000..59a8bae
--- /dev/null
+++ b/src/mlx4.map
@@ -0,0 +1,6 @@
+{
+ global:
+ ibv_driver_init;
+ openib_driver_init;
+ local: *;
+};
diff --git a/src/qp.c b/src/qp.c
new file mode 100644
index 0000000..36a18f0
--- /dev/null
+++ b/src/qp.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2007 Cisco, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+#include <netinet/in.h>
+#include <pthread.h>
+#include <string.h>
+
+#include "mlx4.h"
+#include "doorbell.h"
+#include "wqe.h"
+
+/*
+ * Translation table from libibverbs send opcodes (the array index) to
+ * the MLX4_OPCODE_* value that mlx4_post_send() puts into the WQE.
+ * mlx4_post_send() rejects any opcode that is not below the number of
+ * entries in this table.
+ */
+static const uint32_t mlx4_ib_opcode[] = {
+ [IBV_WR_SEND] = MLX4_OPCODE_SEND,
+ [IBV_WR_SEND_WITH_IMM] = MLX4_OPCODE_SEND_IMM,
+ [IBV_WR_RDMA_WRITE] = MLX4_OPCODE_RDMA_WRITE,
+ [IBV_WR_RDMA_WRITE_WITH_IMM] = MLX4_OPCODE_RDMA_WRITE_IMM,
+ [IBV_WR_RDMA_READ] = MLX4_OPCODE_RDMA_READ,
+ [IBV_WR_ATOMIC_CMP_AND_SWP] = MLX4_OPCODE_ATOMIC_CS,
+ [IBV_WR_ATOMIC_FETCH_AND_ADD] = MLX4_OPCODE_ATOMIC_FA,
+};
+
+/* Return the address of receive WQE number n inside the QP buffer. */
+static void *get_recv_wqe(struct mlx4_qp *qp, int n)
+{
+	void *rq_base = qp->buf.buf + qp->rq.offset;
+
+	return rq_base + (n << qp->rq.wqe_shift);
+}
+
+/* Return the address of send WQE number n inside the QP buffer. */
+static void *get_send_wqe(struct mlx4_qp *qp, int n)
+{
+	void *sq_base = qp->buf.buf + qp->sq.offset;
+
+	return sq_base + (n << qp->sq.wqe_shift);
+}
+
+/* Reset the head and tail indices of both work queues of qp to zero. */
+void mlx4_init_qp_indices(struct mlx4_qp *qp)
+{
+	qp->sq.head = qp->sq.tail = 0;
+	qp->rq.head = qp->rq.tail = 0;
+}
+
+/*
+ * Return non-zero if posting one more WQE on top of the nreq already
+ * queued in this call would reach the capacity of wq.
+ *
+ * The indices are first read without any CQ locking.  Only when that
+ * read says the queue is full are they read again with cq->lock held
+ * (presumably because completion processing advances tail under that
+ * lock -- confirm in cq.c).
+ */
+static inline int wq_overflow(struct mlx4_wq *wq, int nreq, struct mlx4_cq *cq)
+{
+	unsigned used = wq->head - wq->tail;
+
+	if (used + nreq >= wq->max) {
+		pthread_spin_lock(&cq->lock);
+		used = wq->head - wq->tail;
+		pthread_spin_unlock(&cq->lock);
+
+		return used + nreq >= wq->max;
+	}
+
+	return 0;
+}
+
+/*
+ * Post a chain of send work requests to the send queue of a QP.
+ *
+ * Each request becomes one WQE: a control segment, for UD QPs a datagram
+ * segment holding the address vector, and one data segment per
+ * scatter/gather entry.  The send doorbell is written once after the
+ * loop if at least one WQE was built.
+ *
+ * Returns 0 on success.  On failure returns -1 and stores the first
+ * request that was not posted in *bad_wr; the requests before it have
+ * still been posted.
+ *
+ * The remote-address/atomic segments and inline sends are not
+ * implemented yet (see the XXX markers).
+ */
+int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+ struct ibv_send_wr **bad_wr)
+{
+ struct mlx4_qp *qp = to_mqp(ibqp);
+ void *wqe;
+ struct mlx4_wqe_ctrl_seg *ctrl;
+ int ind;
+ int nreq;
+ int ret = 0;
+ int size;
+ int i;
+
+ pthread_spin_lock(&qp->sq.lock);
+
+ /* XXX check that state is OK to post send */
+
+ /* ind keeps counting past sq.max; it is masked wherever it is used as an index. */
+ ind = qp->sq.head;
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
+ ret = -1;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (wr->num_sge > qp->sq.max_gs) {
+ ret = -1;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ /* Opcodes beyond the end of the translation table are rejected. */
+ if (wr->opcode >= sizeof mlx4_ib_opcode / sizeof mlx4_ib_opcode[0]) {
+ ret = -1;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.max - 1));
+ qp->sq.wrid[ind & (qp->sq.max - 1)] = wr->wr_id;
+
+ ctrl->srcrb_flags =
+ (wr->send_flags & IBV_SEND_SIGNALED ?
+ htonl(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
+ (wr->send_flags & IBV_SEND_SOLICITED ?
+ htonl(MLX4_WQE_CTRL_SOLICIT) : 0) |
+ qp->sq_signal_bits;
+
+ if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
+ wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
+ ctrl->imm = wr->imm_data;
+ else
+ ctrl->imm = 0;
+
+ /* size counts the WQE length in 16-byte units. */
+ wqe += sizeof *ctrl;
+ size = sizeof *ctrl / 16;
+
+ switch (ibqp->qp_type) {
+ case IBV_QPT_RC:
+ case IBV_QPT_UC:
+ switch (wr->opcode) {
+ case IBV_WR_ATOMIC_CMP_AND_SWP:
+ case IBV_WR_ATOMIC_FETCH_AND_ADD:
+ /*XXX*/
+ break;
+
+ case IBV_WR_RDMA_WRITE:
+ case IBV_WR_RDMA_WRITE_WITH_IMM:
+ case IBV_WR_RDMA_READ:
+ /*XXX*/
+ break;
+
+ default:
+ /* No extra segments required for sends */
+ break;
+ }
+ break;
+
+ case IBV_QPT_UD:
+ /* The address vector is copied verbatim from the address handle. */
+ memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av,
+ &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
+ ((struct mlx4_wqe_datagram_seg *) wqe)->dqpn =
+ htonl(wr->wr.ud.remote_qpn);
+ ((struct mlx4_wqe_datagram_seg *) wqe)->qkey =
+ htonl(wr->wr.ud.remote_qkey);
+
+ wqe += sizeof (struct mlx4_wqe_datagram_seg);
+ size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+ break;
+
+ default:
+ break;
+ }
+
+ if (wr->send_flags & IBV_SEND_INLINE) {
+ /*XXX handle inline send */
+ } else {
+ struct mlx4_wqe_data_seg *seg = wqe;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ seg[i].byte_count = htonl(wr->sg_list[i].length);
+ seg[i].lkey = htonl(wr->sg_list[i].lkey);
+ seg[i].addr = htonll(wr->sg_list[i].addr);
+ }
+
+ size += wr->num_sge * (sizeof *seg / 16);
+ }
+
+ ctrl->fence_size = (wr->send_flags & IBV_SEND_FENCE ?
+ MLX4_WQE_CTRL_FENCE : 0) | size;
+
+ /*
+ * Make sure descriptor is fully written before
+ * setting ownership bit (because HW can start
+ * executing as soon as we do).
+ */
+ wmb();
+
+ /* Bit 31 is set whenever the sq.max bit of the running index is set. */
+ ctrl->owner_opcode = htonl(mlx4_ib_opcode[wr->opcode]) |
+ (ind & qp->sq.max ? htonl(1 << 31) : 0);
+
+ ++ind;
+ }
+
+out:
+ /* Also reached on error: WQEs built before the failure are still posted. */
+ if (nreq) {
+ qp->sq.head += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ *(uint32_t *) (to_mctx(ibqp->context)->uar + MLX4_SEND_DOORBELL) =
+ qp->doorbell_qpn;
+ }
+
+ pthread_spin_unlock(&qp->sq.lock);
+
+ return ret;
+}
+
+/*
+ * Post a chain of receive work requests to the receive queue of a QP.
+ *
+ * Each request fills the scatter list of the next receive WQE.  After
+ * the loop, if anything was posted, the doorbell record is updated with
+ * the low 16 bits of the new head.
+ *
+ * Returns 0 on success.  On failure returns -1 and stores the first
+ * request that was not posted in *bad_wr; the requests before it have
+ * still been posted.
+ */
+int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr)
+{
+ struct mlx4_qp *qp = to_mqp(ibqp);
+ struct mlx4_wqe_data_seg *scat;
+ int ret = 0;
+ int nreq;
+ int ind;
+ int i;
+
+ pthread_spin_lock(&qp->rq.lock);
+
+ /* XXX check that state is OK to post receive */
+
+ /* Unlike the send path, ind is kept masked to a valid WQE number. */
+ ind = qp->rq.head & (qp->rq.max - 1);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
+ ret = -1;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (wr->num_sge > qp->rq.max_gs) {
+ ret = -1;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ scat = get_recv_wqe(qp, ind);
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ scat[i].byte_count = htonl(wr->sg_list[i].length);
+ scat[i].lkey = htonl(wr->sg_list[i].lkey);
+ scat[i].addr = htonll(wr->sg_list[i].addr);
+ }
+
+ /* A shorter-than-maximum scatter list is ended by an entry with the invalid lkey. */
+ if (i < qp->rq.max_gs) {
+ scat[i].byte_count = 0;
+ scat[i].lkey = htonl(MLX4_INVALID_LKEY);
+ scat[i].addr = 0;
+ }
+
+ qp->rq.wrid[ind] = wr->wr_id;
+
+ ind = (ind + 1) & (qp->rq.max - 1);
+ }
+
+out:
+ /* Also reached on error: WQEs built before the failure are still posted. */
+ if (nreq) {
+ qp->rq.head += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ *qp->db = htonl(qp->rq.head & 0xffff);
+ }
+
+ pthread_spin_unlock(&qp->rq.lock);
+
+ return ret;
+}
+
+/*
+ * Size and allocate the memory behind a QP: the two work request ID
+ * arrays and one zeroed buffer holding the receive and send WQEs.
+ *
+ * qp->sq.max and qp->rq.max must already be set by the caller.  On
+ * return max_gs, wqe_shift and offset of both queues and qp->buf_size
+ * are filled in, and every send WQE has bit 31 of owner_opcode set.
+ *
+ * Returns 0 on success, -1 if any allocation fails (nothing is left
+ * allocated in that case).
+ */
+int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
+ enum ibv_qp_type type, struct mlx4_qp *qp)
+{
+ struct mlx4_wqe_ctrl_seg *ctrl;
+ int size;
+ int max_sq_sge;
+ int i;
+
+ qp->rq.max_gs = cap->max_recv_sge;
+ qp->sq.max_gs = cap->max_send_sge;
+ /*
+ * A send WQE must hold either max_send_sge data segments or the
+ * requested inline data plus its header, whichever is larger;
+ * max_sq_sge expresses that in units of data segments.
+ */
+ max_sq_sge = align(cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg),
+ sizeof (struct mlx4_wqe_data_seg)) / sizeof (struct mlx4_wqe_data_seg);
+ if (max_sq_sge < cap->max_send_sge)
+ max_sq_sge = cap->max_send_sge;
+
+ qp->sq.wrid = malloc(qp->sq.max * sizeof (uint64_t));
+ if (!qp->sq.wrid)
+ return -1;
+
+ qp->rq.wrid = malloc(qp->rq.max * sizeof (uint64_t));
+ if (!qp->rq.wrid) {
+ free(qp->sq.wrid);
+ return -1;
+ }
+
+ size = qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg);
+
+ /* Receive WQE stride: smallest power of two >= size, at least 16 bytes. */
+ for (qp->rq.wqe_shift = 4; 1 << qp->rq.wqe_shift < size;
+ qp->rq.wqe_shift++)
+ ; /* nothing */
+
+ size = max_sq_sge * sizeof (struct mlx4_wqe_data_seg);
+ switch (type) {
+ case IBV_QPT_UD:
+ size += sizeof (struct mlx4_wqe_datagram_seg);
+ break;
+
+ case IBV_QPT_UC:
+ size += sizeof (struct mlx4_raddr_seg);
+ break;
+
+ case IBV_QPT_RC:
+ size += sizeof (struct mlx4_raddr_seg);
+ /*
+ * An atomic op will require an atomic segment, a
+ * remote address segment and one scatter entry.
+ */
+ if (size < (sizeof (struct mlx4_atomic_seg) +
+ sizeof (struct mlx4_raddr_seg) +
+ sizeof (struct mlx4_wqe_data_seg)))
+ size = (sizeof (struct mlx4_atomic_seg) +
+ sizeof (struct mlx4_raddr_seg) +
+ sizeof (struct mlx4_wqe_data_seg));
+ break;
+
+ default:
+ break;
+ }
+
+ /* Make sure that we have enough space for a bind request */
+ if (size < sizeof (struct mlx4_bind_seg))
+ size = sizeof (struct mlx4_bind_seg);
+
+ size += sizeof (struct mlx4_wqe_ctrl_seg);
+
+ /* Send WQE stride: smallest power of two >= size, at least 64 bytes. */
+ for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
+ qp->sq.wqe_shift++)
+ ; /* nothing */
+
+ /* The queue with the larger WQE stride is placed first in the buffer. */
+ qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
+ (qp->sq.max << qp->sq.wqe_shift);
+ if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
+ qp->rq.offset = 0;
+ qp->sq.offset = qp->rq.max << qp->rq.wqe_shift;
+ } else {
+ qp->rq.offset = qp->sq.max << qp->sq.wqe_shift;
+ qp->sq.offset = 0;
+ }
+
+ if (mlx4_alloc_buf(&qp->buf,
+ align(qp->buf_size, to_mdev(pd->context->device)->page_size),
+ to_mdev(pd->context->device)->page_size)) {
+ free(qp->sq.wrid);
+ free(qp->rq.wrid);
+ return -1;
+ }
+
+ memset(qp->buf.buf, 0, qp->buf_size);
+
+ /* Start every send WQE with bit 31 of owner_opcode set. */
+ for (i = 0; i < qp->sq.max; ++i) {
+ ctrl = get_send_wqe(qp, i);
+ ctrl->owner_opcode = htonl(1 << 31);
+ }
+
+ return 0;
+}
+
+/*
+ * Look up the QP stored under QP number qpn.
+ *
+ * Returns the stored pointer, or NULL when the second-level table
+ * covering qpn holds no QPs.  No lock is taken.
+ */
+struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn)
+{
+	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+
+	if (!ctx->qp_table[tind].refcnt)
+		return NULL;
+
+	return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
+}
+
+/*
+ * Record qp under QP number qpn so that mlx4_find_qp() can return it.
+ *
+ * The second-level table covering qpn is allocated (zero-filled) when
+ * its reference count is zero; the count is then incremented.
+ *
+ * Returns 0 on success, -1 if the second-level table cannot be
+ * allocated.
+ */
+int mlx4_store_qp(struct mlx4_context *ctx, uint32_t qpn, struct mlx4_qp *qp)
+{
+	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+	int ret = -1;
+
+	pthread_mutex_lock(&ctx->qp_table_mutex);
+
+	if (ctx->qp_table[tind].refcnt == 0) {
+		ctx->qp_table[tind].table = calloc(ctx->qp_table_mask + 1,
+						   sizeof (struct mlx4_qp *));
+		if (ctx->qp_table[tind].table == NULL)
+			goto unlock;
+	}
+
+	ctx->qp_table[tind].refcnt++;
+	ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;
+	ret = 0;
+
+unlock:
+	pthread_mutex_unlock(&ctx->qp_table_mutex);
+	return ret;
+}
+
+/*
+ * Remove the entry for QP number qpn.  The reference count of the
+ * second-level table is decremented; the table is freed when the count
+ * reaches zero, otherwise only the slot for qpn is cleared.
+ */
+void mlx4_clear_qp(struct mlx4_context *ctx, uint32_t qpn)
+{
+	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+
+	pthread_mutex_lock(&ctx->qp_table_mutex);
+
+	if (--ctx->qp_table[tind].refcnt)
+		ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
+	else
+		free(ctx->qp_table[tind].table);
+
+	pthread_mutex_unlock(&ctx->qp_table_mutex);
+}
diff --git a/src/srq.c b/src/srq.c
new file mode 100644
index 0000000..14c64c8
--- /dev/null
+++ b/src/srq.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2007 Cisco, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+#include <netinet/in.h>
+#include <pthread.h>
+#include <string.h>
+
+#include "mlx4.h"
+#include "doorbell.h"
+#include "wqe.h"
+
+/* Return the address of WQE number n inside the SRQ buffer. */
+static void *get_wqe(struct mlx4_srq *srq, int n)
+{
+	int byte_offset = n << srq->wqe_shift;
+
+	return srq->buf.buf + byte_offset;
+}
+
+/*
+ * Return WQE number ind to the SRQ's free list: the current tail WQE is
+ * linked to it and it becomes the new tail.
+ */
+void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind)
+{
+	struct mlx4_wqe_srq_next_seg *old_tail;
+
+	pthread_spin_lock(&srq->lock);
+
+	old_tail = get_wqe(srq, srq->tail);
+	srq->tail = ind;
+	old_tail->next_wqe_index = htons(ind);
+
+	pthread_spin_unlock(&srq->lock);
+}
+
+/*
+ * Post a chain of receive work requests to a shared receive queue.
+ *
+ * Free WQEs form a linked list through next_wqe_index.  Each request
+ * takes the WQE at srq->head and moves head to that WQE's successor.
+ * After the loop, if anything was posted, the doorbell record is updated
+ * with the running post counter.
+ *
+ * Returns 0 on success.  On failure (too many scatter entries, or no
+ * free WQE left) returns -1 and stores the first request that was not
+ * posted in *bad_wr; the requests before it have still been posted.
+ */
+int mlx4_post_srq_recv(struct ibv_srq *ibsrq,
+		       struct ibv_recv_wr *wr,
+		       struct ibv_recv_wr **bad_wr)
+{
+	struct mlx4_srq *srq = to_msrq(ibsrq);
+	struct mlx4_wqe_srq_next_seg *next;
+	struct mlx4_wqe_data_seg *scat;
+	int err = 0;
+	int nreq;
+	int i;
+
+	pthread_spin_lock(&srq->lock);
+
+	for (nreq = 0; wr; ++nreq, wr = wr->next) {
+		if (wr->num_sge > srq->max_gs) {
+			err = -1;
+			*bad_wr = wr;
+			break;
+		}
+
+		/*
+		 * When head has caught up with tail, the only WQE left is
+		 * the end of the free list, which mlx4_free_srq_wqe() links
+		 * the next freed WQE onto.  Following its stale link would
+		 * reuse WQEs that are still posted, so the SRQ is full.
+		 */
+		if (srq->head == srq->tail) {
+			err = -1;
+			*bad_wr = wr;
+			break;
+		}
+
+		srq->wrid[srq->head] = wr->wr_id;
+
+		next      = get_wqe(srq, srq->head);
+		srq->head = ntohs(next->next_wqe_index);
+		scat      = (struct mlx4_wqe_data_seg *) (next + 1);
+
+		for (i = 0; i < wr->num_sge; ++i) {
+			scat[i].byte_count = htonl(wr->sg_list[i].length);
+			scat[i].lkey       = htonl(wr->sg_list[i].lkey);
+			/* addr is 64 bits wide: htonl() would truncate it. */
+			scat[i].addr       = htonll(wr->sg_list[i].addr);
+		}
+
+		/* A shorter-than-maximum scatter list is ended by an entry
+		 * with the invalid lkey. */
+		if (i < srq->max_gs) {
+			scat[i].byte_count = 0;
+			scat[i].lkey       = htonl(MLX4_INVALID_LKEY);
+			scat[i].addr       = 0;
+		}
+	}
+
+	if (nreq) {
+		srq->counter += nreq;
+
+		/*
+		 * Make sure that descriptors are written before
+		 * we write doorbell record.
+		 */
+		wmb();
+
+		*srq->db = htonl(srq->counter);
+	}
+
+	pthread_spin_unlock(&srq->lock);
+
+	return err;
+}
+
+/*
+ * Allocate the memory behind an SRQ: the work request ID array and the
+ * zeroed WQE buffer, with all WQEs linked into the free list.
+ *
+ * srq->max and srq->max_gs must already be set by the caller.  On return
+ * wqe_shift, head and tail are filled in as well.
+ *
+ * Returns 0 on success, -1 if an allocation fails (nothing is left
+ * allocated in that case).
+ */
+int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
+		       struct mlx4_srq *srq)
+{
+	struct mlx4_wqe_srq_next_seg *next;
+	int size;
+	int buf_size;
+	int i;
+
+	srq->wrid = malloc(srq->max * sizeof (uint64_t));
+	if (!srq->wrid)
+		return -1;
+
+	size = sizeof (struct mlx4_wqe_srq_next_seg) +
+		srq->max_gs * sizeof (struct mlx4_wqe_data_seg);
+
+	/* WQE stride: smallest power of two >= size, at least 64 bytes. */
+	for (srq->wqe_shift = 6; 1 << srq->wqe_shift < size; ++srq->wqe_shift)
+		; /* nothing */
+
+	buf_size = srq->max << srq->wqe_shift;
+
+	if (mlx4_alloc_buf(&srq->buf, buf_size,
+			   to_mdev(pd->context->device)->page_size)) {
+		free(srq->wrid);
+		return -1;
+	}
+
+	memset(srq->buf.buf, 0, buf_size);
+
+	/*
+	 * Now initialize the SRQ buffer so that all of the WQEs are
+	 * linked into the list of free WQEs.  The link is stored in
+	 * network byte order, matching the ntohs() in
+	 * mlx4_post_srq_recv() and the htons() in mlx4_free_srq_wqe().
+	 */
+	for (i = 0; i < srq->max; ++i) {
+		next = get_wqe(srq, i);
+
+		next->next_wqe_index = htons((i + 1) & (srq->max - 1));
+	}
+
+	srq->head = 0;
+	srq->tail = srq->max - 1;
+
+	return 0;
+}
diff --git a/src/verbs.c b/src/verbs.c
new file mode 100644
index 0000000..0292945
--- /dev/null
+++ b/src/verbs.c
@@ -0,0 +1,580 @@
+/*
+ * Copyright (c) 2007 Cisco, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <strings.h>
+#include <pthread.h>
+#include <errno.h>
+#include <netinet/in.h>
+
+#include "mlx4.h"
+#include "mlx4-abi.h"
+#include "wqe.h"
+
+/*
+ * Query the device attributes through the generic libibverbs command
+ * and format the raw firmware version into attr->fw_ver as
+ * "major.minor.sub_minor", taking the three fields from bits 47..32,
+ * 31..16 and 15..0 of the raw value.
+ *
+ * Returns 0 on success or the non-zero result of the query command.
+ */
+int mlx4_query_device(struct ibv_context *context, struct ibv_device_attr *attr)
+{
+	struct ibv_query_device cmd;
+	uint64_t raw_fw_ver;
+	int ret;
+
+	ret = ibv_cmd_query_device(context, attr, &raw_fw_ver, &cmd, sizeof cmd);
+	if (!ret) {
+		unsigned major     = (raw_fw_ver >> 32) & 0xffff;
+		unsigned minor     = (raw_fw_ver >> 16) & 0xffff;
+		unsigned sub_minor = raw_fw_ver & 0xffff;
+
+		snprintf(attr->fw_ver, sizeof attr->fw_ver,
+			 "%d.%d.%03d", major, minor, sub_minor);
+	}
+
+	return ret;
+}
+
+/*
+ * Query the attributes of one port by issuing the generic libibverbs
+ * query-port command; its result is returned unchanged.
+ */
+int mlx4_query_port(struct ibv_context *context, uint8_t port,
+		     struct ibv_port_attr *attr)
+{
+	struct ibv_query_port cmd;
+	int ret;
+
+	ret = ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd);
+
+	return ret;
+}
+
+/*
+ * Allocate a protection domain and remember the PD number returned in
+ * the command response.
+ *
+ * Returns the embedded struct ibv_pd, or NULL if memory allocation or
+ * the alloc-PD command fails.
+ */
+struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context)
+{
+	struct ibv_alloc_pd cmd;
+	struct mlx4_alloc_pd_resp resp;
+	struct mlx4_pd *pd = malloc(sizeof *pd);
+
+	if (pd == NULL)
+		return NULL;
+
+	if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd,
+			     &resp.ibv_resp, sizeof resp) != 0) {
+		free(pd);
+		return NULL;
+	}
+
+	pd->pdn = resp.pdn;
+	return &pd->ibv_pd;
+}
+
+/*
+ * Deallocate a protection domain.  The memory is released only if the
+ * dealloc command succeeds; otherwise the command's result is returned
+ * and the PD is left untouched.
+ */
+int mlx4_free_pd(struct ibv_pd *pd)
+{
+	int ret = ibv_cmd_dealloc_pd(pd);
+
+	if (!ret)
+		free(to_mpd(pd));
+
+	return ret;
+}
+
+/*
+ * Register the memory region [addr, addr + length) with the given
+ * access flags.  The same address is passed as both the start address
+ * and the third address argument of ibv_cmd_reg_mr().
+ *
+ * Returns the new struct ibv_mr, or NULL if memory allocation or the
+ * register command fails.
+ */
+struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+ enum ibv_access_flags access)
+{
+ struct ibv_mr *mr;
+ struct ibv_reg_mr cmd;
+ int ret;
+
+ mr = malloc(sizeof *mr);
+ if (!mr)
+ return NULL;
+
+ /* Two call forms: with IBV_CMD_REG_MR_HAS_RESP_PARAMS defined,
+ * ibv_cmd_reg_mr() also takes a response buffer. */
+#ifdef IBV_CMD_REG_MR_HAS_RESP_PARAMS
+ {
+ struct ibv_reg_mr_resp resp;
+
+ ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
+ access, mr, &cmd, sizeof cmd,
+ &resp, sizeof resp);
+ }
+#else
+ ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr, access, mr,
+ &cmd, sizeof cmd);
+#endif
+ if (ret) {
+ free(mr);
+ return NULL;
+ }
+
+ return mr;
+}
+
+/*
+ * Deregister a memory region.  The memory is released only if the
+ * deregister command succeeds; otherwise the command's result is
+ * returned and the region is left untouched.
+ */
+int mlx4_dereg_mr(struct ibv_mr *mr)
+{
+	int ret = ibv_cmd_dereg_mr(mr);
+
+	if (!ret)
+		free(mr);
+
+	return ret;
+}
+
+/*
+ * Return the smallest power of two that is strictly greater than cqe
+ * (1 when cqe is zero or negative).
+ */
+static int align_cq_size(int cqe)
+{
+	int nent = 1;
+
+	while (nent <= cqe)
+		nent <<= 1;
+
+	return nent;
+}
+
+/*
+ * Create a completion queue with room for at least cqe entries.
+ *
+ * The entry count is rounded up to a power of two strictly greater than
+ * cqe.  A zeroed entry buffer and a doorbell record are allocated and
+ * their addresses handed to the kernel in the create-CQ command.
+ *
+ * Returns the embedded struct ibv_cq, or NULL if cqe exceeds 131072 or
+ * any allocation or the create command fails.
+ */
+struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
+ struct ibv_comp_channel *channel,
+ int comp_vector)
+{
+ struct mlx4_create_cq cmd;
+ struct mlx4_create_cq_resp resp;
+ struct mlx4_cq *cq;
+ int ret;
+
+ /* Sanity check CQ size before proceeding */
+ if (cqe > 131072)
+ return NULL;
+
+ cq = malloc(sizeof *cq);
+ if (!cq)
+ return NULL;
+
+ cq->cons_index = 0;
+
+ if (pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE))
+ goto err;
+
+ cqe = align_cq_size(cqe);
+
+ if (mlx4_alloc_buf(&cq->buf, cqe * MLX4_CQ_ENTRY_SIZE,
+ to_mdev(context->device)->page_size))
+ goto err;
+
+ memset(cq->buf.buf, 0, cqe * MLX4_CQ_ENTRY_SIZE);
+
+ cq->set_ci_db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_CQ);
+ if (!cq->set_ci_db)
+ goto err_buf;
+
+ /* The arm doorbell is the 32-bit word right after the set_ci doorbell. */
+ cq->arm_db = cq->set_ci_db + 1;
+ *cq->arm_db = 0;
+ cq->arm_sn = 1;
+ *cq->set_ci_db = 0;
+
+ cmd.buf_addr = (uintptr_t) cq->buf.buf;
+ cmd.db_addr = (uintptr_t) cq->set_ci_db;
+
+ /* The command is given cqe - 1, one less than the rounded-up entry count. */
+ ret = ibv_cmd_create_cq(context, cqe - 1, channel, comp_vector,
+ &cq->ibv_cq, &cmd.ibv_cmd, sizeof cmd,
+ &resp.ibv_resp, sizeof resp);
+ if (ret)
+ goto err_db;
+
+ cq->cqn = resp.cqn;
+
+ return &cq->ibv_cq;
+
+ /* Error unwind: each label releases what was acquired before the
+ * failing step and falls through to the labels below it. */
+err_db:
+ mlx4_free_db(to_mctx(context), cq->set_ci_db);
+
+err_buf:
+ mlx4_free_buf(&cq->buf);
+
+err:
+ free(cq);
+
+ return NULL;
+}
+
+/*
+ * Resize a completion queue.  Not implemented: both arguments are
+ * ignored and -ENOSYS is always returned.
+ */
+int mlx4_resize_cq(struct ibv_cq *ibcq, int cqe)
+{
+ /* XXX resize CQ not implemented */
+ return -ENOSYS;
+}
+
+/*
+ * Destroy a completion queue.  If the destroy command fails its result
+ * is returned and nothing is freed; otherwise the doorbell record, the
+ * entry buffer and the CQ structure are released and 0 is returned.
+ */
+int mlx4_destroy_cq(struct ibv_cq *cq)
+{
+	struct mlx4_cq *mcq = to_mcq(cq);
+	int ret = ibv_cmd_destroy_cq(cq);
+
+	if (ret)
+		return ret;
+
+	mlx4_free_db(to_mctx(cq->context), mcq->set_ci_db);
+	mlx4_free_buf(&mcq->buf);
+	free(mcq);
+
+	return 0;
+}
+
+/*
+ * Round a requested queue size up to a power of two.
+ *
+ * Returns the smallest power of two that is >= size + spare, except that
+ * a size of 0 is returned unchanged.  context is not used.
+ */
+static int align_queue_size(struct ibv_context *context, int size, int spare)
+{
+	int wanted = size + spare;
+	int ret = 1;
+
+	/*
+	 * A caller asking for a 0-sized queue presumably does not intend
+	 * to use it, so leave the size alone.
+	 */
+	if (size == 0)
+		return 0;
+
+	while (ret < wanted)
+		ret <<= 1;
+
+	return ret;
+}
+
+/*
+ * Create a shared receive queue.
+ *
+ * The queue size is max_wr plus one spare entry, rounded up to a power
+ * of two.  The WQE buffer and a doorbell record are allocated and their
+ * addresses handed to the kernel in the create-SRQ command.
+ *
+ * Returns the embedded struct ibv_srq, or NULL if the requested limits
+ * are too large (max_wr > 65536 or max_sge > 64) or any allocation or
+ * the create command fails.
+ */
+struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
+ struct ibv_srq_init_attr *attr)
+{
+ struct mlx4_create_srq cmd;
+ struct mlx4_create_srq_resp resp;
+ struct mlx4_srq *srq;
+ int ret;
+
+ /* Sanity check SRQ size before proceeding */
+ if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64)
+ return NULL;
+
+ srq = malloc(sizeof *srq);
+ if (!srq)
+ return NULL;
+
+ if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
+ goto err;
+
+ srq->max = align_queue_size(pd->context, attr->attr.max_wr, 1);
+ srq->max_gs = attr->attr.max_sge;
+ srq->counter = 0;
+
+ /* On failure mlx4_alloc_srq_buf() has already released what it allocated. */
+ if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
+ goto err;
+
+ srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
+ if (!srq->db)
+ goto err_free;
+
+ *srq->db = 0;
+
+ cmd.buf_addr = (uintptr_t) srq->buf.buf;
+ cmd.db_addr = (uintptr_t) srq->db;
+
+ ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr,
+ &cmd.ibv_cmd, sizeof cmd,
+ &resp.ibv_resp, sizeof resp);
+ if (ret)
+ goto err_db;
+
+ srq->srqn = resp.srqn;
+
+ return &srq->ibv_srq;
+
+ /* Error unwind: each label releases what was acquired before the
+ * failing step and falls through to the labels below it. */
+err_db:
+ mlx4_free_db(to_mctx(pd->context), srq->db);
+
+err_free:
+ free(srq->wrid);
+ mlx4_free_buf(&srq->buf);
+
+err:
+ free(srq);
+
+ return NULL;
+}
+
+/*
+ * Modify an SRQ by issuing the generic libibverbs modify-SRQ command;
+ * its result is returned unchanged.
+ */
+int mlx4_modify_srq(struct ibv_srq *srq,
+		     struct ibv_srq_attr *attr,
+		     enum ibv_srq_attr_mask attr_mask)
+{
+	struct ibv_modify_srq cmd;
+	int ret;
+
+	ret = ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd);
+
+	return ret;
+}
+
+/*
+ * Query an SRQ by issuing the generic libibverbs query-SRQ command;
+ * its result is returned unchanged.
+ */
+int mlx4_query_srq(struct ibv_srq *srq,
+		    struct ibv_srq_attr *attr)
+{
+	struct ibv_query_srq cmd;
+	int ret;
+
+	ret = ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
+
+	return ret;
+}
+
+/*
+ * Destroy a shared receive queue.  If the destroy command fails its
+ * result is returned and nothing is freed; otherwise the doorbell
+ * record, the WQE buffer, the work request ID array and the SRQ
+ * structure are released and 0 is returned.
+ */
+int mlx4_destroy_srq(struct ibv_srq *srq)
+{
+	struct mlx4_srq *msrq = to_msrq(srq);
+	int ret = ibv_cmd_destroy_srq(srq);
+
+	if (ret)
+		return ret;
+
+	mlx4_free_db(to_mctx(srq->context), msrq->db);
+	mlx4_free_buf(&msrq->buf);
+	free(msrq->wrid);
+	free(msrq);
+
+	return 0;
+}
+
+/*
+ * Create a queue pair (QP) on protection domain pd.
+ *
+ * Requests for more than 65536 send or receive work requests, more
+ * than 64 send or receive scatter/gather entries, or more than 1024
+ * bytes of inline data are rejected up front.  Otherwise the function
+ * allocates the mlx4_qp, its buffer and a doorbell record, issues the
+ * create command through ibv_cmd_create_qp(), and records the new QP
+ * under its QP number with mlx4_store_qp().
+ *
+ * Returns a pointer to the embedded ibv_qp on success.  On failure
+ * the steps completed so far are undone and NULL is returned.
+ *
+ * NOTE(review): the error paths never undo pthread_spin_init() on the
+ * sq/rq locks -- confirm whether a pthread_spin_destroy() is wanted.
+ */
+struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
+{
+ struct mlx4_create_qp cmd;
+ struct ibv_create_qp_resp resp;
+ struct mlx4_qp *qp;
+ int ret;
+
+ /* Sanity check QP size before proceeding */
+ if (attr->cap.max_send_wr > 65536 ||
+ attr->cap.max_recv_wr > 65536 ||
+ attr->cap.max_send_sge > 64 ||
+ attr->cap.max_recv_sge > 64 ||
+ attr->cap.max_inline_data > 1024)
+ return NULL;
+
+ qp = malloc(sizeof *qp);
+ if (!qp)
+ return NULL;
+
+ /* Queue depths rounded up to a power of two, with no spare entries */
+ qp->sq.max = align_queue_size(pd->context, attr->cap.max_send_wr, 0);
+ qp->rq.max = align_queue_size(pd->context, attr->cap.max_recv_wr, 0);
+
+ if (mlx4_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp))
+ goto err;
+
+ mlx4_init_qp_indices(qp);
+
+ if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) ||
+ pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
+ goto err_free;
+
+ qp->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
+ if (!qp->db)
+ goto err_free;
+
+ /* The doorbell record starts out as zero */
+ *qp->db = 0;
+
+ /* The command carries the addresses of the buffer and doorbell record */
+ cmd.buf_addr = (uintptr_t) qp->buf.buf;
+ cmd.db_addr = (uintptr_t) qp->db;
+
+ ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, sizeof cmd,
+ &resp, sizeof resp);
+ if (ret)
+ goto err_rq_db;
+
+ ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp);
+ if (ret)
+ goto err_destroy;
+
+ /*
+ * Record the capabilities found in attr->cap after the create
+ * command.  This replaces the power-of-two sq.max/rq.max values
+ * computed above.
+ * NOTE(review): presumably ibv_cmd_create_qp() writes the granted
+ * capabilities back into attr->cap -- confirm.
+ */
+ qp->sq.max = attr->cap.max_send_wr;
+ qp->rq.max = attr->cap.max_recv_wr;
+ qp->sq.max_gs = attr->cap.max_send_sge;
+ qp->rq.max_gs = attr->cap.max_recv_sge;
+ qp->max_inline_data = attr->cap.max_inline_data;
+
+ /* QP number shifted up by 8 bits, kept in network byte order */
+ qp->doorbell_qpn = htonl(qp->ibv_qp.qp_num << 8);
+ /* With sq_sig_all set, the CQ-update flag is stored in network byte order */
+ if (attr->sq_sig_all)
+ qp->sq_signal_bits = htonl(MLX4_WQE_CTRL_CQ_UPDATE);
+ else
+ qp->sq_signal_bits = 0;
+
+ return &qp->ibv_qp;
+
+ /* Unwind in reverse order of acquisition; each label falls through */
+err_destroy:
+ ibv_cmd_destroy_qp(&qp->ibv_qp);
+
+err_rq_db:
+ mlx4_free_db(to_mctx(pd->context), qp->db);
+
+err_free:
+ /* The wrid arrays are presumably allocated by mlx4_alloc_qp_buf() -- confirm */
+ free(qp->sq.wrid);
+ free(qp->rq.wrid);
+ mlx4_free_buf(&qp->buf);
+
+err:
+ free(qp);
+
+ return NULL;
+}
+
+/*
+ * Query the QP attributes selected by attr_mask into attr and
+ * init_attr.
+ *
+ * Plain pass-through to ibv_cmd_query_qp() using the generic command
+ * structure; its return value is handed back unchanged.
+ */
+int mlx4_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+		  enum ibv_qp_attr_mask attr_mask,
+		  struct ibv_qp_init_attr *init_attr)
+{
+	struct ibv_query_qp req;
+	int rc;
+
+	rc = ibv_cmd_query_qp(qp, attr, attr_mask, init_attr,
+			      &req, sizeof req);
+
+	return rc;
+}
+
+/*
+ * Modify the QP attributes selected by attr_mask.
+ *
+ * The request is forwarded to ibv_cmd_modify_qp() and its result is
+ * returned.  When the command succeeds and moves the QP to
+ * IBV_QPS_RESET, the QP's entries are additionally cleaned out of its
+ * completion queue(s) with mlx4_cq_clean() and the QP indices are
+ * reinitialised.
+ */
+int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+		   enum ibv_qp_attr_mask attr_mask)
+{
+	struct ibv_modify_qp req;
+	struct mlx4_srq *msrq = NULL;
+	int rc = ibv_cmd_modify_qp(qp, attr, attr_mask, &req, sizeof req);
+
+	if (rc)
+		return rc;
+
+	/* Only a successful transition to RESET needs extra work */
+	if (!(attr_mask & IBV_QP_STATE) || attr->qp_state != IBV_QPS_RESET)
+		return rc;
+
+	if (qp->srq)
+		msrq = to_msrq(qp->srq);
+
+	mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num, msrq);
+
+	/* A shared send/receive CQ has already been cleaned above */
+	if (qp->send_cq != qp->recv_cq)
+		mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);
+
+	mlx4_init_qp_indices(to_mqp(qp));
+
+	return rc;
+}
+
+/*
+ * Take the locks of the QP's send and receive completion queues.
+ *
+ * When both are the same CQ its lock is taken once.  Otherwise the CQ
+ * with the smaller cqn is locked first, so that every caller acquires
+ * the pair in the same order.
+ */
+static void mlx4_lock_cqs(struct ibv_qp *qp)
+{
+	struct mlx4_cq *scq = to_mcq(qp->send_cq);
+	struct mlx4_cq *rcq = to_mcq(qp->recv_cq);
+	struct mlx4_cq *first;
+	struct mlx4_cq *second;
+
+	if (scq == rcq) {
+		pthread_spin_lock(&scq->lock);
+		return;
+	}
+
+	if (scq->cqn < rcq->cqn) {
+		first  = scq;
+		second = rcq;
+	} else {
+		first  = rcq;
+		second = scq;
+	}
+
+	pthread_spin_lock(&first->lock);
+	pthread_spin_lock(&second->lock);
+}
+
+/*
+ * Release the locks taken by mlx4_lock_cqs().
+ *
+ * When both CQs are the same its lock is released once.  Otherwise
+ * the locks are dropped in the reverse of the acquisition order: the
+ * CQ with the larger (or equal) cqn first, then the other one.
+ */
+static void mlx4_unlock_cqs(struct ibv_qp *qp)
+{
+	struct mlx4_cq *scq = to_mcq(qp->send_cq);
+	struct mlx4_cq *rcq = to_mcq(qp->recv_cq);
+	struct mlx4_cq *outer;
+	struct mlx4_cq *inner;
+
+	if (scq == rcq) {
+		pthread_spin_unlock(&scq->lock);
+		return;
+	}
+
+	if (scq->cqn < rcq->cqn) {
+		outer = scq;
+		inner = rcq;
+	} else {
+		outer = rcq;
+		inner = scq;
+	}
+
+	pthread_spin_unlock(&inner->lock);
+	pthread_spin_unlock(&outer->lock);
+}
+
+/*
+ * Destroy a queue pair.
+ *
+ * The QP is cleaned out of its completion queue(s) and removed from
+ * the context's QP table before the destroy command is issued.  If
+ * the command fails, the QP is put back into the table and the error
+ * code is returned with nothing freed.  On success the doorbell
+ * record, both wrid arrays, the buffer and the mlx4_qp are released
+ * and 0 is returned.
+ */
+int mlx4_destroy_qp(struct ibv_qp *ibqp)
+{
+ struct mlx4_qp *qp = to_mqp(ibqp);
+ int ret;
+
+ /* A CQ shared by send and receive only needs to be cleaned once */
+ mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
+ ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+ if (ibqp->send_cq != ibqp->recv_cq)
+ mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL);
+
+ /* Drop the QP from the table while holding both CQ locks */
+ mlx4_lock_cqs(ibqp);
+ mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
+ mlx4_unlock_cqs(ibqp);
+
+ ret = ibv_cmd_destroy_qp(ibqp);
+ if (ret) {
+ /*
+ * The QP still exists, so restore its table entry.
+ * NOTE(review): the return value of mlx4_store_qp() is
+ * ignored here -- confirm that a failed re-insert is acceptable.
+ */
+ mlx4_lock_cqs(ibqp);
+ mlx4_store_qp(to_mctx(ibqp->context), ibqp->qp_num, qp);
+ mlx4_unlock_cqs(ibqp);
+
+ return ret;
+ }
+
+ mlx4_free_db(to_mctx(ibqp->context), qp->db);
+ free(qp->sq.wrid);
+ free(qp->rq.wrid);
+ mlx4_free_buf(&qp->buf);
+ free(qp);
+
+ return 0;
+}
+
+/*
+ * Create an address handle for protection domain pd from attr.
+ *
+ * Allocates an mlx4_ah, zeroes its address vector and fills it in
+ * from attr: port number and PD number, source path bits, destination
+ * LID, static rate (offset by MLX4_STAT_RATE_OFFSET when non-zero),
+ * service level and, when attr->is_global is set, the GRH fields and
+ * destination GID.  Multi-byte fields are stored in network byte
+ * order.
+ *
+ * Returns a pointer to the embedded ibv_ah, or NULL if the allocation
+ * fails.
+ */
+struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
+{
+	struct mlx4_ah *ah;
+
+	ah = malloc(sizeof *ah);
+	if (!ah)
+		return NULL;
+
+	memset(&ah->av, 0, sizeof ah->av);
+
+	/*
+	 * sl, port_num and traffic_class are narrower than int in
+	 * libibverbs' struct ibv_ah_attr, so they promote to signed int.
+	 * Widen them to uint32_t before shifting: shifting a signed int
+	 * into its sign bit (e.g. sl >= 8 shifted by 28) is undefined.
+	 */
+	ah->av.port_pd = htonl(to_mpd(pd)->pdn |
+			       ((uint32_t) attr->port_num << 24));
+	ah->av.g_slid = attr->src_path_bits;
+	ah->av.dlid = htons(attr->dlid);
+	if (attr->static_rate) {
+		ah->av.stat_rate = attr->static_rate + MLX4_STAT_RATE_OFFSET;
+		/* XXX check rate cap? */
+	}
+	ah->av.sl_tclass_flowlabel = htonl((uint32_t) attr->sl << 28);
+	if (attr->is_global) {
+		/* Top bit of g_slid marks the address vector as global */
+		ah->av.g_slid |= 0x80;
+		ah->av.gid_index = attr->grh.sgid_index;
+		ah->av.hop_limit = attr->grh.hop_limit;
+		ah->av.sl_tclass_flowlabel |=
+			htonl(((uint32_t) attr->grh.traffic_class << 20) |
+			      attr->grh.flow_label);
+		memcpy(ah->av.dgid, attr->grh.dgid.raw, 16);
+	} else {
+		/* XXX needed?? low byte of GID must be 2 */
+		ah->av.dgid[3] = htonl(2);
+	}
+
+	return &ah->ibv_ah;
+}
+
+/*
+ * Destroy an address handle by freeing the mlx4_ah that contains it.
+ * Always returns 0.
+ */
+int mlx4_destroy_ah(struct ibv_ah *ah)
+{
+	struct mlx4_ah *mah = to_mah(ah);
+
+	free(mah);
+
+	return 0;
+}
+
+/*
+ * Attach a QP to the multicast group identified by gid and lid.
+ * Plain pass-through to ibv_cmd_attach_mcast(); its return value is
+ * handed back unchanged.
+ */
+int mlx4_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid)
+{
+	int rc;
+
+	rc = ibv_cmd_attach_mcast(qp, gid, lid);
+
+	return rc;
+}
+
+/*
+ * Detach a QP from the multicast group identified by gid and lid.
+ * Plain pass-through to ibv_cmd_detach_mcast(); its return value is
+ * handed back unchanged.
+ */
+int mlx4_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid)
+{
+	int rc;
+
+	rc = ibv_cmd_detach_mcast(qp, gid, lid);
+
+	return rc;
+}
diff --git a/src/wqe.h b/src/wqe.h
new file mode 100644
index 0000000..b19a31a
--- /dev/null
+++ b/src/wqe.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2007 Cisco, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef WQE_H
+#define WQE_H
+
+/*
+ * NOTE(review): presumably the offset of the send doorbell register
+ * within the mapped device page -- confirm against its users.
+ */
+enum {
+ MLX4_SEND_DOORBELL = 0x14,
+};
+
+/*
+ * Flag bits for WQE control segments.  MLX4_WQE_CTRL_CQ_UPDATE (both
+ * bits of the [3:2] "C" field) and MLX4_WQE_CTRL_SOLICIT (bit 1, "SE")
+ * line up with the low-byte flags documented for srcrb_flags in
+ * struct mlx4_wqe_ctrl_seg.
+ * NOTE(review): MLX4_WQE_CTRL_FENCE (bit 6) is presumably meant for
+ * the fence_size byte rather than srcrb_flags -- confirm.
+ */
+enum {
+ MLX4_WQE_CTRL_FENCE = 1 << 6,
+ MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2,
+ MLX4_WQE_CTRL_SOLICIT = 1 << 1,
+};
+
+/*
+ * NOTE(review): presumably the top bit of byte_count in struct
+ * mlx4_wqe_inline_seg, marking a segment as inline data -- confirm.
+ * NOTE(review): where int is 32 bits, 1 << 31 shifts into the sign
+ * bit of a signed int, which ISO C leaves undefined; it relies on the
+ * compiler producing 0x80000000.
+ */
+enum {
+ MLX4_INLINE_SEG = 1 << 31
+};
+
+/*
+ * NOTE(review): presumably an lkey value that marks a data segment as
+ * unused -- confirm against its users.
+ */
+enum {
+ MLX4_INVALID_LKEY = 0x100,
+};
+
+/*
+ * Control segment of a work queue entry (WQE): four 32-bit words.
+ * NOTE(review): the field layout presumably mirrors the hardware WQE
+ * format, so the order and sizes of the members must not change --
+ * confirm against the device documentation.
+ */
+struct mlx4_wqe_ctrl_seg {
+ /* NOTE(review): presumably ownership bit plus opcode -- confirm */
+ uint32_t owner_opcode;
+ uint8_t reserved2[3];
+ /* NOTE(review): presumably fence flag plus WQE size -- confirm */
+ uint8_t fence_size;
+ /*
+ * High 24 bits are SRC remote buffer; low 8 bits are flags:
+ * [7] SO (strong ordering)
+ * [5] TCP/UDP checksum
+ * [4] IP checksum
+ * [3:2] C (generate completion queue entry)
+ * [1] SE (solicited event)
+ * [0] FL (force loopback)
+ */
+ uint32_t srcrb_flags;
+ /*
+ * imm is immediate data for send/RDMA write w/ immediate;
+ * also invalidation key for send with invalidate; input
+ * modifier for WQEs on CCQs.
+ */
+ uint32_t imm;
+};
+
+/*
+ * Datagram segment of a WQE: eight 32-bit words of address vector
+ * followed by two 32-bit fields and two reserved words.
+ * NOTE(review): dqpn and qkey are presumably the destination QP
+ * number and Q_Key of the datagram -- confirm.
+ */
+struct mlx4_wqe_datagram_seg {
+ uint32_t av[8];
+ uint32_t dqpn;
+ uint32_t qkey;
+ uint32_t reserved[2];
+};
+
+/*
+ * Data segment of a WQE.
+ * NOTE(review): presumably one scatter/gather entry -- buffer length,
+ * local key and buffer address -- confirm.
+ */
+struct mlx4_wqe_data_seg {
+ uint32_t byte_count;
+ uint32_t lkey;
+ uint64_t addr;
+};
+
+/*
+ * Header of an inline-data segment of a WQE: a single 32-bit count.
+ * NOTE(review): presumably the inline payload follows this header
+ * directly in the WQE -- confirm.
+ */
+struct mlx4_wqe_inline_seg {
+ uint32_t byte_count;
+};
+
+/*
+ * "Next" segment of an SRQ WQE: a 16-bit index surrounded by reserved
+ * space, 16 bytes in total.
+ * NOTE(review): next_wqe_index presumably links to the following WQE
+ * in the SRQ -- confirm.
+ */
+struct mlx4_wqe_srq_next_seg {
+ uint16_t reserved1;
+ uint16_t next_wqe_index;
+ uint32_t reserved2[3];
+};
+
+/* XXX the rest of these are still old WQE formats... */
+/*
+ * Bind segment of a WQE.
+ * NOTE(review): presumably describes a memory-window bind (access
+ * flags, keys, address and length) -- confirm.
+ */
+struct mlx4_bind_seg {
+ uint32_t flags; /* [31] Atomic [30] rem write [29] rem read */
+ uint32_t reserved;
+ uint32_t new_rkey;
+ uint32_t lkey;
+ uint64_t addr;
+ uint64_t length;
+};
+
+/*
+ * Remote-address segment of a WQE.
+ * NOTE(review): presumably the remote virtual address and rkey of an
+ * RDMA or atomic operation -- confirm.
+ */
+struct mlx4_raddr_seg {
+ uint64_t raddr;
+ uint32_t rkey;
+ uint32_t reserved;
+};
+
+/*
+ * Atomic segment of a WQE: two 64-bit operands.
+ * NOTE(review): presumably swap_add is the swap or add operand and
+ * compare the compare operand of an atomic operation -- confirm.
+ */
+struct mlx4_atomic_seg {
+ uint64_t swap_add;
+ uint64_t compare;
+};
+
+#endif /* WQE_H */