about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2022-12-14 09:27:13 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2022-12-14 09:27:13 -0800
commit ab425febda94c7d287ea3433cbd0971771d6aeb4 (patch)
tree 66240fae1e9720214afda604635c3f0da0b9ebfa
parent 08cdc2157966c07d3f986a097ddaa74cee312751 (diff)
parent dbc94a0fb81771a38733c0e8f2ea8c4fa6934dc1 (diff)
download linux-ab425febda94c7d287ea3433cbd0971771d6aeb4.tar.gz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
 "Usual size of updates, a new driver, and most of the bulk focusing on
  rxe:

   - Usual typos, style, and language updates

   - Driver updates for mlx5, irdma, siw, rts, srp, hfi1, hns, erdma,
     mlx4, srp

   - Lots of RXE updates:
      * Improve reply error handling for bad MR operations
      * Code tidying
      * Debug printing uses common loggers
      * Remove half implemented RD related stuff
      * Support IBA's recently defined Atomic Write and Flush operations

   - erdma support for atomic operations

   - New driver 'mana' for Ethernet HW available in Azure VMs. This
     driver only supports DPDK"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (122 commits)
  IB/IPoIB: Fix queue count inconsistency for PKEY child interfaces
  RDMA: Add missed netdev_put() for the netdevice_tracker
  RDMA/rxe: Enable RDMA FLUSH capability for rxe device
  RDMA/cm: Make QP FLUSHABLE for supported device
  RDMA/rxe: Implement flush completion
  RDMA/rxe: Implement flush execution in responder side
  RDMA/rxe: Implement RC RDMA FLUSH service in requester side
  RDMA/rxe: Extend rxe packet format to support flush
  RDMA/rxe: Allow registering persistent flag for pmem MR only
  RDMA/rxe: Extend rxe user ABI to support flush
  RDMA: Extend RDMA kernel verbs ABI to support flush
  RDMA: Extend RDMA user ABI to support flush
  RDMA/rxe: Fix incorrect responder length checking
  RDMA/rxe: Fix oops with zero length reads
  RDMA/mlx5: Remove not-used IB_FLOW_SPEC_IB define
  RDMA/hns: Fix XRC caps on HIP08
  RDMA/hns: Fix error code of CMD
  RDMA/hns: Fix page size cap from firmware
  RDMA/hns: Fix PBL page MTR find
  RDMA/hns: Fix AH attr queried by query_qp
  ...
-rw-r--r--MAINTAINERS9
-rw-r--r--drivers/infiniband/Kconfig3
-rw-r--r--drivers/infiniband/core/cache.c2
-rw-r--r--drivers/infiniband/core/cm.c13
-rw-r--r--drivers/infiniband/core/cma.c2
-rw-r--r--drivers/infiniband/core/device.c10
-rw-r--r--drivers/infiniband/core/mad.c5
-rw-r--r--drivers/infiniband/core/nldev.c50
-rw-r--r--drivers/infiniband/core/restrack.c2
-rw-r--r--drivers/infiniband/core/sysfs.c17
-rw-r--r--drivers/infiniband/core/uverbs_std_types_qp.c2
-rw-r--r--drivers/infiniband/hw/Makefile1
-rw-r--r--drivers/infiniband/hw/erdma/erdma.h4
-rw-r--r--drivers/infiniband/hw/erdma/erdma_cq.c2
-rw-r--r--drivers/infiniband/hw/erdma/erdma_hw.h37
-rw-r--r--drivers/infiniband/hw/erdma/erdma_main.c15
-rw-r--r--drivers/infiniband/hw/erdma/erdma_qp.c73
-rw-r--r--drivers/infiniband/hw/erdma/erdma_verbs.c25
-rw-r--r--drivers/infiniband/hw/erdma/erdma_verbs.h19
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c2
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c2
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c6
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c22
-rw-r--r--drivers/infiniband/hw/hfi1/netdev_rx.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c217
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h13
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c18
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c4
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c107
-rw-r--r--drivers/infiniband/hw/irdma/uk.c170
-rw-r--r--drivers/infiniband/hw/irdma/user.h20
-rw-r--r--drivers/infiniband/hw/irdma/utils.c2
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c145
-rw-r--r--drivers/infiniband/hw/irdma/verbs.h53
-rw-r--r--drivers/infiniband/hw/mana/Kconfig10
-rw-r--r--drivers/infiniband/hw/mana/Makefile4
-rw-r--r--drivers/infiniband/hw/mana/cq.c79
-rw-r--r--drivers/infiniband/hw/mana/device.c117
-rw-r--r--drivers/infiniband/hw/mana/main.c521
-rw-r--r--drivers/infiniband/hw/mana/mana_ib.h162
-rw-r--r--drivers/infiniband/hw/mana/mr.c197
-rw-r--r--drivers/infiniband/hw/mana/qp.c506
-rw-r--r--drivers/infiniband/hw/mana/wq.c115
-rw-r--r--drivers/infiniband/hw/mlx4/main.c12
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c27
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c1
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h4
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c6
-rw-r--r--drivers/infiniband/hw/qedr/main.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c7
-rw-r--r--drivers/infiniband/hw/qib/qib_tx.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h19
-rw-r--r--drivers/infiniband/sw/rxe/rxe_av.c43
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c47
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c8
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hdr.h48
-rw-r--r--drivers/infiniband/sw/rxe/rxe_icrc.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h9
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mmap.c6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c122
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mw.c23
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c42
-rw-r--r--drivers/infiniband/sw/rxe/rxe_opcode.c35
-rw-r--r--drivers/infiniband/sw/rxe/rxe_opcode.h17
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h7
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c98
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c50
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c329
-rw-r--r--drivers/infiniband/sw/rxe/rxe_srq.c20
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.c52
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.h19
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c106
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h7
-rw-r--r--drivers/infiniband/sw/siw/siw_cq.c24
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c40
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_netlink.c7
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c67
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c5
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.c6
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-pri.h3
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c13
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv.c72
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs.c22
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c96
-rw-r--r--drivers/net/ethernet/microsoft/mana/gdma_main.c3
-rw-r--r--include/net/mana/gdma.h31
-rw-r--r--include/net/mana/mana.h3
-rw-r--r--include/rdma/ib_pack.h5
-rw-r--r--include/rdma/ib_verbs.h24
-rw-r--r--include/rdma/opa_vnic.h2
-rw-r--r--include/trace/events/ib_mad.h13
-rw-r--r--include/uapi/rdma/hns-abi.h15
-rw-r--r--include/uapi/rdma/ib_user_ioctl_verbs.h3
-rw-r--r--include/uapi/rdma/ib_user_verbs.h21
-rw-r--r--include/uapi/rdma/mana-abi.h66
-rw-r--r--include/uapi/rdma/rdma_user_rxe.h8
99 files changed, 3621 insertions, 897 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index abb08d7cc92dec..73bd43eb8d4da2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13806,6 +13806,15 @@ F: drivers/scsi/smartpqi/smartpqi*.[ch]
F: include/linux/cciss*.h
F: include/uapi/linux/cciss*.h
+MICROSOFT MANA RDMA DRIVER
+M: Long Li <longli@microsoft.com>
+M: Ajay Sharma <sharmaajay@microsoft.com>
+L: linux-rdma@vger.kernel.org
+S: Supported
+F: drivers/infiniband/hw/mana/
+F: include/net/mana
+F: include/uapi/rdma/mana-abi.h
+
MICROSOFT SURFACE AGGREGATOR TABLET-MODE SWITCH
M: Maximilian Luz <luzmaximilian@gmail.com>
L: platform-driver-x86@vger.kernel.org
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index aa36ac618e7291..a5827d11e9346a 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -78,6 +78,7 @@ config INFINIBAND_VIRT_DMA
def_bool !HIGHMEM
if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
+if !UML
source "drivers/infiniband/hw/bnxt_re/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/efa/Kconfig"
@@ -85,6 +86,7 @@ source "drivers/infiniband/hw/erdma/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/hns/Kconfig"
source "drivers/infiniband/hw/irdma/Kconfig"
+source "drivers/infiniband/hw/mana/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
source "drivers/infiniband/hw/mthca/Kconfig"
@@ -94,6 +96,7 @@ source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
+endif # !UML
source "drivers/infiniband/sw/rxe/Kconfig"
source "drivers/infiniband/sw/siw/Kconfig"
endif
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 4084d05a45102d..2e91d887932658 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1422,7 +1422,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
*vlan_id = vlan_dev_vlan_id(ndev);
} else {
/* If the netdev is upper device and if it's lower
- * device is vlan device, consider vlan id of the
+ * device is vlan device, consider vlan id of
* the lower vlan device for this gid entry.
*/
netdev_walk_all_lower_dev_rcu(attr->ndev,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 1f9938a2c47522..603c0aecc36144 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -4094,9 +4094,18 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX | IB_QP_PORT;
qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
- if (cm_id_priv->responder_resources)
+ if (cm_id_priv->responder_resources) {
+ struct ib_device *ib_dev = cm_id_priv->id.device;
+ u64 support_flush = ib_dev->attrs.device_cap_flags &
+ (IB_DEVICE_FLUSH_GLOBAL | IB_DEVICE_FLUSH_PERSISTENT);
+ u32 flushable = support_flush ?
+ (IB_ACCESS_FLUSH_GLOBAL |
+ IB_ACCESS_FLUSH_PERSISTENT) : 0;
+
qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_ATOMIC;
+ IB_ACCESS_REMOTE_ATOMIC |
+ flushable;
+ }
qp_attr->pkey_index = cm_id_priv->av.pkey_index;
if (cm_id_priv->av.port)
qp_attr->port_num = cm_id_priv->av.port->port_num;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index aacd6254df77aa..68721ff10255e2 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -47,7 +47,7 @@ MODULE_LICENSE("Dual BSD/GPL");
#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
-#define CMA_IBOE_PACKET_LIFETIME 18
+#define CMA_IBOE_PACKET_LIFETIME 16
#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP
static const char * const cma_events[] = {
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index b69e2c4e4d2a40..894c06846224e6 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2159,14 +2159,16 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
return 0;
}
+ if (old_ndev)
+ netdev_tracker_free(ndev, &pdata->netdev_tracker);
if (ndev)
- dev_hold(ndev);
+ netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC);
rcu_assign_pointer(pdata->netdev, ndev);
spin_unlock_irqrestore(&pdata->netdev_lock, flags);
add_ndev_hash(pdata);
if (old_ndev)
- dev_put(old_ndev);
+ __dev_put(old_ndev);
return 0;
}
@@ -2199,7 +2201,7 @@ static void free_netdevs(struct ib_device *ib_dev)
* comparisons after the put
*/
rcu_assign_pointer(pdata->netdev, NULL);
- dev_put(ndev);
+ netdev_put(ndev, &pdata->netdev_tracker);
}
spin_unlock_irqrestore(&pdata->netdev_lock, flags);
}
@@ -2851,8 +2853,8 @@ err:
static void __exit ib_core_cleanup(void)
{
roce_gid_mgmt_cleanup();
- nldev_exit();
rdma_nl_unregister(RDMA_NL_LS);
+ nldev_exit();
unregister_pernet_device(&rdma_dev_net_ops);
unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
ib_sa_cleanup();
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 1893aa613ad734..674344eb8e2f48 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -59,9 +59,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
struct ib_mad_qp_info *qp_info,
struct trace_event_raw_ib_mad_send_template *entry)
{
- u16 pkey;
- struct ib_device *dev = qp_info->port_priv->device;
- u32 pnum = qp_info->port_priv->port_num;
struct ib_ud_wr *wr = &mad_send_wr->send_wr;
struct rdma_ah_attr attr = {};
@@ -69,8 +66,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
/* These are common */
entry->sl = attr.sl;
- ib_query_pkey(dev, pnum, wr->pkey_index, &pkey);
- entry->pkey = pkey;
entry->rqpn = wr->remote_qpn;
entry->rqkey = wr->remote_qkey;
entry->dlid = rdma_ah_get_dlid(&attr);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 12dc97067ed2b8..d5d3e4f0de779e 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -513,7 +513,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
/* In create_qp() port is not set yet */
if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
- return -EINVAL;
+ return -EMSGSIZE;
ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
if (ret)
@@ -552,7 +552,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_cm_id *cm_id = &id_priv->id;
if (port && port != cm_id->port_num)
- return 0;
+ return -EAGAIN;
if (cm_id->port_num &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
@@ -894,6 +894,8 @@ static int fill_stat_counter_qps(struct sk_buff *msg,
int ret = 0;
table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
+ if (!table_attr)
+ return -EMSGSIZE;
rt = &counter->device->res[RDMA_RESTRACK_QP];
xa_lock(&rt->xa);
@@ -1041,6 +1043,10 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, 0);
+ if (!nlh) {
+ err = -EMSGSIZE;
+ goto err_free;
+ }
err = fill_dev_info(msg, device);
if (err)
@@ -1126,7 +1132,7 @@ static int _nldev_get_dumpit(struct ib_device *device,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, NLM_F_MULTI);
- if (fill_dev_info(skb, device)) {
+ if (!nlh || fill_dev_info(skb, device)) {
nlmsg_cancel(skb, nlh);
goto out;
}
@@ -1185,6 +1191,10 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, 0);
+ if (!nlh) {
+ err = -EMSGSIZE;
+ goto err_free;
+ }
err = fill_port_info(msg, device, port, sock_net(skb->sk));
if (err)
@@ -1246,7 +1256,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
RDMA_NLDEV_CMD_PORT_GET),
0, NLM_F_MULTI);
- if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
+ if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) {
nlmsg_cancel(skb, nlh);
goto out;
}
@@ -1288,6 +1298,10 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
0, 0);
+ if (!nlh) {
+ ret = -EMSGSIZE;
+ goto err_free;
+ }
ret = fill_res_info(msg, device);
if (ret)
@@ -1319,7 +1333,7 @@ static int _nldev_res_get_dumpit(struct ib_device *device,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
0, NLM_F_MULTI);
- if (fill_res_info(skb, device)) {
+ if (!nlh || fill_res_info(skb, device)) {
nlmsg_cancel(skb, nlh);
goto out;
}
@@ -1454,7 +1468,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_OP(nlh->nlmsg_type)),
0, 0);
- if (fill_nldev_handle(msg, device)) {
+ if (!nlh || fill_nldev_handle(msg, device)) {
ret = -EMSGSIZE;
goto err_free;
}
@@ -1533,7 +1547,7 @@ static int res_get_common_dumpit(struct sk_buff *skb,
RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
0, NLM_F_MULTI);
- if (fill_nldev_handle(skb, device)) {
+ if (!nlh || fill_nldev_handle(skb, device)) {
ret = -EMSGSIZE;
goto err;
}
@@ -1795,6 +1809,10 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_GET_CHARDEV),
0, 0);
+ if (!nlh) {
+ err = -EMSGSIZE;
+ goto out_nlmsg;
+ }
data.nl_msg = msg;
err = ib_get_client_nl_info(ibdev, client_name, &data);
@@ -1852,6 +1870,10 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_SYS_GET),
0, 0);
+ if (!nlh) {
+ nlmsg_free(msg);
+ return -EMSGSIZE;
+ }
err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
(u8)ib_devices_shared_netns);
@@ -2032,7 +2054,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_STAT_SET),
0, 0);
- if (fill_nldev_handle(msg, device) ||
+ if (!nlh || fill_nldev_handle(msg, device) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
ret = -EMSGSIZE;
goto err_free_msg;
@@ -2101,6 +2123,10 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_STAT_SET),
0, 0);
+ if (!nlh) {
+ ret = -EMSGSIZE;
+ goto err_fill;
+ }
cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
@@ -2171,7 +2197,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
RDMA_NLDEV_CMD_STAT_GET),
0, 0);
- if (fill_nldev_handle(msg, device) ||
+ if (!nlh || fill_nldev_handle(msg, device) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
ret = -EMSGSIZE;
goto err_msg;
@@ -2259,6 +2285,10 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_STAT_GET),
0, 0);
+ if (!nlh) {
+ ret = -EMSGSIZE;
+ goto err_msg;
+ }
ret = rdma_counter_get_mode(device, port, &mode, &mask);
if (ret)
@@ -2391,7 +2421,7 @@ static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
0, 0);
ret = -EMSGSIZE;
- if (fill_nldev_handle(msg, device) ||
+ if (!nlh || fill_nldev_handle(msg, device) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
goto err_msg;
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 1f935d9f617854..01a499a8b88dbd 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -343,8 +343,6 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
rt = &dev->res[res->type];
old = xa_erase(&rt->xa, res->id);
- if (res->type == RDMA_RESTRACK_MR)
- return;
WARN_ON(old != res);
out:
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 84c53bd2a52db9..ee59d739156899 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1213,6 +1213,9 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num,
p->port_num = port_num;
kobject_init(&p->kobj, &port_type);
+ if (device->port_data && is_full_dev)
+ device->port_data[port_num].sysfs = p;
+
cur_group = p->groups_list;
ret = alloc_port_table_group("gids", &p->groups[0], p->attrs_list,
attr->gid_tbl_len, show_port_gid);
@@ -1258,9 +1261,6 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num,
}
list_add_tail(&p->kobj.entry, &coredev->port_list);
- if (device->port_data && is_full_dev)
- device->port_data[port_num].sysfs = p;
-
return p;
err_groups:
@@ -1268,6 +1268,8 @@ err_groups:
err_del:
kobject_del(&p->kobj);
err_put:
+ if (device->port_data && is_full_dev)
+ device->port_data[port_num].sysfs = NULL;
kobject_put(&p->kobj);
return ERR_PTR(ret);
}
@@ -1276,14 +1278,17 @@ static void destroy_port(struct ib_core_device *coredev, struct ib_port *port)
{
bool is_full_dev = &port->ibdev->coredev == coredev;
- if (port->ibdev->port_data &&
- port->ibdev->port_data[port->port_num].sysfs == port)
- port->ibdev->port_data[port->port_num].sysfs = NULL;
list_del(&port->kobj.entry);
if (is_full_dev)
sysfs_remove_groups(&port->kobj, port->ibdev->ops.port_groups);
+
sysfs_remove_groups(&port->kobj, port->groups_list);
kobject_del(&port->kobj);
+
+ if (port->ibdev->port_data &&
+ port->ibdev->port_data[port->port_num].sysfs == port)
+ port->ibdev->port_data[port->port_num].sysfs = NULL;
+
kobject_put(&port->kobj);
}
diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c
index dd1075466f61be..7b4773fa4bc0bd 100644
--- a/drivers/infiniband/core/uverbs_std_types_qp.c
+++ b/drivers/infiniband/core/uverbs_std_types_qp.c
@@ -163,7 +163,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)(
UVERBS_ATTR_CREATE_QP_SRQ_HANDLE))
return -EINVAL;
- /* send_cq is optinal */
+ /* send_cq is optional */
if (cap.max_send_wr) {
send_cq = uverbs_attr_get_obj(attrs,
UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE);
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index 6b3a88046125ad..1211f4317a9f4f 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_INFINIBAND_QIB) += qib/
obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/
obj-$(CONFIG_INFINIBAND_EFA) += efa/
obj-$(CONFIG_INFINIBAND_IRDMA) += irdma/
+obj-$(CONFIG_MANA_INFINIBAND) += mana/
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h
index 730783fbc89492..3d8c11aa23a26f 100644
--- a/drivers/infiniband/hw/erdma/erdma.h
+++ b/drivers/infiniband/hw/erdma/erdma.h
@@ -124,6 +124,7 @@ struct erdma_devattr {
u32 fw_version;
unsigned char peer_addr[ETH_ALEN];
+ unsigned long cap_flags;
int numa_node;
enum erdma_cc_alg cc;
@@ -189,6 +190,7 @@ struct erdma_dev {
struct net_device *netdev;
struct pci_dev *pdev;
struct notifier_block netdev_nb;
+ struct workqueue_struct *reflush_wq;
resource_size_t func_bar_addr;
resource_size_t func_bar_len;
@@ -218,7 +220,7 @@ struct erdma_dev {
DECLARE_BITMAP(sdb_page, ERDMA_DWQE_TYPE0_CNT);
/*
* We provide max 496 uContexts that each has one SQ normal Db,
- * and one directWQE db。
+ * and one directWQE db.
*/
DECLARE_BITMAP(sdb_entry, ERDMA_DWQE_TYPE1_CNT);
diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c
index 58e0dc5c75d1d2..cabd8678b35589 100644
--- a/drivers/infiniband/hw/erdma/erdma_cq.c
+++ b/drivers/infiniband/hw/erdma/erdma_cq.c
@@ -64,6 +64,8 @@ static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = {
[ERDMA_OP_REG_MR] = IB_WC_REG_MR,
[ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV,
[ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ,
+ [ERDMA_OP_ATOMIC_CAS] = IB_WC_COMP_SWAP,
+ [ERDMA_OP_ATOMIC_FAD] = IB_WC_FETCH_ADD,
};
static const struct {
diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
index e788887732e1f5..ab371fec610c32 100644
--- a/drivers/infiniband/hw/erdma/erdma_hw.h
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -145,6 +145,7 @@ enum CMDQ_RDMA_OPCODE {
CMDQ_OPCODE_MODIFY_QP = 3,
CMDQ_OPCODE_CREATE_CQ = 4,
CMDQ_OPCODE_DESTROY_CQ = 5,
+ CMDQ_OPCODE_REFLUSH = 6,
CMDQ_OPCODE_REG_MR = 8,
CMDQ_OPCODE_DEREG_MR = 9
};
@@ -224,8 +225,7 @@ struct erdma_cmdq_create_cq_req {
/* regmr cfg1 */
#define ERDMA_CMD_REGMR_PD_MASK GENMASK(31, 12)
#define ERDMA_CMD_REGMR_TYPE_MASK GENMASK(7, 6)
-#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 2)
-#define ERDMA_CMD_REGMR_ACC_MODE_MASK GENMASK(1, 0)
+#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 1)
/* regmr cfg2 */
#define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27)
@@ -302,8 +302,16 @@ struct erdma_cmdq_destroy_qp_req {
u32 qpn;
};
+struct erdma_cmdq_reflush_req {
+ u64 hdr;
+ u32 qpn;
+ u32 sq_pi;
+ u32 rq_pi;
+};
+
/* cap qword 0 definition */
#define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40)
+#define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24)
#define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16)
#define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0)
@@ -315,6 +323,10 @@ struct erdma_cmdq_destroy_qp_req {
#define ERDMA_NQP_PER_QBLOCK 1024
+enum {
+ ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7,
+};
+
#define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0)
/* CQE hdr */
@@ -340,9 +352,9 @@ struct erdma_cqe {
};
struct erdma_sge {
- __aligned_le64 laddr;
+ __aligned_le64 addr;
__le32 length;
- __le32 lkey;
+ __le32 key;
};
/* Receive Queue Element */
@@ -370,8 +382,7 @@ struct erdma_rqe {
#define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0)
/* REG MR attrs */
-#define ERDMA_SQE_MR_MODE_MASK GENMASK(1, 0)
-#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 2)
+#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 1)
#define ERDMA_SQE_MR_MTT_TYPE_MASK GENMASK(7, 6)
#define ERDMA_SQE_MR_MTT_CNT_MASK GENMASK(31, 12)
@@ -410,6 +421,16 @@ struct erdma_readreq_sqe {
__le32 rsvd;
};
+struct erdma_atomic_sqe {
+ __le64 hdr;
+ __le64 rsvd;
+ __le64 fetchadd_swap_data;
+ __le64 cmp_data;
+
+ struct erdma_sge remote;
+ struct erdma_sge sgl;
+};
+
struct erdma_reg_mr_sqe {
__le64 hdr;
__le64 addr;
@@ -469,7 +490,9 @@ enum erdma_opcode {
ERDMA_OP_REG_MR = 14,
ERDMA_OP_LOCAL_INV = 15,
ERDMA_OP_READ_WITH_INV = 16,
- ERDMA_NUM_OPCODES = 17,
+ ERDMA_OP_ATOMIC_CAS = 17,
+ ERDMA_OP_ATOMIC_FAD = 18,
+ ERDMA_NUM_OPCODES = 19,
ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1
};
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index 49778bb294ae43..5dc31e5df5cba7 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -374,6 +374,7 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev)
dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
dev->attrs.max_mr = dev->attrs.max_qp << 1;
dev->attrs.max_cq = dev->attrs.max_qp << 1;
+ dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0);
dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
dev->attrs.max_ord = ERDMA_MAX_ORD;
@@ -520,13 +521,22 @@ static int erdma_ib_device_add(struct pci_dev *pdev)
u64_to_ether_addr(mac, dev->attrs.peer_addr);
+ dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND,
+ WQ_UNBOUND_MAX_ACTIVE);
+ if (!dev->reflush_wq) {
+ ret = -ENOMEM;
+ goto err_alloc_workqueue;
+ }
+
ret = erdma_device_register(dev);
if (ret)
- goto err_out;
+ goto err_register;
return 0;
-err_out:
+err_register:
+ destroy_workqueue(dev->reflush_wq);
+err_alloc_workqueue:
xa_destroy(&dev->qp_xa);
xa_destroy(&dev->cq_xa);
@@ -542,6 +552,7 @@ static void erdma_ib_device_remove(struct pci_dev *pdev)
unregister_netdevice_notifier(&dev->netdev_nb);
ib_unregister_device(&dev->ibdev);
+ destroy_workqueue(dev->reflush_wq);
erdma_res_cb_free(dev);
xa_destroy(&dev->qp_xa);
xa_destroy(&dev->cq_xa);
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
index 5fe1a339a43543..d088d6bef431af 100644
--- a/drivers/infiniband/hw/erdma/erdma_qp.c
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -120,6 +120,7 @@ static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
enum erdma_qp_attr_mask mask)
{
+ bool need_reflush = false;
int drop_conn, ret = 0;
if (!mask)
@@ -135,6 +136,7 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
ret = erdma_modify_qp_state_to_rts(qp, attrs, mask);
} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
qp->attrs.state = ERDMA_QP_STATE_ERROR;
+ need_reflush = true;
if (qp->cep) {
erdma_cep_put(qp->cep);
qp->cep = NULL;
@@ -145,17 +147,12 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
case ERDMA_QP_STATE_RTS:
drop_conn = 0;
- if (attrs->state == ERDMA_QP_STATE_CLOSING) {
+ if (attrs->state == ERDMA_QP_STATE_CLOSING ||
+ attrs->state == ERDMA_QP_STATE_TERMINATE ||
+ attrs->state == ERDMA_QP_STATE_ERROR) {
ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
drop_conn = 1;
- } else if (attrs->state == ERDMA_QP_STATE_TERMINATE) {
- qp->attrs.state = ERDMA_QP_STATE_TERMINATE;
- ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
- drop_conn = 1;
- } else if (attrs->state == ERDMA_QP_STATE_ERROR) {
- ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
- qp->attrs.state = ERDMA_QP_STATE_ERROR;
- drop_conn = 1;
+ need_reflush = true;
}
if (drop_conn)
@@ -180,6 +177,12 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
break;
}
+ if (need_reflush && !ret && rdma_is_kernel_res(&qp->ibqp.res)) {
+ qp->flags |= ERDMA_QP_IN_FLUSHING;
+ mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+ usecs_to_jiffies(100));
+ }
+
return ret;
}
@@ -285,15 +288,16 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
u32 idx = *pi & (qp->attrs.sq_size - 1);
enum ib_wr_opcode op = send_wr->opcode;
+ struct erdma_atomic_sqe *atomic_sqe;
struct erdma_readreq_sqe *read_sqe;
struct erdma_reg_mr_sqe *regmr_sge;
struct erdma_write_sqe *write_sqe;
struct erdma_send_sqe *send_sqe;
struct ib_rdma_wr *rdma_wr;
- struct erdma_mr *mr;
+ struct erdma_sge *sge;
__le32 *length_field;
+ struct erdma_mr *mr;
u64 wqe_hdr, *entry;
- struct ib_sge *sge;
u32 attrs;
int ret;
@@ -360,9 +364,9 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
qp->attrs.sq_size, SQEBB_SHIFT);
- sge->addr = rdma_wr->remote_addr;
- sge->lkey = rdma_wr->rkey;
- sge->length = send_wr->sg_list[0].length;
+ sge->addr = cpu_to_le64(rdma_wr->remote_addr);
+ sge->key = cpu_to_le32(rdma_wr->rkey);
+ sge->length = cpu_to_le32(send_wr->sg_list[0].length);
wqe_size = sizeof(struct erdma_readreq_sqe) +
send_wr->num_sge * sizeof(struct ib_sge);
@@ -397,8 +401,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
regmr_sge->addr = cpu_to_le64(mr->ibmr.iova);
regmr_sge->length = cpu_to_le32(mr->ibmr.length);
regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key);
- attrs = FIELD_PREP(ERDMA_SQE_MR_MODE_MASK, 0) |
- FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
+ attrs = FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK,
mr->mem.mtt_nents);
@@ -424,6 +427,35 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey);
wqe_size = sizeof(struct erdma_reg_mr_sqe);
goto out;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ atomic_sqe = (struct erdma_atomic_sqe *)entry;
+ if (op == IB_WR_ATOMIC_CMP_AND_SWP) {
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
+ ERDMA_OP_ATOMIC_CAS);
+ atomic_sqe->fetchadd_swap_data =
+ cpu_to_le64(atomic_wr(send_wr)->swap);
+ atomic_sqe->cmp_data =
+ cpu_to_le64(atomic_wr(send_wr)->compare_add);
+ } else {
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
+ ERDMA_OP_ATOMIC_FAD);
+ atomic_sqe->fetchadd_swap_data =
+ cpu_to_le64(atomic_wr(send_wr)->compare_add);
+ }
+
+ sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+ sge->addr = cpu_to_le64(atomic_wr(send_wr)->remote_addr);
+ sge->key = cpu_to_le32(atomic_wr(send_wr)->rkey);
+ sge++;
+
+ sge->addr = cpu_to_le64(send_wr->sg_list[0].addr);
+ sge->key = cpu_to_le32(send_wr->sg_list[0].lkey);
+ sge->length = cpu_to_le32(send_wr->sg_list[0].length);
+
+ wqe_size = sizeof(*atomic_sqe);
+ goto out;
default:
return -EOPNOTSUPP;
}
@@ -498,6 +530,10 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
}
spin_unlock_irqrestore(&qp->lock, flags);
+ if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
+ mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+ usecs_to_jiffies(100));
+
return ret;
}
@@ -551,5 +587,10 @@ int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
}
spin_unlock_irqrestore(&qp->lock, flags);
+
+ if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
+ mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+ usecs_to_jiffies(100));
+
return ret;
}
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index 62be98e2b94142..5dab1e87975ba2 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -118,8 +118,7 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
- FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access) |
- FIELD_PREP(ERDMA_CMD_REGMR_ACC_MODE_MASK, 0);
+ FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access);
req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
ilog2(mr->mem.page_size)) |
FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) |
@@ -289,6 +288,10 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
attr->max_mw = dev->attrs.max_mw;
attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
+
+ if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
+ attr->atomic_cap = IB_ATOMIC_GLOB;
+
attr->fw_ver = dev->attrs.fw_version;
if (dev->netdev)
@@ -376,6 +379,21 @@ int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
return 0;
}
+static void erdma_flush_worker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct erdma_qp *qp =
+ container_of(dwork, struct erdma_qp, reflush_dwork);
+ struct erdma_cmdq_reflush_req req;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_REFLUSH);
+ req.qpn = QP_ID(qp);
+ req.sq_pi = qp->kern_qp.sq_pi;
+ req.rq_pi = qp->kern_qp.rq_pi;
+ erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL);
+}
+
static int erdma_qp_validate_cap(struct erdma_dev *dev,
struct ib_qp_init_attr *attrs)
{
@@ -732,6 +750,7 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
qp->attrs.max_send_sge = attrs->cap.max_send_sge;
qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
qp->attrs.state = ERDMA_QP_STATE_IDLE;
+ INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);
ret = create_qp_cmd(dev, qp);
if (ret)
@@ -1025,6 +1044,8 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
up_write(&qp->state_lock);
+ cancel_delayed_work_sync(&qp->reflush_dwork);
+
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_DESTROY_QP);
req.qpn = QP_ID(qp);
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index ab6380635e9e64..e0a993bc032a44 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -71,16 +71,18 @@ struct erdma_pd {
#define ERDMA_MR_INLINE_MTT 0
#define ERDMA_MR_INDIRECT_MTT 1
-#define ERDMA_MR_ACC_LR BIT(0)
-#define ERDMA_MR_ACC_LW BIT(1)
-#define ERDMA_MR_ACC_RR BIT(2)
-#define ERDMA_MR_ACC_RW BIT(3)
+#define ERDMA_MR_ACC_RA BIT(0)
+#define ERDMA_MR_ACC_LR BIT(1)
+#define ERDMA_MR_ACC_LW BIT(2)
+#define ERDMA_MR_ACC_RR BIT(3)
+#define ERDMA_MR_ACC_RW BIT(4)
static inline u8 to_erdma_access_flags(int access)
{
return (access & IB_ACCESS_REMOTE_READ ? ERDMA_MR_ACC_RR : 0) |
(access & IB_ACCESS_LOCAL_WRITE ? ERDMA_MR_ACC_LW : 0) |
- (access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0);
+ (access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0) |
+ (access & IB_ACCESS_REMOTE_ATOMIC ? ERDMA_MR_ACC_RA : 0);
}
struct erdma_mem {
@@ -171,6 +173,10 @@ enum erdma_qp_attr_mask {
ERDMA_QP_ATTR_MPA = (1 << 7)
};
+enum erdma_qp_flags {
+ ERDMA_QP_IN_FLUSHING = (1 << 0),
+};
+
struct erdma_qp_attrs {
enum erdma_qp_state state;
enum erdma_cc_alg cc; /* Congestion control algorithm */
@@ -195,6 +201,9 @@ struct erdma_qp {
struct erdma_cep *cep;
struct rw_semaphore state_lock;
+ unsigned long flags;
+ struct delayed_work reflush_dwork;
+
union {
struct erdma_kqp kern_qp;
struct erdma_uqp user_qp;
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 877f8e84a672a4..77ee77d4000fbf 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -177,6 +177,8 @@ out:
for (node = 0; node < node_affinity.num_possible_nodes; node++)
hfi1_per_node_cntr[node] = 1;
+ pci_dev_put(dev);
+
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 8e71bef9d98265..bcc6bc0540f033 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -112,7 +112,7 @@ static int hfi1_caps_get(char *buffer, const struct kernel_param *kp)
cap_mask &= ~HFI1_CAP_LOCKED_SMASK;
cap_mask |= ((cap_mask & HFI1_CAP_K2U) << HFI1_CAP_USER_SHIFT);
- return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask);
+ return sysfs_emit(buffer, "0x%lx\n", cap_mask);
}
struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 1d77514ebbee0d..0c0cef5b1e0e55 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1743,6 +1743,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
if (!dd->platform_config.data) {
dd_dev_err(dd, "%s: Missing config file\n", __func__);
+ ret = -EINVAL;
goto bail;
}
ptr = (u32 *)dd->platform_config.data;
@@ -1751,6 +1752,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
ptr++;
if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) {
dd_dev_err(dd, "%s: Bad config file\n", __func__);
+ ret = -EINVAL;
goto bail;
}
@@ -1774,6 +1776,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
if (file_length > dd->platform_config.size) {
dd_dev_info(dd, "%s:File claims to be larger than read size\n",
__func__);
+ ret = -EINVAL;
goto bail;
} else if (file_length < dd->platform_config.size) {
dd_dev_info(dd,
@@ -1794,6 +1797,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
dd_dev_err(dd, "%s: Failed validation at offset %ld\n",
__func__, (ptr - (u32 *)
dd->platform_config.data));
+ ret = -EINVAL;
goto bail;
}
@@ -1837,6 +1841,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
__func__, table_type,
(ptr - (u32 *)
dd->platform_config.data));
+ ret = -EINVAL;
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table = ptr;
@@ -1856,6 +1861,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
__func__, table_type,
(ptr -
(u32 *)dd->platform_config.data));
+ ret = -EINVAL;
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table_metadata =
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 4146a2113a9540..e5e783c45810b2 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -2437,9 +2437,9 @@ struct opa_port_data_counters_msg {
__be64 port_vl_xmit_wait_data;
__be64 port_vl_rcv_bubble;
__be64 port_vl_mark_fecn;
- } vls[0];
+ } vls[];
/* array size defined by #bits set in vl_select_mask*/
- } port[1]; /* array size defined by #ports in attribute modifier */
+ } port;
};
struct opa_port_error_counters64_msg {
@@ -2470,9 +2470,9 @@ struct opa_port_error_counters64_msg {
u8 reserved3[7];
struct _vls_ectrs {
__be64 port_vl_xmit_discards;
- } vls[0];
+ } vls[];
/* array size defined by #bits set in vl_select_mask */
- } port[1]; /* array size defined by #ports in attribute modifier */
+ } port;
};
struct opa_port_error_info_msg {
@@ -2543,7 +2543,7 @@ struct opa_port_error_info_msg {
u8 error_info;
} __packed fm_config_ei;
__u32 reserved9;
- } port[1]; /* actual array size defined by #ports in attr modifier */
+ } port;
};
/* opa_port_error_info_msg error_info_select_mask bit definitions */
@@ -2966,7 +2966,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
}
/* Sanity check */
- response_data_size = struct_size(req, port[0].vls, num_vls);
+ response_data_size = struct_size(req, port.vls, num_vls);
if (response_data_size > sizeof(pmp->data)) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -2986,7 +2986,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
- rsp = &req->port[0];
+ rsp = &req->port;
memset(rsp, 0, sizeof(*rsp));
rsp->port_number = port;
@@ -3182,7 +3182,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
- response_data_size = struct_size(req, port[0].vls, num_vls);
+ response_data_size = struct_size(req, port.vls, num_vls);
if (response_data_size > sizeof(pmp->data)) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3201,7 +3201,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
- rsp = &req->port[0];
+ rsp = &req->port;
ibp = to_iport(ibdev, port_num);
ppd = ppd_from_ibp(ibp);
@@ -3340,7 +3340,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
u64 reg;
req = (struct opa_port_error_info_msg *)pmp->data;
- rsp = &req->port[0];
+ rsp = &req->port;
num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
@@ -3590,7 +3590,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
u32 error_info_select;
req = (struct opa_port_error_info_msg *)pmp->data;
- rsp = &req->port[0];
+ rsp = &req->port;
num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
index 3dfa5aff251256..720d4c85c9c93f 100644
--- a/drivers/infiniband/hw/hfi1/netdev_rx.c
+++ b/drivers/infiniband/hw/hfi1/netdev_rx.c
@@ -216,7 +216,7 @@ static int hfi1_netdev_rxq_init(struct hfi1_netdev_rx *rx)
* right now.
*/
set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state);
- netif_napi_add_weight(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
+ netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi);
rc = msix_netdev_request_rcd_irq(rxq->rcd);
if (rc)
goto bail_context_irq_failure;
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 723e55a7de8d9c..f701cc86896b38 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -202,6 +202,7 @@ struct hns_roce_ucontext {
struct list_head page_list;
struct mutex page_mutex;
struct hns_user_mmap_entry *db_mmap_entry;
+ u32 config;
};
struct hns_roce_pd {
@@ -334,6 +335,7 @@ struct hns_roce_wq {
u32 head;
u32 tail;
void __iomem *db_reg;
+ u32 ext_sge_cnt;
};
struct hns_roce_sge {
@@ -635,6 +637,7 @@ struct hns_roce_qp {
struct list_head rq_node; /* all recv qps are on a list */
struct list_head sq_node; /* all send qps are on a list */
struct hns_user_mmap_entry *dwqe_mmap_entry;
+ u32 config;
};
struct hns_roce_ib_iboe {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 1435fe2ea176f3..b2421883993b10 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -192,7 +192,6 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
unsigned int *sge_idx, u32 msg_len)
{
struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev;
- unsigned int ext_sge_sz = qp->sq.max_gs * HNS_ROCE_SGE_SIZE;
unsigned int left_len_in_pg;
unsigned int idx = *sge_idx;
unsigned int i = 0;
@@ -200,7 +199,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
void *addr;
void *dseg;
- if (msg_len > ext_sge_sz) {
+ if (msg_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE) {
ibdev_err(ibdev,
"no enough extended sge space for inline data.\n");
return -EINVAL;
@@ -1274,6 +1273,30 @@ static void update_cmdq_status(struct hns_roce_dev *hr_dev)
hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR;
}
+static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
+{
+ struct hns_roce_cmd_errcode errcode_table[] = {
+ {CMD_EXEC_SUCCESS, 0},
+ {CMD_NO_AUTH, -EPERM},
+ {CMD_NOT_EXIST, -EOPNOTSUPP},
+ {CMD_CRQ_FULL, -EXFULL},
+ {CMD_NEXT_ERR, -ENOSR},
+ {CMD_NOT_EXEC, -ENOTBLK},
+ {CMD_PARA_ERR, -EINVAL},
+ {CMD_RESULT_ERR, -ERANGE},
+ {CMD_TIMEOUT, -ETIME},
+ {CMD_HILINK_ERR, -ENOLINK},
+ {CMD_INFO_ILLEGAL, -ENXIO},
+ {CMD_INVALID, -EBADR},
+ };
+ u16 i;
+
+ for (i = 0; i < ARRAY_SIZE(errcode_table); i++)
+ if (desc_ret == errcode_table[i].return_status)
+ return errcode_table[i].errno;
+ return -EIO;
+}
+
static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
struct hns_roce_cmq_desc *desc, int num)
{
@@ -1319,7 +1342,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
dev_err_ratelimited(hr_dev->dev,
"Cmdq IO error, opcode = 0x%x, return = 0x%x.\n",
desc->opcode, desc_ret);
- ret = -EIO;
+ ret = hns_roce_cmd_err_convert_errno(desc_ret);
}
} else {
/* FW/HW reset or incorrect number of desc */
@@ -2024,13 +2047,14 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW |
HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR |
- HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL | HNS_ROCE_CAP_FLAG_XRC;
+ HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
caps->flags |= HNS_ROCE_CAP_FLAG_STASH |
- HNS_ROCE_CAP_FLAG_DIRECT_WQE;
+ HNS_ROCE_CAP_FLAG_DIRECT_WQE |
+ HNS_ROCE_CAP_FLAG_XRC;
caps->max_sq_inline = HNS_ROCE_V3_MAX_SQ_INLINE;
} else {
caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
@@ -2342,6 +2366,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->wqe_sge_hop_num = hr_reg_read(resp_d, PF_CAPS_D_EX_SGE_HOP_NUM);
caps->wqe_rq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_RQWQE_HOP_NUM);
+ if (!(caps->page_size_cap & PAGE_SIZE))
+ caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
+
return 0;
}
@@ -2631,31 +2658,124 @@ static void free_dip_list(struct hns_roce_dev *hr_dev)
spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
}
-static void free_mr_exit(struct hns_roce_dev *hr_dev)
+static struct ib_pd *free_mr_init_pd(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_pd *hr_pd;
+ struct ib_pd *pd;
+
+ hr_pd = kzalloc(sizeof(*hr_pd), GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(hr_pd))
+ return NULL;
+ pd = &hr_pd->ibpd;
+ pd->device = ibdev;
+
+ if (hns_roce_alloc_pd(pd, NULL)) {
+ ibdev_err(ibdev, "failed to create pd for free mr.\n");
+ kfree(hr_pd);
+ return NULL;
+ }
+ free_mr->rsv_pd = to_hr_pd(pd);
+ free_mr->rsv_pd->ibpd.device = &hr_dev->ib_dev;
+ free_mr->rsv_pd->ibpd.uobject = NULL;
+ free_mr->rsv_pd->ibpd.__internal_mr = NULL;
+ atomic_set(&free_mr->rsv_pd->ibpd.usecnt, 0);
+
+ return pd;
+}
+
+static struct ib_cq *free_mr_init_cq(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct ib_cq_init_attr cq_init_attr = {};
+ struct hns_roce_cq *hr_cq;
+ struct ib_cq *cq;
+
+ cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM;
+
+ hr_cq = kzalloc(sizeof(*hr_cq), GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(hr_cq))
+ return NULL;
+
+ cq = &hr_cq->ib_cq;
+ cq->device = ibdev;
+
+ if (hns_roce_create_cq(cq, &cq_init_attr, NULL)) {
+ ibdev_err(ibdev, "failed to create cq for free mr.\n");
+ kfree(hr_cq);
+ return NULL;
+ }
+ free_mr->rsv_cq = to_hr_cq(cq);
+ free_mr->rsv_cq->ib_cq.device = &hr_dev->ib_dev;
+ free_mr->rsv_cq->ib_cq.uobject = NULL;
+ free_mr->rsv_cq->ib_cq.comp_handler = NULL;
+ free_mr->rsv_cq->ib_cq.event_handler = NULL;
+ free_mr->rsv_cq->ib_cq.cq_context = NULL;
+ atomic_set(&free_mr->rsv_cq->ib_cq.usecnt, 0);
+
+ return cq;
+}
+
+static int free_mr_init_qp(struct hns_roce_dev *hr_dev, struct ib_cq *cq,
+ struct ib_qp_init_attr *init_attr, int i)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_qp *hr_qp;
+ struct ib_qp *qp;
int ret;
+
+ hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(hr_qp))
+ return -ENOMEM;
+
+ qp = &hr_qp->ibqp;
+ qp->device = ibdev;
+
+ ret = hns_roce_create_qp(qp, init_attr, NULL);
+ if (ret) {
+ ibdev_err(ibdev, "failed to create qp for free mr.\n");
+ kfree(hr_qp);
+ return ret;
+ }
+
+ free_mr->rsv_qp[i] = hr_qp;
+ free_mr->rsv_qp[i]->ibqp.recv_cq = cq;
+ free_mr->rsv_qp[i]->ibqp.send_cq = cq;
+
+ return 0;
+}
+
+static void free_mr_exit(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_qp *qp;
int i;
for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
if (free_mr->rsv_qp[i]) {
- ret = ib_destroy_qp(free_mr->rsv_qp[i]);
- if (ret)
- ibdev_err(&hr_dev->ib_dev,
- "failed to destroy qp in free mr.\n");
-
+ qp = &free_mr->rsv_qp[i]->ibqp;
+ hns_roce_v2_destroy_qp(qp, NULL);
+ kfree(free_mr->rsv_qp[i]);
free_mr->rsv_qp[i] = NULL;
}
}
if (free_mr->rsv_cq) {
- ib_destroy_cq(free_mr->rsv_cq);
+ hns_roce_destroy_cq(&free_mr->rsv_cq->ib_cq, NULL);
+ kfree(free_mr->rsv_cq);
free_mr->rsv_cq = NULL;
}
if (free_mr->rsv_pd) {
- ib_dealloc_pd(free_mr->rsv_pd);
+ hns_roce_dealloc_pd(&free_mr->rsv_pd->ibpd, NULL);
+ kfree(free_mr->rsv_pd);
free_mr->rsv_pd = NULL;
}
}
@@ -2664,55 +2784,46 @@ static int free_mr_alloc_res(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
- struct ib_device *ibdev = &hr_dev->ib_dev;
- struct ib_cq_init_attr cq_init_attr = {};
struct ib_qp_init_attr qp_init_attr = {};
struct ib_pd *pd;
struct ib_cq *cq;
- struct ib_qp *qp;
int ret;
int i;
- pd = ib_alloc_pd(ibdev, 0);
- if (IS_ERR(pd)) {
- ibdev_err(ibdev, "failed to create pd for free mr.\n");
- return PTR_ERR(pd);
- }
- free_mr->rsv_pd = pd;
+ pd = free_mr_init_pd(hr_dev);
+ if (!pd)
+ return -ENOMEM;
- cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM;
- cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_init_attr);
- if (IS_ERR(cq)) {
- ibdev_err(ibdev, "failed to create cq for free mr.\n");
- ret = PTR_ERR(cq);
- goto create_failed;
+ cq = free_mr_init_cq(hr_dev);
+ if (!cq) {
+ ret = -ENOMEM;
+ goto create_failed_cq;
}
- free_mr->rsv_cq = cq;
qp_init_attr.qp_type = IB_QPT_RC;
qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
- qp_init_attr.send_cq = free_mr->rsv_cq;
- qp_init_attr.recv_cq = free_mr->rsv_cq;
+ qp_init_attr.send_cq = cq;
+ qp_init_attr.recv_cq = cq;
for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
qp_init_attr.cap.max_send_wr = HNS_ROCE_FREE_MR_USED_SQWQE_NUM;
qp_init_attr.cap.max_send_sge = HNS_ROCE_FREE_MR_USED_SQSGE_NUM;
qp_init_attr.cap.max_recv_wr = HNS_ROCE_FREE_MR_USED_RQWQE_NUM;
qp_init_attr.cap.max_recv_sge = HNS_ROCE_FREE_MR_USED_RQSGE_NUM;
- qp = ib_create_qp(free_mr->rsv_pd, &qp_init_attr);
- if (IS_ERR(qp)) {
- ibdev_err(ibdev, "failed to create qp for free mr.\n");
- ret = PTR_ERR(qp);
- goto create_failed;
- }
-
- free_mr->rsv_qp[i] = qp;
+ ret = free_mr_init_qp(hr_dev, cq, &qp_init_attr, i);
+ if (ret)
+ goto create_failed_qp;
}
return 0;
-create_failed:
- free_mr_exit(hr_dev);
+create_failed_qp:
+ hns_roce_destroy_cq(cq, NULL);
+ kfree(cq);
+
+create_failed_cq:
+ hns_roce_dealloc_pd(pd, NULL);
+ kfree(pd);
return ret;
}
@@ -2728,14 +2839,17 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
int mask;
int ret;
- hr_qp = to_hr_qp(free_mr->rsv_qp[sl_num]);
+ hr_qp = to_hr_qp(&free_mr->rsv_qp[sl_num]->ibqp);
hr_qp->free_mr_en = 1;
+ hr_qp->ibqp.device = ibdev;
+ hr_qp->ibqp.qp_type = IB_QPT_RC;
mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS;
attr->qp_state = IB_QPS_INIT;
attr->port_num = 1;
attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
- ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
+ IB_QPS_INIT);
if (ret) {
ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
ret);
@@ -2756,7 +2870,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num);
- ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
+ IB_QPS_RTR);
hr_dev->loop_idc = loopback;
if (ret) {
ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n",
@@ -2770,7 +2885,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
attr->sq_psn = HNS_ROCE_FREE_MR_USED_PSN;
attr->retry_cnt = HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT;
attr->timeout = HNS_ROCE_FREE_MR_USED_QP_TIMEOUT;
- ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_RTR,
+ IB_QPS_RTS);
if (ret)
ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n",
ret);
@@ -3186,7 +3302,8 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
int i, count;
count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
- ARRAY_SIZE(pages), &pbl_ba);
+ min_t(int, ARRAY_SIZE(pages), mr->npages),
+ &pbl_ba);
if (count < 1) {
ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n",
count);
@@ -3414,7 +3531,7 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
mutex_lock(&free_mr->mutex);
for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
- hr_qp = to_hr_qp(free_mr->rsv_qp[i]);
+ hr_qp = free_mr->rsv_qp[i];
ret = free_mr_post_send_lp_wqe(hr_qp);
if (ret) {
@@ -3429,7 +3546,7 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
end = msecs_to_jiffies(HNS_ROCE_V2_FREE_MR_TIMEOUT) + jiffies;
while (cqe_cnt) {
- npolled = hns_roce_v2_poll_cq(free_mr->rsv_cq, cqe_cnt, wc);
+ npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc);
if (npolled < 0) {
ibdev_err(ibdev,
"failed to poll cqe for free mr, remain %d cqe.\n",
@@ -5375,6 +5492,8 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
rdma_ah_set_sl(&qp_attr->ah_attr,
hr_reg_read(&context, QPC_SL));
+ rdma_ah_set_port_num(&qp_attr->ah_attr, hr_qp->port + 1);
+ rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH);
grh->flow_label = hr_reg_read(&context, QPC_FL);
grh->sgid_index = hr_reg_read(&context, QPC_GMV_IDX);
grh->hop_limit = hr_reg_read(&context, QPC_HOPLIMIT);
@@ -5468,7 +5587,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
return ret;
}
-static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index c7bf2d52c1cdb2..b1b3e1e0b84e55 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -272,6 +272,11 @@ enum hns_roce_cmd_return_status {
CMD_OTHER_ERR = 0xff
};
+struct hns_roce_cmd_errcode {
+ enum hns_roce_cmd_return_status return_status;
+ int errno;
+};
+
enum hns_roce_sgid_type {
GID_TYPE_FLAG_ROCE_V1 = 0,
GID_TYPE_FLAG_ROCE_V2_IPV4,
@@ -1327,9 +1332,9 @@ struct hns_roce_link_table {
#define HNS_ROCE_EXT_LLM_MIN_PAGES(que_num) ((que_num) * 4 + 2)
struct hns_roce_v2_free_mr {
- struct ib_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM];
- struct ib_cq *rsv_cq;
- struct ib_pd *rsv_pd;
+ struct hns_roce_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM];
+ struct hns_roce_cq *rsv_cq;
+ struct hns_roce_pd *rsv_pd;
struct mutex mutex;
};
@@ -1459,6 +1464,8 @@ struct hns_roce_sccc_clr_done {
__le32 rsv[5];
};
+int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+
static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2],
void __iomem *dest)
{
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index dcf89689a4c628..8ba68ac12388de 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -354,10 +354,11 @@ static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx)
static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
- int ret;
struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
- struct hns_roce_ib_alloc_ucontext_resp resp = {};
struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
+ struct hns_roce_ib_alloc_ucontext_resp resp = {};
+ struct hns_roce_ib_alloc_ucontext ucmd = {};
+ int ret;
if (!hr_dev->active)
return -EAGAIN;
@@ -365,6 +366,19 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
resp.qp_tab_size = hr_dev->caps.num_qps;
resp.srq_tab_size = hr_dev->caps.num_srqs;
+ ret = ib_copy_from_udata(&ucmd, udata,
+ min(udata->inlen, sizeof(ucmd)));
+ if (ret)
+ return ret;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS;
+
+ if (context->config & HNS_ROCE_EXSGE_FLAGS) {
+ resp.config |= HNS_ROCE_RSP_EXSGE_FLAGS;
+ resp.max_inline_data = hr_dev->caps.max_sq_inline;
+ }
+
ret = hns_roce_uar_alloc(hr_dev, &context->uar);
if (ret)
goto error_fail_uar_alloc;
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 845ac7d3831f42..37a5cf62f88b48 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -392,10 +392,10 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
return &mr->ibmr;
-err_key:
- free_mr_key(hr_dev, mr);
err_pbl:
free_mr_pbl(hr_dev, mr);
+err_key:
+ free_mr_key(hr_dev, mr);
err_free:
kfree(mr);
return ERR_PTR(ret);
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index f0bd82a18069a6..0ae335fb205cad 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -476,38 +476,109 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
return 0;
}
-static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp)
+static u32 get_max_inline_data(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap)
{
- /* GSI/UD QP only has extended sge */
- if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD)
- return qp->sq.max_gs;
-
- if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
- return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE;
+ if (cap->max_inline_data) {
+ cap->max_inline_data = roundup_pow_of_two(cap->max_inline_data);
+ return min(cap->max_inline_data,
+ hr_dev->caps.max_sq_inline);
+ }
return 0;
}
+static void update_inline_data(struct hns_roce_qp *hr_qp,
+ struct ib_qp_cap *cap)
+{
+ u32 sge_num = hr_qp->sq.ext_sge_cnt;
+
+ if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
+ if (!(hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD))
+ sge_num = max((u32)HNS_ROCE_SGE_IN_WQE, sge_num);
+
+ cap->max_inline_data = max(cap->max_inline_data,
+ sge_num * HNS_ROCE_SGE_SIZE);
+ }
+
+ hr_qp->max_inline_data = cap->max_inline_data;
+}
+
+static u32 get_sge_num_from_max_send_sge(bool is_ud_or_gsi,
+ u32 max_send_sge)
+{
+ unsigned int std_sge_num;
+ unsigned int min_sge;
+
+ std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
+ min_sge = is_ud_or_gsi ? 1 : 0;
+ return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) :
+ min_sge;
+}
+
+static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi,
+ u32 max_inline_data)
+{
+ unsigned int inline_sge;
+
+ inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
+
+ /*
+ * If max_inline_data is no larger than
+ * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE, there is no need
+ * to use extended sge, except for UD/GSI QPs.
+ */
+ if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE)
+ inline_sge = 0;
+
+ return inline_sge;
+}
+
static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap)
{
+ bool is_ud_or_gsi = (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD);
+ unsigned int std_sge_num;
+ u32 inline_ext_sge = 0;
+ u32 ext_wqe_sge_cnt;
u32 total_sge_cnt;
- u32 wqe_sge_cnt;
+
+ cap->max_inline_data = get_max_inline_data(hr_dev, cap);
hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
+ std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
+ ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud_or_gsi,
+ cap->max_send_sge);
- hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
+ if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
+ inline_ext_sge = max(ext_wqe_sge_cnt,
+ get_sge_num_from_max_inl_data(is_ud_or_gsi,
+ cap->max_inline_data));
+ hr_qp->sq.ext_sge_cnt = inline_ext_sge ?
+ roundup_pow_of_two(inline_ext_sge) : 0;
- wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp);
+ hr_qp->sq.max_gs = max(1U, (hr_qp->sq.ext_sge_cnt + std_sge_num));
+ hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+
+ ext_wqe_sge_cnt = hr_qp->sq.ext_sge_cnt;
+ } else {
+ hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
+ hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+ hr_qp->sq.ext_sge_cnt = hr_qp->sq.max_gs;
+ }
/* If the number of extended sge is not zero, they MUST use the
* space of HNS_HW_PAGE_SIZE at least.
*/
- if (wqe_sge_cnt) {
- total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt);
+ if (ext_wqe_sge_cnt) {
+ total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * ext_wqe_sge_cnt);
hr_qp->sge.sge_cnt = max(total_sge_cnt,
(u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
}
+
+ update_inline_data(hr_qp, cap);
}
static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
@@ -556,6 +627,7 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
hr_qp->sq.wqe_cnt = cnt;
+ cap->max_send_sge = hr_qp->sq.max_gs;
return 0;
}
@@ -986,13 +1058,9 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
struct hns_roce_ib_create_qp *ucmd)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_ucontext *uctx;
int ret;
- if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline)
- init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline;
-
- hr_qp->max_inline_data = init_attr->cap.max_inline_data;
-
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
else
@@ -1015,12 +1083,17 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
return ret;
}
+ uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext,
+ ibucontext);
+ hr_qp->config = uctx->config;
ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
if (ret)
ibdev_err(ibdev,
"failed to set user SQ size, ret = %d.\n",
ret);
} else {
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
if (ret)
ibdev_err(ibdev,
diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
index a6e5d350a94ce8..16183e894da77b 100644
--- a/drivers/infiniband/hw/irdma/uk.c
+++ b/drivers/infiniband/hw/irdma/uk.c
@@ -566,21 +566,37 @@ static void irdma_set_mw_bind_wqe_gen_1(__le64 *wqe,
/**
* irdma_copy_inline_data_gen_1 - Copy inline data to wqe
- * @dest: pointer to wqe
- * @src: pointer to inline data
- * @len: length of inline data to copy
+ * @wqe: pointer to wqe
+ * @sge_list: table of pointers to inline data
+ * @num_sges: number of SGE's
* @polarity: compatibility parameter
*/
-static void irdma_copy_inline_data_gen_1(u8 *dest, u8 *src, u32 len,
- u8 polarity)
+static void irdma_copy_inline_data_gen_1(u8 *wqe, struct ib_sge *sge_list,
+ u32 num_sges, u8 polarity)
{
- if (len <= 16) {
- memcpy(dest, src, len);
- } else {
- memcpy(dest, src, 16);
- src += 16;
- dest = dest + 32;
- memcpy(dest, src, len - 16);
+ u32 quanta_bytes_remaining = 16;
+ int i;
+
+ for (i = 0; i < num_sges; i++) {
+ u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr;
+ u32 sge_len = sge_list[i].length;
+
+ while (sge_len) {
+ u32 bytes_copied;
+
+ bytes_copied = min(sge_len, quanta_bytes_remaining);
+ memcpy(wqe, cur_sge, bytes_copied);
+ wqe += bytes_copied;
+ cur_sge += bytes_copied;
+ quanta_bytes_remaining -= bytes_copied;
+ sge_len -= bytes_copied;
+
+ if (!quanta_bytes_remaining) {
+ /* Remaining inline bytes reside after hdr */
+ wqe += 16;
+ quanta_bytes_remaining = 32;
+ }
+ }
}
}
@@ -612,35 +628,51 @@ static void irdma_set_mw_bind_wqe(__le64 *wqe,
/**
* irdma_copy_inline_data - Copy inline data to wqe
- * @dest: pointer to wqe
- * @src: pointer to inline data
- * @len: length of inline data to copy
+ * @wqe: pointer to wqe
+ * @sge_list: table of pointers to inline data
+ * @num_sges: number of SGE's
* @polarity: polarity of wqe valid bit
*/
-static void irdma_copy_inline_data(u8 *dest, u8 *src, u32 len, u8 polarity)
+static void irdma_copy_inline_data(u8 *wqe, struct ib_sge *sge_list,
+ u32 num_sges, u8 polarity)
{
u8 inline_valid = polarity << IRDMA_INLINE_VALID_S;
- u32 copy_size;
-
- dest += 8;
- if (len <= 8) {
- memcpy(dest, src, len);
- return;
- }
-
- *((u64 *)dest) = *((u64 *)src);
- len -= 8;
- src += 8;
- dest += 24; /* point to additional 32 byte quanta */
-
- while (len) {
- copy_size = len < 31 ? len : 31;
- memcpy(dest, src, copy_size);
- *(dest + 31) = inline_valid;
- len -= copy_size;
- dest += 32;
- src += copy_size;
+ u32 quanta_bytes_remaining = 8;
+ bool first_quanta = true;
+ int i;
+
+ wqe += 8;
+
+ for (i = 0; i < num_sges; i++) {
+ u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr;
+ u32 sge_len = sge_list[i].length;
+
+ while (sge_len) {
+ u32 bytes_copied;
+
+ bytes_copied = min(sge_len, quanta_bytes_remaining);
+ memcpy(wqe, cur_sge, bytes_copied);
+ wqe += bytes_copied;
+ cur_sge += bytes_copied;
+ quanta_bytes_remaining -= bytes_copied;
+ sge_len -= bytes_copied;
+
+ if (!quanta_bytes_remaining) {
+ quanta_bytes_remaining = 31;
+
+ /* Remaining inline bytes reside after hdr */
+ if (first_quanta) {
+ first_quanta = false;
+ wqe += 16;
+ } else {
+ *wqe = inline_valid;
+ wqe++;
+ }
+ }
+ }
}
+ if (!first_quanta && quanta_bytes_remaining < 31)
+ *(wqe + quanta_bytes_remaining) = inline_valid;
}
/**
@@ -679,20 +711,27 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
struct irdma_post_sq_info *info, bool post_sq)
{
__le64 *wqe;
- struct irdma_inline_rdma_write *op_info;
+ struct irdma_rdma_write *op_info;
u64 hdr = 0;
u32 wqe_idx;
bool read_fence = false;
+ u32 i, total_size = 0;
u16 quanta;
info->push_wqe = qp->push_db ? true : false;
- op_info = &info->op.inline_rdma_write;
+ op_info = &info->op.rdma_write;
+
+ if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges))
+ return -EINVAL;
+
+ for (i = 0; i < op_info->num_lo_sges; i++)
+ total_size += op_info->lo_sg_list[i].length;
- if (op_info->len > qp->max_inline_data)
+ if (unlikely(total_size > qp->max_inline_data))
return -EINVAL;
- quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len);
- wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
+ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size);
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
info);
if (!wqe)
return -ENOMEM;
@@ -705,7 +744,7 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) |
FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
- FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) |
+ FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) |
FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) |
FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) |
FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid ? 1 : 0) |
@@ -719,7 +758,8 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
set_64bit_val(wqe, 0,
FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
- qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->data, op_info->len,
+ qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->lo_sg_list,
+ op_info->num_lo_sges,
qp->swqe_polarity);
dma_wmb(); /* make sure WQE is populated before valid bit is set */
@@ -745,20 +785,27 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp,
struct irdma_post_sq_info *info, bool post_sq)
{
__le64 *wqe;
- struct irdma_post_inline_send *op_info;
+ struct irdma_post_send *op_info;
u64 hdr;
u32 wqe_idx;
bool read_fence = false;
+ u32 i, total_size = 0;
u16 quanta;
info->push_wqe = qp->push_db ? true : false;
- op_info = &info->op.inline_send;
+ op_info = &info->op.send;
+
+ if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges))
+ return -EINVAL;
- if (op_info->len > qp->max_inline_data)
+ for (i = 0; i < op_info->num_sges; i++)
+ total_size += op_info->sg_list[i].length;
+
+ if (unlikely(total_size > qp->max_inline_data))
return -EINVAL;
- quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len);
- wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
+ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size);
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
info);
if (!wqe)
return -ENOMEM;
@@ -773,7 +820,7 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp,
hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) |
FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) |
FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
- FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) |
+ FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) |
FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG,
(info->imm_data_valid ? 1 : 0)) |
FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) |
@@ -789,8 +836,8 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp,
if (info->imm_data_valid)
set_64bit_val(wqe, 0,
FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
- qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->data, op_info->len,
- qp->swqe_polarity);
+ qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->sg_list,
+ op_info->num_sges, qp->swqe_polarity);
dma_wmb(); /* make sure WQE is populated before valid bit is set */
@@ -1002,11 +1049,10 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
__le64 *cqe;
struct irdma_qp_uk *qp;
struct irdma_ring *pring = NULL;
- u32 wqe_idx, q_type;
+ u32 wqe_idx;
int ret_code;
bool move_cq_head = true;
u8 polarity;
- u8 op_type;
bool ext_valid;
__le64 *ext_cqe;
@@ -1074,7 +1120,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
info->ud_vlan_valid = false;
}
- q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
+ info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3);
info->push_dropped = (bool)FIELD_GET(IRDMACQ_PSHDROP, qword3);
info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3);
@@ -1113,8 +1159,9 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
}
wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3);
info->qp_handle = (irdma_qp_handle)(unsigned long)qp;
+ info->op_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
- if (q_type == IRDMA_CQE_QTYPE_RQ) {
+ if (info->q_type == IRDMA_CQE_QTYPE_RQ) {
u32 array_idx;
array_idx = wqe_idx / qp->rq_wqe_size_multiplier;
@@ -1134,10 +1181,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0);
- if (info->imm_valid)
- info->op_type = IRDMA_OP_TYPE_REC_IMM;
- else
- info->op_type = IRDMA_OP_TYPE_REC;
if (qword3 & IRDMACQ_STAG) {
info->stag_invalid_set = true;
info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, qword2);
@@ -1195,17 +1238,18 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
sw_wqe = qp->sq_base[tail].elem;
get_64bit_val(sw_wqe, 24,
&wqe_qword);
- op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword);
- info->op_type = op_type;
+ info->op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE,
+ wqe_qword);
IRDMA_RING_SET_TAIL(qp->sq_ring,
tail + qp->sq_wrtrk_array[tail].quanta);
- if (op_type != IRDMAQP_OP_NOP) {
+ if (info->op_type != IRDMAQP_OP_NOP) {
info->wr_id = qp->sq_wrtrk_array[tail].wrid;
info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len;
break;
}
} while (1);
- if (op_type == IRDMA_OP_TYPE_BIND_MW && info->minor_err == FLUSH_PROT_ERR)
+ if (info->op_type == IRDMA_OP_TYPE_BIND_MW &&
+ info->minor_err == FLUSH_PROT_ERR)
info->minor_err = FLUSH_MW_BIND_ERR;
qp->sq_flush_seen = true;
if (!IRDMA_RING_MORE_WORK(qp->sq_ring))
diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h
index 2ef61923c92685..d0cdf609f5e06a 100644
--- a/drivers/infiniband/hw/irdma/user.h
+++ b/drivers/infiniband/hw/irdma/user.h
@@ -173,14 +173,6 @@ struct irdma_post_send {
u32 ah_id;
};
-struct irdma_post_inline_send {
- void *data;
- u32 len;
- u32 qkey;
- u32 dest_qp;
- u32 ah_id;
-};
-
struct irdma_post_rq_info {
u64 wr_id;
struct ib_sge *sg_list;
@@ -193,12 +185,6 @@ struct irdma_rdma_write {
struct ib_sge rem_addr;
};
-struct irdma_inline_rdma_write {
- void *data;
- u32 len;
- struct ib_sge rem_addr;
-};
-
struct irdma_rdma_read {
struct ib_sge *lo_sg_list;
u32 num_lo_sges;
@@ -241,8 +227,6 @@ struct irdma_post_sq_info {
struct irdma_rdma_read rdma_read;
struct irdma_bind_window bind_window;
struct irdma_inv_local_stag inv_local_stag;
- struct irdma_inline_rdma_write inline_rdma_write;
- struct irdma_post_inline_send inline_send;
} op;
};
@@ -261,6 +245,7 @@ struct irdma_cq_poll_info {
u16 ud_vlan;
u8 ud_smac[6];
u8 op_type;
+ u8 q_type;
bool stag_invalid_set:1; /* or L_R_Key set */
bool push_dropped:1;
bool error:1;
@@ -291,7 +276,8 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
bool post_sq);
struct irdma_wqe_uk_ops {
- void (*iw_copy_inline_data)(u8 *dest, u8 *src, u32 len, u8 polarity);
+ void (*iw_copy_inline_data)(u8 *dest, struct ib_sge *sge_list,
+ u32 num_sges, u8 polarity);
u16 (*iw_inline_data_size_to_quanta)(u32 data_size);
void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct ib_sge *sge,
u8 valid);
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index 8dfc9e154d733a..445e69e864097e 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -2591,6 +2591,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp)
sw_wqe = qp->sq_base[wqe_idx].elem;
get_64bit_val(sw_wqe, 24, &wqe_qword);
cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, IRDMAQPSQ_OPCODE);
+ cmpl->cpi.q_type = IRDMA_CQE_QTYPE_SQ;
/* remove the SQ WR by moving SQ tail*/
IRDMA_RING_SET_TAIL(*sq_ring,
sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta);
@@ -2629,6 +2630,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp)
cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx];
cmpl->cpi.op_type = IRDMA_OP_TYPE_REC;
+ cmpl->cpi.q_type = IRDMA_CQE_QTYPE_RQ;
/* remove the RQ WR by moving RQ tail */
IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1);
ibdev_dbg(iwqp->iwrcq->ibcq.device,
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index a22afbb25bc58d..f6973ea55eda7e 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -64,36 +64,6 @@ static int irdma_query_device(struct ib_device *ibdev,
}
/**
- * irdma_get_eth_speed_and_width - Get IB port speed and width from netdev speed
- * @link_speed: netdev phy link speed
- * @active_speed: IB port speed
- * @active_width: IB port width
- */
-static void irdma_get_eth_speed_and_width(u32 link_speed, u16 *active_speed,
- u8 *active_width)
-{
- if (link_speed <= SPEED_1000) {
- *active_width = IB_WIDTH_1X;
- *active_speed = IB_SPEED_SDR;
- } else if (link_speed <= SPEED_10000) {
- *active_width = IB_WIDTH_1X;
- *active_speed = IB_SPEED_FDR10;
- } else if (link_speed <= SPEED_20000) {
- *active_width = IB_WIDTH_4X;
- *active_speed = IB_SPEED_DDR;
- } else if (link_speed <= SPEED_25000) {
- *active_width = IB_WIDTH_1X;
- *active_speed = IB_SPEED_EDR;
- } else if (link_speed <= SPEED_40000) {
- *active_width = IB_WIDTH_4X;
- *active_speed = IB_SPEED_FDR10;
- } else {
- *active_width = IB_WIDTH_4X;
- *active_speed = IB_SPEED_EDR;
- }
-}
-
-/**
* irdma_query_port - get port attributes
* @ibdev: device pointer from stack
* @port: port number for query
@@ -120,8 +90,9 @@ static int irdma_query_port(struct ib_device *ibdev, u32 port,
props->state = IB_PORT_DOWN;
props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
}
- irdma_get_eth_speed_and_width(SPEED_100000, &props->active_speed,
- &props->active_width);
+
+ ib_get_eth_speed(ibdev, port, &props->active_speed,
+ &props->active_width);
if (rdma_protocol_roce(ibdev, 1)) {
props->gid_tbl_len = 32;
@@ -1242,6 +1213,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
av->attrs = attr->ah_attr;
rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid);
rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid);
+ av->net_type = rdma_gid_attr_network_type(sgid_attr);
if (av->net_type == RDMA_NETWORK_IPV6) {
__be32 *daddr =
av->dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32;
@@ -2358,9 +2330,10 @@ static bool irdma_check_mr_contiguous(struct irdma_pble_alloc *palloc,
* @rf: RDMA PCI function
* @iwmr: mr pointer for this memory registration
* @use_pbles: flag if to use pble's
+ * @lvl_1_only: request only level 1 pble if true
*/
static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr,
- bool use_pbles)
+ bool use_pbles, bool lvl_1_only)
{
struct irdma_pbl *iwpbl = &iwmr->iwpbl;
struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
@@ -2371,7 +2344,7 @@ static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr,
if (use_pbles) {
status = irdma_get_pble(rf->pble_rsrc, palloc, iwmr->page_cnt,
- false);
+ lvl_1_only);
if (status)
return status;
@@ -2414,16 +2387,10 @@ static int irdma_handle_q_mem(struct irdma_device *iwdev,
bool ret = true;
pg_size = iwmr->page_size;
- err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles);
+ err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, true);
if (err)
return err;
- if (use_pbles && palloc->level != PBLE_LEVEL_1) {
- irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
- iwpbl->pbl_allocated = false;
- return -ENOMEM;
- }
-
if (use_pbles)
arr = palloc->level1.addr;
@@ -2899,7 +2866,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
case IRDMA_MEMREG_TYPE_MEM:
use_pbles = (iwmr->page_cnt != 1);
- err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles);
+ err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false);
if (err)
goto error;
@@ -3165,30 +3132,20 @@ static int irdma_post_send(struct ib_qp *ibqp,
info.stag_to_inv = ib_wr->ex.invalidate_rkey;
}
- if (ib_wr->send_flags & IB_SEND_INLINE) {
- info.op.inline_send.data = (void *)(unsigned long)
- ib_wr->sg_list[0].addr;
- info.op.inline_send.len = ib_wr->sg_list[0].length;
- if (iwqp->ibqp.qp_type == IB_QPT_UD ||
- iwqp->ibqp.qp_type == IB_QPT_GSI) {
- ah = to_iwah(ud_wr(ib_wr)->ah);
- info.op.inline_send.ah_id = ah->sc_ah.ah_info.ah_idx;
- info.op.inline_send.qkey = ud_wr(ib_wr)->remote_qkey;
- info.op.inline_send.dest_qp = ud_wr(ib_wr)->remote_qpn;
- }
+ info.op.send.num_sges = ib_wr->num_sge;
+ info.op.send.sg_list = ib_wr->sg_list;
+ if (iwqp->ibqp.qp_type == IB_QPT_UD ||
+ iwqp->ibqp.qp_type == IB_QPT_GSI) {
+ ah = to_iwah(ud_wr(ib_wr)->ah);
+ info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx;
+ info.op.send.qkey = ud_wr(ib_wr)->remote_qkey;
+ info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn;
+ }
+
+ if (ib_wr->send_flags & IB_SEND_INLINE)
err = irdma_uk_inline_send(ukqp, &info, false);
- } else {
- info.op.send.num_sges = ib_wr->num_sge;
- info.op.send.sg_list = ib_wr->sg_list;
- if (iwqp->ibqp.qp_type == IB_QPT_UD ||
- iwqp->ibqp.qp_type == IB_QPT_GSI) {
- ah = to_iwah(ud_wr(ib_wr)->ah);
- info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx;
- info.op.send.qkey = ud_wr(ib_wr)->remote_qkey;
- info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn;
- }
+ else
err = irdma_uk_send(ukqp, &info, false);
- }
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
if (ukqp->qp_caps & IRDMA_WRITE_WITH_IMM) {
@@ -3205,22 +3162,15 @@ static int irdma_post_send(struct ib_qp *ibqp,
else
info.op_type = IRDMA_OP_TYPE_RDMA_WRITE;
- if (ib_wr->send_flags & IB_SEND_INLINE) {
- info.op.inline_rdma_write.data = (void *)(uintptr_t)ib_wr->sg_list[0].addr;
- info.op.inline_rdma_write.len =
- ib_wr->sg_list[0].length;
- info.op.inline_rdma_write.rem_addr.addr =
- rdma_wr(ib_wr)->remote_addr;
- info.op.inline_rdma_write.rem_addr.lkey =
- rdma_wr(ib_wr)->rkey;
+ info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
+ info.op.rdma_write.lo_sg_list = ib_wr->sg_list;
+ info.op.rdma_write.rem_addr.addr =
+ rdma_wr(ib_wr)->remote_addr;
+ info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey;
+ if (ib_wr->send_flags & IB_SEND_INLINE)
err = irdma_uk_inline_rdma_write(ukqp, &info, false);
- } else {
- info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
- info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
- info.op.rdma_write.rem_addr.addr = rdma_wr(ib_wr)->remote_addr;
- info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey;
+ else
err = irdma_uk_rdma_write(ukqp, &info, false);
- }
break;
case IB_WR_RDMA_READ_WITH_INV:
inv_stag = true;
@@ -3380,7 +3330,6 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode
static void irdma_process_cqe(struct ib_wc *entry,
struct irdma_cq_poll_info *cq_poll_info)
{
- struct irdma_qp *iwqp;
struct irdma_sc_qp *qp;
entry->wc_flags = 0;
@@ -3388,7 +3337,6 @@ static void irdma_process_cqe(struct ib_wc *entry,
entry->wr_id = cq_poll_info->wr_id;
qp = cq_poll_info->qp_handle;
- iwqp = qp->qp_uk.back_qp;
entry->qp = qp->qp_uk.back_qp;
if (cq_poll_info->error) {
@@ -3421,42 +3369,17 @@ static void irdma_process_cqe(struct ib_wc *entry,
}
}
- switch (cq_poll_info->op_type) {
- case IRDMA_OP_TYPE_RDMA_WRITE:
- case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
- entry->opcode = IB_WC_RDMA_WRITE;
- break;
- case IRDMA_OP_TYPE_RDMA_READ_INV_STAG:
- case IRDMA_OP_TYPE_RDMA_READ:
- entry->opcode = IB_WC_RDMA_READ;
- break;
- case IRDMA_OP_TYPE_SEND_INV:
- case IRDMA_OP_TYPE_SEND_SOL:
- case IRDMA_OP_TYPE_SEND_SOL_INV:
- case IRDMA_OP_TYPE_SEND:
- entry->opcode = IB_WC_SEND;
- break;
- case IRDMA_OP_TYPE_FAST_REG_NSMR:
- entry->opcode = IB_WC_REG_MR;
- break;
- case IRDMA_OP_TYPE_INV_STAG:
- entry->opcode = IB_WC_LOCAL_INV;
- break;
- case IRDMA_OP_TYPE_REC_IMM:
- case IRDMA_OP_TYPE_REC:
- entry->opcode = cq_poll_info->op_type == IRDMA_OP_TYPE_REC_IMM ?
- IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
+ if (cq_poll_info->q_type == IRDMA_CQE_QTYPE_SQ) {
+ set_ib_wc_op_sq(cq_poll_info, entry);
+ } else {
+ set_ib_wc_op_rq(cq_poll_info, entry,
+ qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ?
+ true : false);
if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD &&
cq_poll_info->stag_invalid_set) {
entry->ex.invalidate_rkey = cq_poll_info->inv_stag;
entry->wc_flags |= IB_WC_WITH_INVALIDATE;
}
- break;
- default:
- ibdev_err(&iwqp->iwdev->ibdev,
- "Invalid opcode = %d in CQE\n", cq_poll_info->op_type);
- entry->status = IB_WC_GENERAL_ERR;
- return;
}
if (qp->qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD) {
diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h
index 4309b7159f42c8..a536e9fa85ebf1 100644
--- a/drivers/infiniband/hw/irdma/verbs.h
+++ b/drivers/infiniband/hw/irdma/verbs.h
@@ -232,6 +232,59 @@ static inline u16 irdma_fw_minor_ver(struct irdma_sc_dev *dev)
return (u16)FIELD_GET(IRDMA_FW_VER_MINOR, dev->feature_info[IRDMA_FEATURE_FW_INFO]);
}
+static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cq_poll_info,
+ struct ib_wc *entry)
+{
+ switch (cq_poll_info->op_type) {
+ case IRDMA_OP_TYPE_RDMA_WRITE:
+ case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
+ entry->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case IRDMA_OP_TYPE_RDMA_READ_INV_STAG:
+ case IRDMA_OP_TYPE_RDMA_READ:
+ entry->opcode = IB_WC_RDMA_READ;
+ break;
+ case IRDMA_OP_TYPE_SEND_SOL:
+ case IRDMA_OP_TYPE_SEND_SOL_INV:
+ case IRDMA_OP_TYPE_SEND_INV:
+ case IRDMA_OP_TYPE_SEND:
+ entry->opcode = IB_WC_SEND;
+ break;
+ case IRDMA_OP_TYPE_FAST_REG_NSMR:
+ entry->opcode = IB_WC_REG_MR;
+ break;
+ case IRDMA_OP_TYPE_INV_STAG:
+ entry->opcode = IB_WC_LOCAL_INV;
+ break;
+ default:
+ entry->status = IB_WC_GENERAL_ERR;
+ }
+}
+
+static inline void set_ib_wc_op_rq(struct irdma_cq_poll_info *cq_poll_info,
+ struct ib_wc *entry, bool send_imm_support)
+{
+ /*
+ * iWARP does not support sendImm, so the presence of Imm data
+ * must be WriteImm.
+ */
+ if (!send_imm_support) {
+ entry->opcode = cq_poll_info->imm_valid ?
+ IB_WC_RECV_RDMA_WITH_IMM :
+ IB_WC_RECV;
+ return;
+ }
+
+ switch (cq_poll_info->op_type) {
+ case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
+ case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
+ entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ break;
+ default:
+ entry->opcode = IB_WC_RECV;
+ }
+}
+
void irdma_mcast_mac(u32 *ip_addr, u8 *mac, bool ipv4);
int irdma_ib_register_device(struct irdma_device *iwdev);
void irdma_ib_unregister_device(struct irdma_device *iwdev);
diff --git a/drivers/infiniband/hw/mana/Kconfig b/drivers/infiniband/hw/mana/Kconfig
new file mode 100644
index 00000000000000..546640657bac28
--- /dev/null
+++ b/drivers/infiniband/hw/mana/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config MANA_INFINIBAND
+ tristate "Microsoft Azure Network Adapter support"
+ depends on NETDEVICES && ETHERNET && PCI && MICROSOFT_MANA
+ help
+ This driver provides low-level RDMA support for Microsoft Azure
+ Network Adapter (MANA). MANA supports RDMA features that can be used
+ for workloads (e.g. DPDK, MPI, etc.) that use RDMA verbs to directly
+ access hardware from user-mode processes in Microsoft Azure cloud
+ environment.
diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile
new file mode 100644
index 00000000000000..88655fe5e398a8
--- /dev/null
+++ b/drivers/infiniband/hw/mana/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
+
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
new file mode 100644
index 00000000000000..d141cab8a1e698
--- /dev/null
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct ib_device *ibdev = ibcq->device;
+ struct mana_ib_create_cq ucmd = {};
+ struct mana_ib_dev *mdev;
+ int err;
+
+ mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ if (udata->inlen < sizeof(ucmd))
+ return -EINVAL;
+
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(ibdev,
+ "Failed to copy from udata for create cq, %d\n", err);
+ return err;
+ }
+
+ if (attr->cqe > MAX_SEND_BUFFERS_PER_QUEUE) {
+ ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
+ return -EINVAL;
+ }
+
+ cq->cqe = attr->cqe;
+ cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(cq->umem)) {
+ err = PTR_ERR(cq->umem);
+ ibdev_dbg(ibdev, "Failed to get umem for create cq, err %d\n",
+ err);
+ return err;
+ }
+
+ err = mana_ib_gd_create_dma_region(mdev, cq->umem, &cq->gdma_region);
+ if (err) {
+ ibdev_dbg(ibdev,
+ "Failed to create dma region for create cq, %d\n",
+ err);
+ goto err_release_umem;
+ }
+
+ ibdev_dbg(ibdev,
+ "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
+ err, cq->gdma_region);
+
+ /*
+ * The CQ ID is not known at this time. The ID is generated at create_qp
+ */
+
+ return 0;
+
+err_release_umem:
+ ib_umem_release(cq->umem);
+ return err;
+}
+
+int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct ib_device *ibdev = ibcq->device;
+ struct mana_ib_dev *mdev;
+
+ mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
+ ib_umem_release(cq->umem);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
new file mode 100644
index 00000000000000..d4541b8707e4c7
--- /dev/null
+++ b/drivers/infiniband/hw/mana/device.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+#include <net/mana/mana_auxiliary.h>
+
+MODULE_DESCRIPTION("Microsoft Azure Network Adapter IB driver");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(NET_MANA);
+
+static const struct ib_device_ops mana_ib_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_MANA,
+ .uverbs_abi_ver = MANA_IB_UVERBS_ABI_VERSION,
+
+ .alloc_pd = mana_ib_alloc_pd,
+ .alloc_ucontext = mana_ib_alloc_ucontext,
+ .create_cq = mana_ib_create_cq,
+ .create_qp = mana_ib_create_qp,
+ .create_rwq_ind_table = mana_ib_create_rwq_ind_table,
+ .create_wq = mana_ib_create_wq,
+ .dealloc_pd = mana_ib_dealloc_pd,
+ .dealloc_ucontext = mana_ib_dealloc_ucontext,
+ .dereg_mr = mana_ib_dereg_mr,
+ .destroy_cq = mana_ib_destroy_cq,
+ .destroy_qp = mana_ib_destroy_qp,
+ .destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
+ .destroy_wq = mana_ib_destroy_wq,
+ .disassociate_ucontext = mana_ib_disassociate_ucontext,
+ .get_port_immutable = mana_ib_get_port_immutable,
+ .mmap = mana_ib_mmap,
+ .modify_qp = mana_ib_modify_qp,
+ .modify_wq = mana_ib_modify_wq,
+ .query_device = mana_ib_query_device,
+ .query_gid = mana_ib_query_gid,
+ .query_port = mana_ib_query_port,
+ .reg_user_mr = mana_ib_reg_user_mr,
+
+ INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, mana_ib_ucontext, ibucontext),
+ INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mana_ib_rwq_ind_table,
+ ib_ind_table),
+};
+
+static int mana_ib_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mana_adev *madev = container_of(adev, struct mana_adev, adev);
+ struct gdma_dev *mdev = madev->mdev;
+ struct mana_context *mc;
+ struct mana_ib_dev *dev;
+ int ret;
+
+ mc = mdev->driver_data;
+
+ dev = ib_alloc_device(mana_ib_dev, ib_dev);
+ if (!dev)
+ return -ENOMEM;
+
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
+
+ dev->ib_dev.phys_port_cnt = mc->num_ports;
+
+ ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
+ mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
+
+ dev->gdma_dev = mdev;
+ dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+
+ /*
+ * num_comp_vectors needs to be set to the max MSIX index
+ * when interrupts and event queues are implemented
+ */
+ dev->ib_dev.num_comp_vectors = 1;
+ dev->ib_dev.dev.parent = mdev->gdma_context->dev;
+
+ ret = ib_register_device(&dev->ib_dev, "mana_%d",
+ mdev->gdma_context->dev);
+ if (ret) {
+ ib_dealloc_device(&dev->ib_dev);
+ return ret;
+ }
+
+ dev_set_drvdata(&adev->dev, dev);
+
+ return 0;
+}
+
+static void mana_ib_remove(struct auxiliary_device *adev)
+{
+ struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
+
+ ib_unregister_device(&dev->ib_dev);
+ ib_dealloc_device(&dev->ib_dev);
+}
+
+static const struct auxiliary_device_id mana_id_table[] = {
+ {
+ .name = "mana.rdma",
+ },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mana_id_table);
+
+static struct auxiliary_driver mana_driver = {
+ .name = "rdma",
+ .probe = mana_ib_probe,
+ .remove = mana_ib_remove,
+ .id_table = mana_id_table,
+};
+
+module_auxiliary_driver(mana_driver);
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
new file mode 100644
index 00000000000000..8b3bc302d6f3a3
--- /dev/null
+++ b/drivers/infiniband/hw/mana/main.c
@@ -0,0 +1,521 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+/*
+ * Drop one vPort reference held by @pd. @port indexes the MANA context's
+ * ports[] array. When the PD's use count reaches zero the vPort of that
+ * port is unconfigured. The count is protected by pd->vport_mutex.
+ */
+void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
+			 u32 port)
+{
+	struct mana_context *mana_ctx = dev->gdma_dev->driver_data;
+	struct net_device *netdev = mana_ctx->ports[port];
+	struct mana_port_context *port_ctx = netdev_priv(netdev);
+
+	mutex_lock(&pd->vport_mutex);
+
+	pd->vport_use_count--;
+	/* Going negative means an unbalanced cfg/uncfg pair */
+	WARN_ON(pd->vport_use_count < 0);
+
+	if (pd->vport_use_count == 0)
+		mana_uncfg_vport(port_ctx);
+
+	mutex_unlock(&pd->vport_mutex);
+}
+
+/*
+ * Take one vPort reference on behalf of @pd. @port indexes the MANA
+ * context's ports[] array. Only the first reference actually configures
+ * the vPort (with the PD number and @doorbell_id); later callers just bump
+ * the use count.
+ *
+ * Returns 0 on success or the error from mana_cfg_vport(), in which case
+ * the use count is left unchanged.
+ */
+int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd,
+		      u32 doorbell_id)
+{
+	struct mana_context *mana_ctx = dev->gdma_dev->driver_data;
+	struct net_device *netdev = mana_ctx->ports[port];
+	struct mana_port_context *port_ctx = netdev_priv(netdev);
+	int rc;
+
+	mutex_lock(&pd->vport_mutex);
+
+	if (++pd->vport_use_count > 1) {
+		ibdev_dbg(&dev->ib_dev,
+			  "Skip as this PD is already configured vport\n");
+		mutex_unlock(&pd->vport_mutex);
+		return 0;
+	}
+
+	rc = mana_cfg_vport(port_ctx, pd->pdn, doorbell_id);
+	if (rc)
+		pd->vport_use_count--;	/* undo the reference taken above */
+
+	mutex_unlock(&pd->vport_mutex);
+
+	if (rc) {
+		ibdev_dbg(&dev->ib_dev, "Failed to configure vPort %d\n", rc);
+		return rc;
+	}
+
+	/* Cache the TX parameters of the port in the PD */
+	pd->tx_shortform_allowed = port_ctx->tx_shortform_allowed;
+	pd->tx_vp_offset = port_ctx->tx_vp_offset;
+
+	ibdev_dbg(&dev->ib_dev, "vport handle %llx pdid %x doorbell_id %x\n",
+		  port_ctx->port_handle, pd->pdn, doorbell_id);
+
+	return 0;
+}
+
+/*
+ * Allocate a protection domain by sending GDMA_CREATE_PD and recording the
+ * returned handle and PD id in the driver's PD structure. Also initializes
+ * the PD's vPort mutex and use count.
+ *
+ * Returns 0 on success, the request error, or -EPROTO when the request
+ * was delivered but the response carries a non-zero status.
+ */
+int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+	struct mana_ib_dev *dev =
+		container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+	struct gdma_context *gc = dev->gdma_dev->gdma_context;
+	struct gdma_create_pd_resp resp = {};
+	struct gdma_create_pd_req req = {};
+	enum gdma_pd_flags flags = 0;
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
+			     sizeof(resp));
+	req.flags = flags;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.hdr.status) {
+		ibdev_dbg(&dev->ib_dev,
+			  "Failed to get pd_id err %d status %u\n", err,
+			  resp.hdr.status);
+
+		return err ? err : -EPROTO;
+	}
+
+	pd->pd_handle = resp.pd_handle;
+	pd->pdn = resp.pd_id;
+	ibdev_dbg(&dev->ib_dev, "pd_handle 0x%llx pd_id %d\n",
+		  pd->pd_handle, pd->pdn);
+
+	mutex_init(&pd->vport_mutex);
+	pd->vport_use_count = 0;
+
+	return 0;
+}
+
+/*
+ * Release a protection domain by sending GDMA_DESTROY_PD for the handle
+ * stored at allocation time.
+ *
+ * Returns 0 on success, the request error, or -EPROTO when the request
+ * was delivered but the response carries a non-zero status.
+ */
+int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+	struct ib_device *ibdev = ibpd->device;
+	struct gdma_destory_pd_resp resp = {};
+	struct gdma_destroy_pd_req req = {};
+	struct mana_ib_dev *dev;
+	struct gdma_dev *mdev;
+	int err;
+
+	dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	mdev = dev->gdma_dev;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req),
+			     sizeof(resp));
+
+	req.pd_handle = pd->pd_handle;
+	err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req,
+				   sizeof(resp), &resp);
+
+	if (err || resp.hdr.status) {
+		/* Message is newline-terminated so it is not merged with the next log line */
+		ibdev_dbg(&dev->ib_dev,
+			  "Failed to destroy pd_handle 0x%llx err %d status %u\n",
+			  pd->pd_handle, err, resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+	}
+
+	return err;
+}
+
+/*
+ * Release one doorbell page index by sending a
+ * GDMA_DESTROY_RESOURCE_RANGE request for @doorbell_page.
+ *
+ * Returns 0 on success, the request error, or -EPROTO on a non-zero
+ * response status.
+ */
+static int mana_gd_destroy_doorbell_page(struct gdma_context *gc,
+					 int doorbell_page)
+{
+	struct gdma_destroy_resource_range_req req = {};
+	struct gdma_resp_hdr resp = {};
+	int rc;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_RESOURCE_RANGE,
+			     sizeof(req), sizeof(resp));
+
+	req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
+	req.num_resources = 1;
+	req.allocated_resources = doorbell_page;
+
+	rc = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (!rc && !resp.status)
+		return 0;
+
+	dev_err(gc->dev,
+		"Failed to destroy doorbell page: ret %d, 0x%x\n",
+		rc, resp.status);
+
+	return rc ? rc : -EPROTO;
+}
+
+/*
+ * Allocate one doorbell page index by sending a
+ * GDMA_ALLOCATE_RESOURCE_RANGE request. On success the allocated index
+ * is stored in *@doorbell_page.
+ *
+ * Returns 0 on success, the request error, or -EPROTO on a non-zero
+ * response status.
+ */
+static int mana_gd_allocate_doorbell_page(struct gdma_context *gc,
+					  int *doorbell_page)
+{
+	struct gdma_allocate_resource_range_req req = {};
+	struct gdma_allocate_resource_range_resp resp = {};
+	int rc;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_ALLOCATE_RESOURCE_RANGE,
+			     sizeof(req), sizeof(resp));
+
+	req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
+	req.num_resources = 1;
+	req.alignment = 1;
+
+	/* Have GDMA start searching from 0 */
+	req.allocated_resources = 0;
+
+	rc = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (!rc && !resp.hdr.status) {
+		*doorbell_page = resp.allocated_resources;
+		return 0;
+	}
+
+	dev_err(gc->dev,
+		"Failed to allocate doorbell page: ret %d, 0x%x\n",
+		rc, resp.hdr.status);
+
+	return rc ? rc : -EPROTO;
+}
+
+/*
+ * Set up a user context: allocate a doorbell page index from GDMA and
+ * remember it in the driver's ucontext for later use (e.g. by mmap).
+ *
+ * Returns 0 on success or the error from the doorbell page allocation.
+ */
+int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
+			   struct ib_udata *udata)
+{
+	struct mana_ib_ucontext *ucontext =
+		container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+	struct ib_device *ibdev = ibcontext->device;
+	struct mana_ib_dev *mib_dev =
+		container_of(ibdev, struct mana_ib_dev, ib_dev);
+	int db_page;
+	int rc;
+
+	/* Allocate a doorbell page index */
+	rc = mana_gd_allocate_doorbell_page(mib_dev->gdma_dev->gdma_context,
+					    &db_page);
+	if (rc) {
+		ibdev_dbg(ibdev, "Failed to allocate doorbell page %d\n", rc);
+		return rc;
+	}
+
+	ibdev_dbg(ibdev, "Doorbell page allocated %d\n", db_page);
+
+	ucontext->doorbell = db_page;
+
+	return 0;
+}
+
+/*
+ * Tear down a user context by returning its doorbell page index to GDMA.
+ * A failure is only logged; there is nothing to report to the caller.
+ */
+void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+	struct mana_ib_ucontext *ucontext =
+		container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+	struct ib_device *ibdev = ibcontext->device;
+	struct mana_ib_dev *mib_dev =
+		container_of(ibdev, struct mana_ib_dev, ib_dev);
+	int rc;
+
+	rc = mana_gd_destroy_doorbell_page(mib_dev->gdma_dev->gdma_context,
+					   ucontext->doorbell);
+	if (rc)
+		ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", rc);
+}
+
+/*
+ * Send the initial GDMA_CREATE_DMA_REGION message, carrying the first
+ * @num_pages page addresses already stored in @create_req.
+ *
+ * @expected_status is the response status that counts as success: 0 when
+ * this message carries every page of the region, GDMA_STATUS_MORE_ENTRIES
+ * when further add-pages messages will follow.
+ *
+ * On success the new region handle is stored in *@gdma_region.
+ * Returns 0 on success, the request error, or -EPROTO on an unexpected
+ * response status.
+ */
+static int
+mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
+			    struct gdma_context *gc,
+			    struct gdma_create_dma_region_req *create_req,
+			    size_t num_pages, mana_handle_t *gdma_region,
+			    u32 expected_status)
+{
+	struct gdma_create_dma_region_resp create_resp = {};
+	unsigned int create_req_msg_size;
+	int err;
+
+	create_req_msg_size =
+		struct_size(create_req, page_addr_list, num_pages);
+	create_req->page_addr_list_len = num_pages;
+
+	err = mana_gd_send_request(gc, create_req_msg_size, create_req,
+				   sizeof(create_resp), &create_resp);
+	if (err || create_resp.hdr.status != expected_status) {
+		ibdev_dbg(&dev->ib_dev,
+			  "Failed to create DMA region: %d, 0x%x\n",
+			  err, create_resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+
+		return err;
+	}
+
+	*gdma_region = create_resp.dma_region_handle;
+	ibdev_dbg(&dev->ib_dev, "Created DMA region handle 0x%llx\n",
+		  *gdma_region);
+
+	return 0;
+}
+
+/*
+ * Send one GDMA_DMA_REGION_ADD_PAGES message carrying @num_pages page
+ * addresses already stored in @add_req (whose dma_region_handle the
+ * caller has filled in).
+ *
+ * @expected_status is the response status that counts as success.
+ * Returns 0 on success, the request error, or -EPROTO on an unexpected
+ * response status.
+ */
+static int
+mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context *gc,
+			  struct gdma_dma_region_add_pages_req *add_req,
+			  unsigned int num_pages, u32 expected_status)
+{
+	unsigned int add_req_msg_size =
+		struct_size(add_req, page_addr_list, num_pages);
+	struct gdma_general_resp add_resp = {};
+	int err;
+
+	mana_gd_init_req_hdr(&add_req->hdr, GDMA_DMA_REGION_ADD_PAGES,
+			     add_req_msg_size, sizeof(add_resp));
+	add_req->page_addr_list_len = num_pages;
+
+	err = mana_gd_send_request(gc, add_req_msg_size, add_req,
+				   sizeof(add_resp), &add_resp);
+	if (err || add_resp.hdr.status != expected_status) {
+		ibdev_dbg(&dev->ib_dev,
+			  "Failed to add pages to DMA region: %d, 0x%x\n",
+			  err, add_resp.hdr.status);
+
+		if (!err)
+			err = -EPROTO;
+
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Create a GDMA DMA region describing @umem.
+ *
+ * The umem is split into blocks of the best page size permitted by
+ * PAGE_SZ_BM. The block addresses are sent in one create message followed,
+ * when they do not fit in a single request, by as many add-pages messages
+ * as needed. Every message except the last must be answered with
+ * GDMA_STATUS_MORE_ENTRIES; the last one with status 0.
+ *
+ * On success the region handle is stored in *@gdma_region and 0 is
+ * returned. On failure a negative errno is returned; if the region had
+ * already been created it is destroyed again.
+ */
+int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+				 mana_handle_t *gdma_region)
+{
+	struct gdma_dma_region_add_pages_req *add_req = NULL;
+	size_t num_pages_processed = 0, num_pages_to_handle;
+	struct gdma_create_dma_region_req *create_req;
+	unsigned int create_req_msg_size;
+	struct hw_channel_context *hwc;
+	struct ib_block_iter biter;
+	size_t max_pgs_add_cmd = 0;
+	size_t max_pgs_create_cmd;
+	struct gdma_context *gc;
+	size_t num_pages_total;
+	struct gdma_dev *mdev;
+	unsigned long page_sz;
+	unsigned int tail = 0;
+	u32 expected_status = 0;
+	u64 *page_addr_list;
+	void *request_buf;
+	/* Stays -EINVAL if the loop below never sends a message */
+	int err = -EINVAL;
+
+	mdev = dev->gdma_dev;
+	gc = mdev->gdma_context;
+	hwc = gc->hwc.driver_data;
+
+	/* Hardware requires dma region to align to chosen page size */
+	page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, 0);
+	if (!page_sz) {
+		ibdev_dbg(&dev->ib_dev, "failed to find page size.\n");
+		return -ENOMEM;
+	}
+	num_pages_total = ib_umem_num_dma_blocks(umem, page_sz);
+
+	max_pgs_create_cmd =
+		(hwc->max_req_msg_size - sizeof(*create_req)) / sizeof(u64);
+	num_pages_to_handle =
+		min_t(size_t, num_pages_total, max_pgs_create_cmd);
+	create_req_msg_size =
+		struct_size(create_req, page_addr_list, num_pages_to_handle);
+
+	/* The create message is not the last one: more entries are expected */
+	if (num_pages_to_handle < num_pages_total)
+		expected_status = GDMA_STATUS_MORE_ENTRIES;
+
+	/* One buffer of the maximum message size is reused for all requests */
+	request_buf = kzalloc(hwc->max_req_msg_size, GFP_KERNEL);
+	if (!request_buf)
+		return -ENOMEM;
+
+	create_req = request_buf;
+	mana_gd_init_req_hdr(&create_req->hdr, GDMA_CREATE_DMA_REGION,
+			     create_req_msg_size,
+			     sizeof(struct gdma_create_dma_region_resp));
+
+	create_req->length = umem->length;
+	create_req->offset_in_page = umem->address & (page_sz - 1);
+	create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT;
+	create_req->page_count = num_pages_total;
+
+	ibdev_dbg(&dev->ib_dev, "size_dma_region %zu num_pages_total %zu\n",
+		  umem->length, num_pages_total);
+
+	ibdev_dbg(&dev->ib_dev, "page_sz %lu offset_in_page %u\n",
+		  page_sz, create_req->offset_in_page);
+
+	ibdev_dbg(&dev->ib_dev, "num_pages_to_handle %zu, gdma_page_type %u\n",
+		  num_pages_to_handle, create_req->gdma_page_type);
+
+	page_addr_list = create_req->page_addr_list;
+	rdma_umem_for_each_dma_block(umem, &biter, page_sz) {
+		page_addr_list[tail++] = rdma_block_iter_dma_address(&biter);
+		/* Keep collecting until the current message is full */
+		if (tail < num_pages_to_handle)
+			continue;
+
+		if (!num_pages_processed) {
+			/* First create message */
+			err = mana_ib_gd_first_dma_region(dev, gc, create_req,
+							  tail, gdma_region,
+							  expected_status);
+			if (err)
+				goto out;
+
+			max_pgs_add_cmd = (hwc->max_req_msg_size -
+				sizeof(*add_req)) / sizeof(u64);
+
+			/* From now on the buffer holds add-pages requests */
+			add_req = request_buf;
+			add_req->dma_region_handle = *gdma_region;
+			add_req->reserved3 = 0;
+			page_addr_list = add_req->page_addr_list;
+		} else {
+			/* Subsequent create messages */
+			u32 expected_s = 0;
+
+			if (num_pages_processed + num_pages_to_handle <
+			    num_pages_total)
+				expected_s = GDMA_STATUS_MORE_ENTRIES;
+
+			err = mana_ib_gd_add_dma_region(dev, gc, add_req, tail,
+							expected_s);
+			if (err)
+				break;
+		}
+
+		num_pages_processed += tail;
+		tail = 0;
+
+		/* The remaining pages to create */
+		num_pages_to_handle =
+			min_t(size_t,
+			      num_pages_total - num_pages_processed,
+			      max_pgs_add_cmd);
+	}
+
+	/* Only a region that was actually created needs to be destroyed */
+	if (err && num_pages_processed)
+		mana_ib_gd_destroy_dma_region(dev, *gdma_region);
+
+out:
+	kfree(request_buf);
+	return err;
+}
+
+/*
+ * Destroy the DMA region identified by @gdma_region on the GDMA context
+ * of @dev, logging the handle first. Returns the result of
+ * mana_gd_destroy_dma_region().
+ */
+int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, u64 gdma_region)
+{
+	struct gdma_context *gc = dev->gdma_dev->gdma_context;
+
+	ibdev_dbg(&dev->ib_dev, "destroy dma region 0x%llx\n", gdma_region);
+
+	return mana_gd_destroy_dma_region(gc, gdma_region);
+}
+
+/*
+ * Map the user context's doorbell page into the calling process.
+ *
+ * Only page offset 0 is accepted. The mapping is write-combined and
+ * covers gc->db_page_size bytes of the doorbell page selected by
+ * ucontext->doorbell.
+ *
+ * Returns 0 on success, -EINVAL for a non-zero page offset, or the error
+ * from rdma_user_mmap_io().
+ */
+int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
+{
+	struct mana_ib_ucontext *ucontext =
+		container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+	struct ib_device *ibdev = ibcontext->device;
+	struct mana_ib_dev *mib_dev =
+		container_of(ibdev, struct mana_ib_dev, ib_dev);
+	struct gdma_context *gc = mib_dev->gdma_dev->gdma_context;
+	phys_addr_t pfn;
+	pgprot_t prot;
+	int ret;
+
+	if (vma->vm_pgoff != 0) {
+		ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma->vm_pgoff);
+		return -EINVAL;
+	}
+
+	/* Map to the page indexed by ucontext->doorbell */
+	pfn = (gc->phys_db_page_base +
+	       gc->db_page_size * ucontext->doorbell) >>
+	      PAGE_SHIFT;
+	prot = pgprot_writecombine(vma->vm_page_prot);
+
+	ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size, prot,
+				NULL);
+	if (ret) {
+		ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret);
+		return ret;
+	}
+
+	ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret %d\n",
+		  pfn, gc->db_page_size, ret);
+
+	return ret;
+}
+
+/*
+ * Report the immutable attributes of a port. Only core_cap_flags is
+ * written; @ibdev and @port_num are not consulted. Always returns 0.
+ */
+int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
+{
+ /*
+ * This version only supports RAW_PACKET;
+ * other values need to be filled in for other types.
+ */
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ return 0;
+}
+
+/*
+ * Fill @props with the fixed device limits this driver advertises.
+ * Only the fields assigned below are written; @ibdev and @uhw are not
+ * consulted. Always returns 0.
+ */
+int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
+			 struct ib_udata *uhw)
+{
+	/* Queue pair limits */
+	props->max_qp = MANA_MAX_NUM_QUEUES;
+	props->max_qp_wr = MAX_SEND_BUFFERS_PER_QUEUE;
+	props->max_send_sge = MAX_TX_WQE_SGL_ENTRIES;
+	props->max_recv_sge = MAX_RX_WQE_SGL_ENTRIES;
+
+	/*
+	 * max_cqe could potentially be much bigger. As this version of the
+	 * driver only supports RAW QP, it is set to the same value as
+	 * max_qp_wr.
+	 */
+	props->max_cqe = MAX_SEND_BUFFERS_PER_QUEUE;
+
+	/* Memory region limits */
+	props->max_mr_size = MANA_IB_MAX_MR_SIZE;
+	props->max_mr = MANA_IB_MAX_MR;
+
+	return 0;
+}
+
+/*
+ * Port query stub: leaves @props untouched and reports success.
+ */
+int mana_ib_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props)
+{
+ /* This version doesn't return port properties */
+ return 0;
+}
+
+/*
+ * GID query stub: leaves @gid untouched and reports success.
+ */
+int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
+ union ib_gid *gid)
+{
+ /* This version doesn't return GID properties */
+ return 0;
+}
+
+/* Intentionally empty: this driver does nothing when a ucontext is disassociated. */
+void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
new file mode 100644
index 00000000000000..502cc8672eefa2
--- /dev/null
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_IB_H_
+#define _MANA_IB_H_
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_umem.h>
+#include <rdma/mana-abi.h>
+#include <rdma/uverbs_ioctl.h>
+
+#include <net/mana/mana.h>
+
+/*
+ * Bitmap of the page sizes (4K through 2M, in powers of two) offered to
+ * ib_umem_find_best_pgsz() when choosing the block size of a DMA region.
+ */
+#define PAGE_SZ_BM \
+ (SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \
+ SZ_512K | SZ_1M | SZ_2M)
+
+/* MANA doesn't have any limit for MR size */
+#define MANA_IB_MAX_MR_SIZE U64_MAX
+
+/*
+ * The hardware limit on the number of MRs is greater than the maximum
+ * number of MRs that can be represented in 24 bits
+ */
+#define MANA_IB_MAX_MR 0xFFFFFFu
+
+/*
+ * Per-device driver state: the RDMA core device together with the GDMA
+ * device it was probed on.
+ */
+struct mana_ib_dev {
+ struct ib_device ib_dev;
+ struct gdma_dev *gdma_dev;
+};
+
+/*
+ * Driver work queue, wrapping the core ib_wq.
+ *
+ * gdma_region is the DMA region handle of the queue buffer; it is reset to
+ * GDMA_INVALID_DMA_REGION once a WQ object has taken ownership of it.
+ * wq_buf_size is passed as the queue size when the WQ object is created.
+ * id receives the hardware queue index and rx_object the handle of the
+ * created receive WQ object.
+ */
+struct mana_ib_wq {
+ struct ib_wq ibwq;
+ struct ib_umem *umem;
+ int wqe;
+ u32 wq_buf_size;
+ u64 gdma_region;
+ u64 id;
+ mana_handle_t rx_object;
+};
+
+/*
+ * Driver protection domain, wrapping the core ib_pd.
+ *
+ * pdn and pd_handle come from the GDMA create-PD response.
+ * tx_shortform_allowed and tx_vp_offset are copied from the port context
+ * when the vPort is configured for this PD.
+ */
+struct mana_ib_pd {
+ struct ib_pd ibpd;
+ u32 pdn;
+ mana_handle_t pd_handle;
+
+ /* Mutex for sharing access to vport_use_count */
+ struct mutex vport_mutex;
+ /* Number of outstanding vPort configurations taken through this PD */
+ int vport_use_count;
+
+ bool tx_shortform_allowed;
+ u32 tx_vp_offset;
+};
+
+/*
+ * Driver memory region, wrapping the core ib_mr.
+ *
+ * umem is the user memory obtained with ib_umem_get() at registration;
+ * mr_handle is the handle returned by the GDMA create-MR request and is
+ * needed to destroy the MR again.
+ */
+struct mana_ib_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ mana_handle_t mr_handle;
+};
+
+/*
+ * Driver completion queue, wrapping the core ib_cq.
+ *
+ * cqe is the number of entries; multiplied by COMP_ENTRY_SIZE it gives the
+ * queue size passed to the hardware. gdma_region is the DMA region handle
+ * of the CQ buffer and is reset to GDMA_INVALID_DMA_REGION once a WQ
+ * object owns it. id receives the hardware queue index.
+ */
+struct mana_ib_cq {
+ struct ib_cq ibcq;
+ struct ib_umem *umem;
+ int cqe;
+ u64 gdma_region;
+ u64 id;
+};
+
+/*
+ * Driver queue pair, wrapping the core ib_qp.
+ *
+ * For a raw QP, sq_umem is the user send-queue buffer, sq_gdma_region
+ * its DMA region handle (reset to GDMA_INVALID_DMA_REGION once the WQ
+ * object owns it), sq_id the hardware queue index and tx_object the
+ * handle of the created send WQ object.
+ */
+struct mana_ib_qp {
+ struct ib_qp ibqp;
+
+ /* Work queue info */
+ struct ib_umem *sq_umem;
+ int sqe;
+ u64 sq_gdma_region;
+ u64 sq_id;
+ mana_handle_t tx_object;
+
+ /* The port on the IB device, starting with 1 */
+ u32 port;
+};
+
+/*
+ * Driver user context, wrapping the core ib_ucontext.
+ *
+ * doorbell is the doorbell page index allocated for this context; it
+ * selects the page mapped by mmap and is released on context teardown.
+ */
+struct mana_ib_ucontext {
+ struct ib_ucontext ibucontext;
+ u32 doorbell;
+};
+
+/* Driver RWQ indirection table; carries no state beyond the core object. */
+struct mana_ib_rwq_ind_table {
+ struct ib_rwq_ind_table ib_ind_table;
+};
+
+int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region);
+
+int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
+ mana_handle_t gdma_region);
+
+struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata);
+
+int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata);
+
+int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata);
+
+int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+
+int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl);
+
+struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags);
+
+struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_udata *udata);
+
+int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+
+int mana_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
+
+int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+
+int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+
+int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port_id,
+ struct mana_ib_pd *pd, u32 doorbell_id);
+void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
+ u32 port);
+
+int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+
+int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+
+int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+
+int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
+ struct ib_udata *udata);
+void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
+
+int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma);
+
+int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable);
+int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
+ struct ib_udata *uhw);
+int mana_ib_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props);
+int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
+ union ib_gid *gid);
+
+void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
+
+#endif
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
new file mode 100644
index 00000000000000..351207c60eb65d
--- /dev/null
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+/*
+ * Access flags accepted when registering a user MR; a request carrying
+ * any other flag is rejected with -EINVAL.
+ */
+#define VALID_MR_FLAGS \
+ (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ)
+
+/*
+ * Translate IB verbs access flags into GDMA MR access flags.
+ * Local read is always granted; local write, remote write and remote
+ * read are added when the corresponding IB_ACCESS_* bit is set.
+ */
+static enum gdma_mr_access_flags
+mana_ib_verbs_to_gdma_access_flags(int access_flags)
+{
+	enum gdma_mr_access_flags gdma_flags = GDMA_ACCESS_FLAG_LOCAL_READ;
+
+	if (access_flags & IB_ACCESS_REMOTE_READ)
+		gdma_flags |= GDMA_ACCESS_FLAG_REMOTE_READ;
+
+	if (access_flags & IB_ACCESS_REMOTE_WRITE)
+		gdma_flags |= GDMA_ACCESS_FLAG_REMOTE_WRITE;
+
+	if (access_flags & IB_ACCESS_LOCAL_WRITE)
+		gdma_flags |= GDMA_ACCESS_FLAG_LOCAL_WRITE;
+
+	return gdma_flags;
+}
+
+/*
+ * Create a memory region in hardware by sending GDMA_CREATE_MR.
+ *
+ * Only GDMA_MR_TYPE_GVA is supported; any other type in @mr_params is
+ * rejected with -EINVAL before anything is sent. On success the lkey,
+ * rkey and MR handle from the response are stored in @mr.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported MR type, the request
+ * error, or -EPROTO on a non-zero response status.
+ */
+static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
+				struct gdma_create_mr_params *mr_params)
+{
+	struct gdma_create_mr_response resp = {};
+	struct gdma_create_mr_request req = {};
+	struct gdma_dev *mdev = dev->gdma_dev;
+	struct gdma_context *gc;
+	int err;
+
+	gc = mdev->gdma_context;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req),
+			     sizeof(resp));
+	req.pd_handle = mr_params->pd_handle;
+	req.mr_type = mr_params->mr_type;
+
+	switch (mr_params->mr_type) {
+	case GDMA_MR_TYPE_GVA:
+		req.gva.dma_region_handle = mr_params->gva.dma_region_handle;
+		req.gva.virtual_address = mr_params->gva.virtual_address;
+		req.gva.access_flags = mr_params->gva.access_flags;
+		break;
+
+	default:
+		ibdev_dbg(&dev->ib_dev,
+			  "invalid param (GDMA_MR_TYPE) passed, type %d\n",
+			  req.mr_type);
+		return -EINVAL;
+	}
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+	if (err || resp.hdr.status) {
+		/* Message is newline-terminated so it is not merged with the next log line */
+		ibdev_dbg(&dev->ib_dev, "Failed to create mr %d, %u\n", err,
+			  resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+
+		return err;
+	}
+
+	mr->ibmr.lkey = resp.lkey;
+	mr->ibmr.rkey = resp.rkey;
+	mr->mr_handle = resp.mr_handle;
+
+	return 0;
+}
+
+/*
+ * Destroy the hardware memory region identified by @mr_handle by sending
+ * GDMA_DESTROY_MR.
+ *
+ * Returns 0 on success, the request error, or -EPROTO on a non-zero
+ * response status.
+ */
+static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, u64 mr_handle)
+{
+	struct gdma_context *gc = dev->gdma_dev->gdma_context;
+	struct gdma_destroy_mr_response resp = {};
+	struct gdma_destroy_mr_request req = {};
+	int rc;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_MR, sizeof(req),
+			     sizeof(resp));
+	req.mr_handle = mr_handle;
+
+	rc = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (!rc && !resp.hdr.status)
+		return 0;
+
+	dev_err(gc->dev, "Failed to destroy MR: %d, 0x%x\n", rc,
+		resp.hdr.status);
+
+	return rc ? rc : -EPROTO;
+}
+
+/*
+ * Register a user memory region.
+ *
+ * Gets the user memory [@start, @start + @length) with ib_umem_get(),
+ * creates a GDMA DMA region for it and then a GVA-type MR at virtual
+ * address @iova with the access rights derived from @access_flags.
+ *
+ * Returns the new ib_mr on success or an ERR_PTR(): -EINVAL when
+ * @access_flags contains bits outside VALID_MR_FLAGS, -ENOMEM on
+ * allocation failure, or the error of the failing step. Everything set up
+ * before the failing step is released again.
+ */
+struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
+				  u64 iova, int access_flags,
+				  struct ib_udata *udata)
+{
+	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+	struct gdma_create_mr_params mr_params = {};
+	struct ib_device *ibdev = ibpd->device;
+	struct mana_ib_dev *dev;
+	struct mana_ib_mr *mr;
+	u64 dma_region_handle;
+	int err;
+
+	dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+	ibdev_dbg(ibdev,
+		  "start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x\n",
+		  start, iova, length, access_flags);
+
+	if (access_flags & ~VALID_MR_FLAGS)
+		return ERR_PTR(-EINVAL);
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	mr->umem = ib_umem_get(ibdev, start, length, access_flags);
+	if (IS_ERR(mr->umem)) {
+		err = PTR_ERR(mr->umem);
+		ibdev_dbg(ibdev,
+			  "Failed to get umem for register user-mr, %d\n", err);
+		goto err_free;
+	}
+
+	err = mana_ib_gd_create_dma_region(dev, mr->umem, &dma_region_handle);
+	if (err) {
+		ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
+			  err);
+		goto err_umem;
+	}
+
+	ibdev_dbg(ibdev,
+		  "mana_ib_gd_create_dma_region ret %d gdma_region %llx\n", err,
+		  dma_region_handle);
+
+	mr_params.pd_handle = pd->pd_handle;
+	mr_params.mr_type = GDMA_MR_TYPE_GVA;
+	mr_params.gva.dma_region_handle = dma_region_handle;
+	mr_params.gva.virtual_address = iova;
+	mr_params.gva.access_flags =
+		mana_ib_verbs_to_gdma_access_flags(access_flags);
+
+	err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+	if (err)
+		goto err_dma_region;
+
+	/*
+	 * There is no need to keep track of dma_region_handle after MR is
+	 * successfully created. The dma_region_handle is tracked in the PF
+	 * as part of the lifecycle of this MR.
+	 */
+
+	return &mr->ibmr;
+
+err_dma_region:
+	/* Use the driver's own wrapper, as the other DMA-region users do */
+	mana_ib_gd_destroy_dma_region(dev, dma_region_handle);
+
+err_umem:
+	ib_umem_release(mr->umem);
+
+err_free:
+	kfree(mr);
+	return ERR_PTR(err);
+}
+
+/*
+ * Deregister a memory region: destroy the hardware MR first and, only if
+ * that succeeds, release the user memory and free the driver structure.
+ *
+ * Returns 0 on success or the error from destroying the hardware MR, in
+ * which case nothing is freed.
+ */
+int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+	struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr);
+	struct mana_ib_dev *dev =
+		container_of(ibmr->device, struct mana_ib_dev, ib_dev);
+	int rc;
+
+	rc = mana_ib_gd_destroy_mr(dev, mr->mr_handle);
+	if (rc)
+		return rc;
+
+	if (mr->umem)
+		ib_umem_release(mr->umem);
+
+	kfree(mr);
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
new file mode 100644
index 00000000000000..ea15ec77e3212a
--- /dev/null
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+/*
+ * Program RX steering for the vPort behind @ndev with a
+ * MANA_CONFIG_VPORT_RX request.
+ *
+ * @default_rxobj:    RX object that becomes the vPort's default
+ * @ind_table:        RX object handles, 1 << @log_ind_tbl_size entries
+ * @log_ind_tbl_size: log2 of the number of entries in @ind_table;
+ *                    non-zero enables RSS
+ * @rx_hash_key_len:  length of @rx_hash_key; 0 selects the kernel's RSS
+ *                    key. Must not exceed MANA_HASH_KEY_SIZE.
+ * @rx_hash_key:      hash key bytes, read only when the length is non-zero
+ *
+ * Returns 0 on success, -EINVAL for an oversized hash key, -ENOMEM, the
+ * request error, or -EPROTO on a non-zero response status.
+ */
+static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
+				      struct net_device *ndev,
+				      mana_handle_t default_rxobj,
+				      mana_handle_t ind_table[],
+				      u32 log_ind_tbl_size, u32 rx_hash_key_len,
+				      u8 *rx_hash_key)
+{
+	struct mana_port_context *mpc = netdev_priv(ndev);
+	struct mana_cfg_rx_steer_req *req = NULL;
+	struct mana_cfg_rx_steer_resp resp = {};
+	mana_handle_t *req_indir_tab;
+	struct gdma_context *gc;
+	struct gdma_dev *mdev;
+	u32 req_buf_size;
+	int i, err;
+
+	mdev = dev->gdma_dev;
+	gc = mdev->gdma_context;
+
+	/*
+	 * The key length originates from the caller; the request's hash key
+	 * field is filled with MANA_HASH_KEY_SIZE bytes in the default case
+	 * below, so a longer key must not be copied into it.
+	 */
+	if (rx_hash_key_len > MANA_HASH_KEY_SIZE) {
+		ibdev_dbg(&dev->ib_dev, "Invalid rx_hash_key_len %u\n",
+			  rx_hash_key_len);
+		return -EINVAL;
+	}
+
+	/* The indirection table is appended right after the request struct */
+	req_buf_size =
+		sizeof(*req) + sizeof(mana_handle_t) * MANA_INDIRECT_TABLE_SIZE;
+	req = kzalloc(req_buf_size, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
+			     sizeof(resp));
+
+	req->vport = mpc->port_handle;
+	req->rx_enable = 1;
+	req->update_default_rxobj = 1;
+	req->default_rxobj = default_rxobj;
+	req->hdr.dev_id = mdev->dev_id;
+
+	/* If there are more than 1 entries in indirection table, enable RSS */
+	if (log_ind_tbl_size)
+		req->rss_enable = true;
+
+	req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE;
+	req->indir_tab_offset = sizeof(*req);
+	req->update_indir_tab = true;
+
+	req_indir_tab = (mana_handle_t *)(req + 1);
+	/* The ind table passed to the hardware must have
+	 * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb
+	 * ind_table to MANA_INDIRECT_TABLE_SIZE if required
+	 */
+	ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
+	for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+		/* Repeat the caller's (smaller) table to fill every slot */
+		req_indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
+		ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
+			  req_indir_tab[i]);
+	}
+
+	req->update_hashkey = true;
+	if (rx_hash_key_len)
+		memcpy(req->hashkey, rx_hash_key, rx_hash_key_len);
+	else
+		netdev_rss_key_fill(req->hashkey, MANA_HASH_KEY_SIZE);
+
+	ibdev_dbg(&dev->ib_dev, "vport handle %llu default_rxobj 0x%llx\n",
+		  req->vport, default_rxobj);
+
+	err = mana_gd_send_request(gc, req_buf_size, req, sizeof(resp), &resp);
+	if (err) {
+		netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
+		goto out;
+	}
+
+	if (resp.hdr.status) {
+		netdev_err(ndev, "vPort RX configuration failed: 0x%x\n",
+			   resp.hdr.status);
+		err = -EPROTO;
+		goto out;
+	}
+
+	netdev_info(ndev, "Configured steering vPort %llu log_entries %u\n",
+		    mpc->port_handle, log_ind_tbl_size);
+
+out:
+	kfree(req);
+	return err;
+}
+
+/*
+ * Create an RSS (receive-side scaling) QP on top of an RWQ indirection
+ * table.
+ *
+ * For every WQ in attr->rwq_ind_tbl a hardware receive WQ object is
+ * created (together with the WQ's CQ), then the vPort's RX steering is
+ * programmed with the resulting object handles and the hash key from the
+ * user command. The queue ids are reported back to user space.
+ *
+ * Returns 0 on success. Returns -EINVAL for missing/short udata, limits
+ * exceeded, an unsupported hash function or an invalid port; -ENOMEM on
+ * allocation failure; otherwise the error of the failing step. On failure
+ * every WQ object created so far is destroyed.
+ */
+static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_ib_dev *mdev =
+ container_of(pd->device, struct mana_ib_dev, ib_dev);
+ struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
+ struct mana_ib_create_qp_rss_resp resp = {};
+ struct mana_ib_create_qp_rss ucmd = {};
+ struct gdma_dev *gd = mdev->gdma_dev;
+ mana_handle_t *mana_ind_table;
+ struct mana_port_context *mpc;
+ struct mana_context *mc;
+ struct net_device *ndev;
+ struct mana_ib_cq *cq;
+ struct mana_ib_wq *wq;
+ unsigned int ind_tbl_size;
+ struct ib_cq *ibcq;
+ struct ib_wq *ibwq;
+ /* i doubles as the count of WQ objects created, used by the unwind */
+ int i = 0;
+ u32 port;
+ int ret;
+
+ mc = gd->driver_data;
+
+ if (!udata || udata->inlen < sizeof(ucmd))
+ return -EINVAL;
+
+ ret = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (ret) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed copy from udata for create rss-qp, err %d\n",
+ ret);
+ return ret;
+ }
+
+ /* Validate the request against the advertised limits */
+ if (attr->cap.max_recv_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Requested max_recv_wr %d exceeding limit\n",
+ attr->cap.max_recv_wr);
+ return -EINVAL;
+ }
+
+ if (attr->cap.max_recv_sge > MAX_RX_WQE_SGL_ENTRIES) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Requested max_recv_sge %d exceeding limit\n",
+ attr->cap.max_recv_sge);
+ return -EINVAL;
+ }
+
+ ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
+ if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Indirect table size %d exceeding limit\n",
+ ind_tbl_size);
+ return -EINVAL;
+ }
+
+ if (ucmd.rx_hash_function != MANA_IB_RX_HASH_FUNC_TOEPLITZ) {
+ ibdev_dbg(&mdev->ib_dev,
+ "RX Hash function is not supported, %d\n",
+ ucmd.rx_hash_function);
+ return -EINVAL;
+ }
+
+ /* IB ports start with 1, MANA start with 0 */
+ port = ucmd.port;
+ if (port < 1 || port > mc->num_ports) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n",
+ port);
+ return -EINVAL;
+ }
+ ndev = mc->ports[port - 1];
+ mpc = netdev_priv(ndev);
+
+ ibdev_dbg(&mdev->ib_dev, "rx_hash_function %d port %d\n",
+ ucmd.rx_hash_function, port);
+
+ /* Scratch array collecting the RX object handle of each WQ */
+ mana_ind_table = kcalloc(ind_tbl_size, sizeof(mana_handle_t),
+ GFP_KERNEL);
+ if (!mana_ind_table) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ qp->port = port;
+
+ /* Create one hardware RQ object per entry of the indirection table */
+ for (i = 0; i < ind_tbl_size; i++) {
+ struct mana_obj_spec wq_spec = {};
+ struct mana_obj_spec cq_spec = {};
+
+ ibwq = ind_tbl->ind_tbl[i];
+ wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+
+ ibcq = ibwq->cq;
+ cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+
+ wq_spec.gdma_region = wq->gdma_region;
+ wq_spec.queue_size = wq->wq_buf_size;
+
+ cq_spec.gdma_region = cq->gdma_region;
+ cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE;
+ cq_spec.modr_ctx_id = 0;
+ cq_spec.attached_eq = GDMA_CQ_NO_EQ;
+
+ ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ,
+ &wq_spec, &cq_spec, &wq->rx_object);
+ if (ret)
+ goto fail;
+
+ /* The GDMA regions are now owned by the WQ object */
+ wq->gdma_region = GDMA_INVALID_DMA_REGION;
+ cq->gdma_region = GDMA_INVALID_DMA_REGION;
+
+ /* The specs carry the assigned queue indices after creation */
+ wq->id = wq_spec.queue_index;
+ cq->id = cq_spec.queue_index;
+
+ ibdev_dbg(&mdev->ib_dev,
+ "ret %d rx_object 0x%llx wq id %llu cq id %llu\n",
+ ret, wq->rx_object, wq->id, cq->id);
+
+ resp.entries[i].cqid = cq->id;
+ resp.entries[i].wqid = wq->id;
+
+ mana_ind_table[i] = wq->rx_object;
+ }
+ resp.num_entries = i;
+
+ /* wq still points at the last WQ of the table; it becomes the default */
+ ret = mana_ib_cfg_vport_steering(mdev, ndev, wq->rx_object,
+ mana_ind_table,
+ ind_tbl->log_ind_tbl_size,
+ ucmd.rx_hash_key_len,
+ ucmd.rx_hash_key);
+ if (ret)
+ goto fail;
+
+ ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (ret) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed to copy to udata create rss-qp, %d\n",
+ ret);
+ goto fail;
+ }
+
+ kfree(mana_ind_table);
+
+ return 0;
+
+fail:
+ /* Destroy, in reverse order, the i WQ objects created so far */
+ while (i-- > 0) {
+ ibwq = ind_tbl->ind_tbl[i];
+ wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+ mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
+ }
+
+ kfree(mana_ind_table);
+
+ return ret;
+}
+
+/*
+ * Create a RAW_PACKET send QP on top of a user-provided send queue buffer.
+ *
+ * Reads struct mana_ib_create_qp from @udata, validates the requested port
+ * and send capabilities, configures the vport for @ibpd, pins the user SQ
+ * buffer, registers it as a GDMA DMA region and creates the SQ object
+ * (paired with @attr->send_cq) on the Ethernet port's handle. The resulting
+ * SQ id, CQ id and tx_vp_offset are copied back to user space.
+ *
+ * Returns 0 on success; -EINVAL for a missing user context or for short or
+ * out-of-range input; -ENODEV if the vport cannot be configured; otherwise
+ * the error code of the step that failed.
+ */
+static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
+				 struct ib_qp_init_attr *attr,
+				 struct ib_udata *udata)
+{
+	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+	struct mana_ib_dev *mdev =
+		container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+	struct mana_ib_cq *send_cq =
+		container_of(attr->send_cq, struct mana_ib_cq, ibcq);
+	struct mana_ib_ucontext *mana_ucontext =
+		rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+					  ibucontext);
+	struct mana_ib_create_qp_resp resp = {};
+	struct gdma_dev *gd = mdev->gdma_dev;
+	struct mana_ib_create_qp ucmd = {};
+	struct mana_obj_spec wq_spec = {};
+	struct mana_obj_spec cq_spec = {};
+	struct mana_port_context *mpc;
+	struct mana_context *mc;
+	struct net_device *ndev;
+	struct ib_umem *umem;
+	int err;
+	u32 port;
+
+	mc = gd->driver_data;
+
+	if (!mana_ucontext || udata->inlen < sizeof(ucmd))
+		return -EINVAL;
+
+	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+	if (err) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to copy from udata create qp-raw, %d\n", err);
+		return err;
+	}
+
+	/*
+	 * IB ports start with 1, MANA Ethernet ports start with 0.
+	 * Port 0 must be rejected as well: "port - 1" is used below as an
+	 * index into mc->ports[] and would wrap around for an unsigned 0.
+	 */
+	port = ucmd.port;
+	if (port < 1 || port > mc->num_ports)
+		return -EINVAL;
+
+	if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Requested max_send_wr %d exceeding limit\n",
+			  attr->cap.max_send_wr);
+		return -EINVAL;
+	}
+
+	if (attr->cap.max_send_sge > MAX_TX_WQE_SGL_ENTRIES) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Requested max_send_sge %d exceeding limit\n",
+			  attr->cap.max_send_sge);
+		return -EINVAL;
+	}
+
+	ndev = mc->ports[port - 1];
+	mpc = netdev_priv(ndev);
+	ibdev_dbg(&mdev->ib_dev, "port %u ndev %p mpc %p\n", port, ndev, mpc);
+
+	err = mana_ib_cfg_vport(mdev, port - 1, pd, mana_ucontext->doorbell);
+	if (err)
+		return -ENODEV;
+
+	/* Remembered so that destroy can find the same port again */
+	qp->port = port;
+
+	ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
+		  ucmd.sq_buf_addr, ucmd.port);
+
+	/* Pin the user-space send queue buffer */
+	umem = ib_umem_get(ibpd->device, ucmd.sq_buf_addr, ucmd.sq_buf_size,
+			   IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(umem)) {
+		err = PTR_ERR(umem);
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to get umem for create qp-raw, err %d\n",
+			  err);
+		goto err_free_vport;
+	}
+	qp->sq_umem = umem;
+
+	err = mana_ib_gd_create_dma_region(mdev, qp->sq_umem,
+					   &qp->sq_gdma_region);
+	if (err) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to create dma region for create qp-raw, %d\n",
+			  err);
+		goto err_release_umem;
+	}
+
+	ibdev_dbg(&mdev->ib_dev,
+		  "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
+		  err, qp->sq_gdma_region);
+
+	/* Create a WQ on the same port handle used by the Ethernet */
+	wq_spec.gdma_region = qp->sq_gdma_region;
+	wq_spec.queue_size = ucmd.sq_buf_size;
+
+	cq_spec.gdma_region = send_cq->gdma_region;
+	cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE;
+	cq_spec.modr_ctx_id = 0;
+	cq_spec.attached_eq = GDMA_CQ_NO_EQ;
+
+	err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec,
+				 &cq_spec, &qp->tx_object);
+	if (err) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to create wq for create raw-qp, err %d\n",
+			  err);
+		goto err_destroy_dma_region;
+	}
+
+	/* The GDMA regions are now owned by the WQ object */
+	qp->sq_gdma_region = GDMA_INVALID_DMA_REGION;
+	send_cq->gdma_region = GDMA_INVALID_DMA_REGION;
+
+	qp->sq_id = wq_spec.queue_index;
+	send_cq->id = cq_spec.queue_index;
+
+	ibdev_dbg(&mdev->ib_dev,
+		  "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err,
+		  qp->tx_object, qp->sq_id, send_cq->id);
+
+	/* Report the queue ids and doorbell offset back to user space */
+	resp.sqid = qp->sq_id;
+	resp.cqid = send_cq->id;
+	resp.tx_vp_offset = pd->tx_vp_offset;
+
+	err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+	if (err) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed copy udata for create qp-raw, %d\n",
+			  err);
+		goto err_destroy_wq_obj;
+	}
+
+	return 0;
+
+	/*
+	 * Unwind in reverse order of acquisition.
+	 * NOTE(review): when entered via err_destroy_wq_obj, sq_gdma_region has
+	 * already been set to GDMA_INVALID_DMA_REGION, so the destroy call
+	 * below is issued with the invalid handle - confirm this is harmless.
+	 */
+err_destroy_wq_obj:
+	mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
+
+err_destroy_dma_region:
+	mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
+
+err_release_umem:
+	ib_umem_release(umem);
+
+err_free_vport:
+	mana_ib_uncfg_vport(mdev, pd, port - 1);
+
+	return err;
+}
+
+/*
+ * QP creation entry point. Only IB_QPT_RAW_PACKET is handled: a QP with a
+ * receive WQ indirection table goes to the RSS path, any other raw QP to
+ * the plain raw-packet path. Every other QP type yields -EINVAL.
+ */
+int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+		      struct ib_udata *udata)
+{
+	if (attr->qp_type != IB_QPT_RAW_PACKET) {
+		/* Creating QP other than IB_QPT_RAW_PACKET is not supported */
+		ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
+			  attr->qp_type);
+		return -EINVAL;
+	}
+
+	/* When rwq_ind_tbl is used, it's for creating WQs for RSS */
+	if (attr->rwq_ind_tbl)
+		return mana_ib_create_qp_rss(ibqp, ibqp->pd, attr, udata);
+
+	return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
+}
+
+/* Stub: this version of the driver cannot modify a QP; always -EOPNOTSUPP. */
+int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		      int attr_mask, struct ib_udata *udata)
+{
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Tear down an RSS QP: destroy the RQ object of every WQ referenced by the
+ * indirection table (2^log_ind_tbl_size entries). Always returns 0.
+ */
+static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
+				  struct ib_rwq_ind_table *ind_tbl,
+				  struct ib_udata *udata)
+{
+	struct mana_ib_dev *mdev =
+		container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+	struct mana_context *mc = mdev->gdma_dev->driver_data;
+	/* qp->port is 1-based, the ports[] array is 0-based */
+	struct net_device *ndev = mc->ports[qp->port - 1];
+	struct mana_port_context *mpc = netdev_priv(ndev);
+	int nr_entries = 1 << ind_tbl->log_ind_tbl_size;
+	int idx;
+
+	for (idx = 0; idx < nr_entries; idx++) {
+		struct mana_ib_wq *wq = container_of(ind_tbl->ind_tbl[idx],
+						     struct mana_ib_wq, ibwq);
+
+		ibdev_dbg(&mdev->ib_dev, "destroying wq->rx_object %llu\n",
+			  wq->rx_object);
+		mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
+	}
+
+	return 0;
+}
+
+/*
+ * Tear down a plain raw-packet QP: destroy the SQ object, drop the DMA
+ * region and pinned user memory if an SQ umem is attached, then undo the
+ * vport configuration of the QP's port. Always returns 0.
+ */
+static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+	struct mana_ib_dev *mdev =
+		container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+	struct mana_ib_pd *pd =
+		container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+	struct mana_context *mc = mdev->gdma_dev->driver_data;
+	/* qp->port is 1-based, the ports[] array is 0-based */
+	struct mana_port_context *mpc = netdev_priv(mc->ports[qp->port - 1]);
+
+	mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
+
+	if (qp->sq_umem) {
+		mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
+		ib_umem_release(qp->sq_umem);
+	}
+
+	mana_ib_uncfg_vport(mdev, pd, qp->port - 1);
+
+	return 0;
+}
+
+/*
+ * QP destruction entry point. Raw-packet QPs are routed to the RSS or the
+ * plain raw teardown depending on whether an indirection table is attached;
+ * any other QP type is unexpected and reported as -ENOENT.
+ */
+int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+
+	if (ibqp->qp_type != IB_QPT_RAW_PACKET) {
+		ibdev_dbg(ibqp->device, "Unexpected QP type %u\n",
+			  ibqp->qp_type);
+		return -ENOENT;
+	}
+
+	if (ibqp->rwq_ind_tbl)
+		return mana_ib_destroy_qp_rss(qp, ibqp->rwq_ind_tbl, udata);
+
+	return mana_ib_destroy_qp_raw(qp, udata);
+}
diff --git a/drivers/infiniband/hw/mana/wq.c b/drivers/infiniband/hw/mana/wq.c
new file mode 100644
index 00000000000000..372d361510e0ca
--- /dev/null
+++ b/drivers/infiniband/hw/mana/wq.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+/*
+ * Allocate a WQ, pin the user buffer described by struct mana_ib_create_wq
+ * (read from @udata) and register it as a GDMA DMA region.
+ *
+ * Returns the embedded ib_wq on success or an ERR_PTR(): -EINVAL when the
+ * user input is too short, -ENOMEM when the WQ cannot be allocated, or the
+ * error reported by the copy, pin or DMA-region step.
+ */
+struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
+				struct ib_wq_init_attr *init_attr,
+				struct ib_udata *udata)
+{
+	struct mana_ib_dev *mdev =
+		container_of(pd->device, struct mana_ib_dev, ib_dev);
+	struct mana_ib_create_wq ucmd = {};
+	struct ib_umem *pinned;
+	struct mana_ib_wq *wq;
+	int ret;
+
+	if (udata->inlen < sizeof(ucmd))
+		return ERR_PTR(-EINVAL);
+
+	ret = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+	if (ret) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to copy from udata for create wq, %d\n", ret);
+		return ERR_PTR(ret);
+	}
+
+	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+	if (!wq)
+		return ERR_PTR(-ENOMEM);
+
+	ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n", ucmd.wq_buf_addr);
+
+	pinned = ib_umem_get(pd->device, ucmd.wq_buf_addr, ucmd.wq_buf_size,
+			     IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(pinned)) {
+		ret = PTR_ERR(pinned);
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to get umem for create wq, err %d\n", ret);
+		kfree(wq);
+		return ERR_PTR(ret);
+	}
+
+	wq->rx_object = INVALID_MANA_HANDLE;
+	wq->wq_buf_size = ucmd.wq_buf_size;
+	wq->wqe = init_attr->max_wr;
+	wq->umem = pinned;
+
+	ret = mana_ib_gd_create_dma_region(mdev, wq->umem, &wq->gdma_region);
+	if (ret) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to create dma region for create wq, %d\n",
+			  ret);
+		ib_umem_release(pinned);
+		kfree(wq);
+		return ERR_PTR(ret);
+	}
+
+	ibdev_dbg(&mdev->ib_dev,
+		  "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
+		  ret, wq->gdma_region);
+
+	/* The WQ ID is not known yet; it is only returned at wq_create time */
+
+	return &wq->ibwq;
+}
+
+/* Stub: this version of the driver cannot modify a WQ; always -EOPNOTSUPP. */
+int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+		      u32 wq_attr_mask, struct ib_udata *udata)
+{
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Release a WQ: destroy its GDMA DMA region, unpin the user memory and
+ * free the WQ structure itself. Always returns 0.
+ */
+int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
+{
+	struct mana_ib_wq *wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+	struct mana_ib_dev *mdev =
+		container_of(ibwq->device, struct mana_ib_dev, ib_dev);
+
+	mana_ib_gd_destroy_dma_region(mdev, wq->gdma_region);
+	ib_umem_release(wq->umem);
+
+	kfree(wq);
+
+	return 0;
+}
+
+/*
+ * Nothing to set up: the driver maintains no additional data for an
+ * indirection table, so creation trivially succeeds.
+ */
+int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+				 struct ib_rwq_ind_table_init_attr *init_attr,
+				 struct ib_udata *udata)
+{
+	return 0;
+}
+
+/*
+ * Nothing to tear down: the driver maintains no additional data for an
+ * indirection table, so destruction trivially succeeds.
+ */
+int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
+{
+	return 0;
+}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index ba47874f90d381..dceebcd885bbd0 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -144,8 +144,7 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device,
}
}
}
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
return dev;
@@ -1307,8 +1306,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
spin_lock_bh(&mdev->iboe.lock);
ndev = mdev->iboe.netdevs[mqp->port - 1];
- if (ndev)
- dev_hold(ndev);
+ dev_hold(ndev);
spin_unlock_bh(&mdev->iboe.lock);
if (ndev) {
@@ -1955,11 +1953,9 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (ge) {
spin_lock_bh(&mdev->iboe.lock);
ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
- if (ndev)
- dev_hold(ndev);
+ dev_hold(ndev);
spin_unlock_bh(&mdev->iboe.lock);
- if (ndev)
- dev_put(ndev);
+ dev_put(ndev);
list_del(&ge->list);
kfree(ge);
} else
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index be189e0525de6c..efc9e4a6df04a2 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -267,17 +267,20 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}
-static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
+static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe,
+ struct ib_wc *wc, const char *level)
{
- mlx5_ib_warn(dev, "dump error cqe\n");
- mlx5_dump_err_cqe(dev->mdev, cqe);
+ mlx5_ib_log(level, dev, "WC error: %d, Message: %s\n", wc->status,
+ ib_wc_status_msg(wc->status));
+ print_hex_dump(level, "cqe_dump: ", DUMP_PREFIX_OFFSET, 16, 1,
+ cqe, sizeof(*cqe), false);
}
static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
struct mlx5_err_cqe *cqe,
struct ib_wc *wc)
{
- int dump = 1;
+ const char *dump = KERN_WARNING;
switch (cqe->syndrome) {
case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
@@ -287,10 +290,11 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
wc->status = IB_WC_LOC_QP_OP_ERR;
break;
case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
+ dump = KERN_DEBUG;
wc->status = IB_WC_LOC_PROT_ERR;
break;
case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
- dump = 0;
+ dump = NULL;
wc->status = IB_WC_WR_FLUSH_ERR;
break;
case MLX5_CQE_SYNDROME_MW_BIND_ERR:
@@ -306,18 +310,20 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
wc->status = IB_WC_REM_INV_REQ_ERR;
break;
case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
+ dump = KERN_DEBUG;
wc->status = IB_WC_REM_ACCESS_ERR;
break;
case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
+ dump = KERN_DEBUG;
wc->status = IB_WC_REM_OP_ERR;
break;
case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
+ dump = NULL;
wc->status = IB_WC_RETRY_EXC_ERR;
- dump = 0;
break;
case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
+ dump = NULL;
wc->status = IB_WC_RNR_RETRY_EXC_ERR;
- dump = 0;
break;
case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
wc->status = IB_WC_REM_ABORT_ERR;
@@ -328,11 +334,8 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
}
wc->vendor_err = cqe->vendor_err_synd;
- if (dump) {
- mlx5_ib_warn(dev, "WC error: %d, Message: %s\n", wc->status,
- ib_wc_status_msg(wc->status));
- dump_cqe(dev, cqe);
- }
+ if (dump)
+ dump_cqe(dev, cqe, wc, dump);
}
static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 490ec308e3098f..3008632a6c2064 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -127,7 +127,6 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
}
#define LAST_ETH_FIELD vlan_tag
-#define LAST_IB_FIELD sl
#define LAST_IPV4_FIELD tos
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 4a7f7064bd0eb5..8b91babdd4c084 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -38,6 +38,10 @@
dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
__LINE__, current->pid, ##arg)
+/*
+ * Like the other mlx5_ib_* print helpers, but with the printk level passed
+ * in by the caller as @lvl. The message is prefixed with the calling
+ * function name, source line and current pid.
+ */
+#define mlx5_ib_log(lvl, _dev, format, arg...) \
+ dev_printk(lvl, &(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##arg)
+
#define MLX5_IB_DEFAULT_UIDX 0xffffff
#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 410cc5fd25239d..053fe946e45aef 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1929,10 +1929,8 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
in = kzalloc(inlen, GFP_KERNEL);
- if (!in) {
- err = -ENOMEM;
- goto free;
- }
+ if (!in)
+ return -ENOMEM;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index ba0c3e4c07d85c..ecdfeff3d44f36 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -479,7 +479,7 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle)
/* The CQ's CNQ notification counter is checked before
* destroying the CQ in a busy-wait loop that waits for all of
* the CQ's CNQ interrupts to be processed. It is increased
- * here, only after the completion handler, to ensure that the
+ * here, only after the completion handler, to ensure that
* the handler is not running when the CQ is destroyed.
*/
cq->cnq_notif++;
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 07386117f21ad8..277769fa97452c 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -799,12 +799,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
hwerrs &= ~TXE_PIO_PARITY;
}
- if (!hwerrs) {
- static u32 freeze_cnt;
-
- freeze_cnt++;
+ if (!hwerrs)
qib_6120_clear_freeze(dd);
- } else
+ else
isfatal = 1;
}
diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c
index 6a8148851f21d3..1325110237cd9a 100644
--- a/drivers/infiniband/hw/qib/qib_tx.c
+++ b/drivers/infiniband/hw/qib/qib_tx.c
@@ -82,7 +82,6 @@ int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd)
struct qib_devdata *dd = rcd->dd;
unsigned i;
unsigned last;
- unsigned n = 0;
last = rcd->pio_base + rcd->piocnt;
/*
@@ -102,10 +101,8 @@ int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd)
}
spin_lock_irq(&dd->pioavail_lock);
for (i = rcd->pio_base; i < last; i++) {
- if (__test_and_clear_bit(i, dd->pio_need_disarm)) {
- n++;
+ if (__test_and_clear_bit(i, dd->pio_need_disarm))
dd->f_sendctrl(rcd->ppd, QIB_SENDCTRL_DISARM_BUF(i));
- }
}
spin_unlock_irq(&dd->pioavail_lock);
return 0;
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index bf2f30d67949dc..9fe03d6ffac1a7 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -851,7 +851,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
}
/*
- * This assignment is a bit strange. it's because the
+ * This assignment is a bit strange. it's because
* the pbc counts the number of 32 bit words in the full
* packet _except_ the first word of the pbc itself...
*/
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 51daac5c4feb75..136c2efe34660a 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -187,14 +187,14 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev)
exists = rxe_get_dev_from_net(ndev);
if (exists) {
ib_device_put(&exists->ib_dev);
- pr_err("already configured on %s\n", ndev->name);
+ rxe_dbg(exists, "already configured on %s\n", ndev->name);
err = -EEXIST;
goto err;
}
err = rxe_net_add(ibdev_name, ndev);
if (err) {
- pr_err("failed to add %s\n", ndev->name);
+ rxe_dbg(exists, "failed to add %s\n", ndev->name);
goto err;
}
err:
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 30fbdf3bc76a34..ab334900fcc3d7 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -38,6 +38,25 @@
#define RXE_ROCE_V2_SPORT (0xc000)
+/*
+ * Debug print helpers built on ibdev_dbg().
+ *
+ * rxe_dbg() takes the rxe device itself and prefixes the message with the
+ * calling function name. The rxe_dbg_<obj>() variants take a pointer to an
+ * rxe object (uc, pd, ah, srq, qp, cq, mr, mw), log against the ib_device
+ * recorded in that object and additionally prefix the message with the
+ * object kind and its elem.index.
+ *
+ * The object argument is dereferenced by the macro expansion.
+ */
+#define rxe_dbg(rxe, fmt, ...) ibdev_dbg(&(rxe)->ib_dev, \
+ "%s: " fmt, __func__, ##__VA_ARGS__)
+#define rxe_dbg_uc(uc, fmt, ...) ibdev_dbg((uc)->ibuc.device, \
+ "uc#%d %s: " fmt, (uc)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_pd(pd, fmt, ...) ibdev_dbg((pd)->ibpd.device, \
+ "pd#%d %s: " fmt, (pd)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_ah(ah, fmt, ...) ibdev_dbg((ah)->ibah.device, \
+ "ah#%d %s: " fmt, (ah)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_srq(srq, fmt, ...) ibdev_dbg((srq)->ibsrq.device, \
+ "srq#%d %s: " fmt, (srq)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_qp(qp, fmt, ...) ibdev_dbg((qp)->ibqp.device, \
+ "qp#%d %s: " fmt, (qp)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_cq(cq, fmt, ...) ibdev_dbg((cq)->ibcq.device, \
+ "cq#%d %s: " fmt, (cq)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_mr(mr, fmt, ...) ibdev_dbg((mr)->ibmr.device, \
+ "mr#%d %s: " fmt, (mr)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_mw(mw, fmt, ...) ibdev_dbg((mw)->ibmw.device, \
+ "mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__)
+
void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name);
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 3b05314ca739ec..889d7adbd45504 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -14,26 +14,45 @@ void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av)
memcpy(av->dmac, attr->roce.dmac, ETH_ALEN);
}
-int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
+static int chk_attr(void *obj, struct rdma_ah_attr *attr, bool obj_is_ah)
{
const struct ib_global_route *grh = rdma_ah_read_grh(attr);
struct rxe_port *port;
+ struct rxe_dev *rxe;
+ struct rxe_qp *qp;
+ struct rxe_ah *ah;
int type;
+ if (obj_is_ah) {
+ ah = obj;
+ rxe = to_rdev(ah->ibah.device);
+ } else {
+ qp = obj;
+ rxe = to_rdev(qp->ibqp.device);
+ }
+
port = &rxe->port;
if (rdma_ah_get_ah_flags(attr) & IB_AH_GRH) {
if (grh->sgid_index > port->attr.gid_tbl_len) {
- pr_warn("invalid sgid index = %d\n",
- grh->sgid_index);
+ if (obj_is_ah)
+ rxe_dbg_ah(ah, "invalid sgid index = %d\n",
+ grh->sgid_index);
+ else
+ rxe_dbg_qp(qp, "invalid sgid index = %d\n",
+ grh->sgid_index);
return -EINVAL;
}
type = rdma_gid_attr_network_type(grh->sgid_attr);
if (type < RDMA_NETWORK_IPV4 ||
type > RDMA_NETWORK_IPV6) {
- pr_warn("invalid network type for rdma_rxe = %d\n",
- type);
+ if (obj_is_ah)
+ rxe_dbg_ah(ah, "invalid network type for rdma_rxe = %d\n",
+ type);
+ else
+ rxe_dbg_qp(qp, "invalid network type for rdma_rxe = %d\n",
+ type);
return -EINVAL;
}
}
@@ -41,6 +60,16 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
return 0;
}
+/* Validate @attr on behalf of a QP; thin wrapper around chk_attr(). */
+int rxe_av_chk_attr(struct rxe_qp *qp, struct rdma_ah_attr *attr)
+{
+	const bool obj_is_ah = false;
+
+	return chk_attr(qp, attr, obj_is_ah);
+}
+
+/* Validate @attr on behalf of an AH; thin wrapper around chk_attr(). */
+int rxe_ah_chk_attr(struct rxe_ah *ah, struct rdma_ah_attr *attr)
+{
+	const bool obj_is_ah = true;
+
+	return chk_attr(ah, attr, obj_is_ah);
+}
+
void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
struct rdma_ah_attr *attr)
{
@@ -121,12 +150,12 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp)
/* only new user provider or kernel client */
ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num);
if (!ah) {
- pr_warn("Unable to find AH matching ah_num\n");
+ rxe_dbg_qp(pkt->qp, "Unable to find AH matching ah_num\n");
return NULL;
}
if (rxe_ah_pd(ah) != pkt->qp->pd) {
- pr_warn("PDs don't match for AH and QP\n");
+ rxe_dbg_qp(pkt->qp, "PDs don't match for AH and QP\n");
rxe_put(ah);
return NULL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index fb0c008af78cc2..20737fec392bf7 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -104,6 +104,8 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV;
case IB_WR_REG_MR: return IB_WC_REG_MR;
case IB_WR_BIND_MW: return IB_WC_BIND_MW;
+ case IB_WR_ATOMIC_WRITE: return IB_WC_ATOMIC_WRITE;
+ case IB_WR_FLUSH: return IB_WC_FLUSH;
default:
return 0xff;
@@ -114,11 +116,11 @@ void retransmit_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
- pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index);
+ rxe_dbg_qp(qp, "retransmit timer fired\n");
if (qp->valid) {
qp->comp.timeout = 1;
- rxe_run_task(&qp->comp.task, 1);
+ rxe_sched_task(&qp->comp.task);
}
}
@@ -132,7 +134,10 @@ void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
if (must_sched != 0)
rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);
- rxe_run_task(&qp->comp.task, must_sched);
+ if (must_sched)
+ rxe_sched_task(&qp->comp.task);
+ else
+ rxe_run_task(&qp->comp.task);
}
static inline enum comp_state get_wqe(struct rxe_qp *qp,
@@ -200,6 +205,10 @@ static inline enum comp_state check_psn(struct rxe_qp *qp,
*/
if (pkt->psn == wqe->last_psn)
return COMPST_COMP_ACK;
+ else if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE &&
+ (qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST ||
+ qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE))
+ return COMPST_CHECK_ACK;
else
return COMPST_DONE;
} else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) {
@@ -228,6 +237,10 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
+ /* Check NAK code to handle a remote error */
+ if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE)
+ break;
+
if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
/* read retries of partial data may restart from
@@ -258,12 +271,16 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
if ((syn & AETH_TYPE_MASK) != AETH_ACK)
return COMPST_ERROR;
+ if (wqe->wr.opcode == IB_WR_ATOMIC_WRITE)
+ return COMPST_WRITE_SEND;
+
fallthrough;
/* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH)
*/
case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
if (wqe->wr.opcode != IB_WR_RDMA_READ &&
- wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
+ wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV &&
+ wqe->wr.opcode != IB_WR_FLUSH) {
wqe->status = IB_WC_FATAL_ERR;
return COMPST_ERROR;
}
@@ -305,7 +322,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_run_task(&qp->req.task, 0);
+ rxe_run_task(&qp->req.task);
}
}
return COMPST_ERROR_RETRY;
@@ -323,7 +340,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
return COMPST_ERROR;
default:
- pr_warn("unexpected nak %x\n", syn);
+ rxe_dbg_qp(qp, "unexpected nak %x\n", syn);
wqe->status = IB_WC_REM_OP_ERR;
return COMPST_ERROR;
}
@@ -334,7 +351,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
break;
default:
- pr_warn("unexpected opcode\n");
+ rxe_dbg_qp(qp, "unexpected opcode\n");
}
return COMPST_ERROR;
@@ -452,7 +469,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
*/
if (qp->req.wait_fence) {
qp->req.wait_fence = 0;
- rxe_run_task(&qp->req.task, 0);
+ rxe_run_task(&qp->req.task);
}
}
@@ -466,7 +483,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
if (qp->req.need_rd_atomic) {
qp->comp.timeout_retry = 0;
qp->req.need_rd_atomic = 0;
- rxe_run_task(&qp->req.task, 0);
+ rxe_run_task(&qp->req.task);
}
}
@@ -512,7 +529,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
}
}
@@ -587,8 +604,7 @@ int rxe_completer(void *arg)
state = COMPST_GET_ACK;
while (1) {
- pr_debug("qp#%d state = %s\n", qp_num(qp),
- comp_state_name[state]);
+ rxe_dbg_qp(qp, "state = %s\n", comp_state_name[state]);
switch (state) {
case COMPST_GET_ACK:
skb = skb_dequeue(&qp->resp_pkts);
@@ -646,7 +662,7 @@ int rxe_completer(void *arg)
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
}
state = COMPST_DONE;
@@ -714,7 +730,7 @@ int rxe_completer(void *arg)
RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
qp->comp.started_retry = 1;
- rxe_run_task(&qp->req.task, 0);
+ rxe_run_task(&qp->req.task);
}
goto done;
@@ -735,8 +751,7 @@ int rxe_completer(void *arg)
* rnr timer has fired
*/
qp->req.wait_for_rnr_timer = 1;
- pr_debug("qp#%d set rnr nak timer\n",
- qp_num(qp));
+ rxe_dbg_qp(qp, "set rnr nak timer\n");
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index b1a0ab3cd4bd18..1df186534639aa 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -14,12 +14,12 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
int count;
if (cqe <= 0) {
- pr_warn("cqe(%d) <= 0\n", cqe);
+ rxe_dbg(rxe, "cqe(%d) <= 0\n", cqe);
goto err1;
}
if (cqe > rxe->attr.max_cqe) {
- pr_debug("cqe(%d) > max_cqe(%d)\n",
+ rxe_dbg(rxe, "cqe(%d) > max_cqe(%d)\n",
cqe, rxe->attr.max_cqe);
goto err1;
}
@@ -27,7 +27,7 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
if (cq) {
count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT);
if (cqe < count) {
- pr_debug("cqe(%d) < current # elements in queue (%d)",
+ rxe_dbg_cq(cq, "cqe(%d) < current # elements in queue (%d)",
cqe, count);
goto err1;
}
@@ -65,7 +65,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
cq->queue = rxe_queue_init(rxe, &cqe,
sizeof(struct rxe_cqe), type);
if (!cq->queue) {
- pr_warn("unable to create cq\n");
+ rxe_dbg(rxe, "unable to create cq\n");
return -ENOMEM;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h
index e432f9e37795ef..46f82b27fcd2f5 100644
--- a/drivers/infiniband/sw/rxe/rxe_hdr.h
+++ b/drivers/infiniband/sw/rxe/rxe_hdr.h
@@ -608,6 +608,52 @@ static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len)
}
/******************************************************************************
+ * FLUSH Extended Transport Header
+ ******************************************************************************/
+
+struct rxe_feth {
+ __be32 bits;
+};
+
+#define FETH_PLT_MASK (0x0000000f) /* bits 3-0 */
+#define FETH_SEL_MASK (0x00000030) /* bits 5-4 */
+#define FETH_SEL_SHIFT (4U)
+
+/* Extract the PLT field (bits 3-0) from the FETH header at @arg. */
+static inline u32 __feth_plt(void *arg)
+{
+	struct rxe_feth *hdr = arg;
+	u32 word = be32_to_cpu(hdr->bits);
+
+	return word & FETH_PLT_MASK;
+}
+
+/* Extract the SEL field (bits 5-4) from the FETH header at @arg. */
+static inline u32 __feth_sel(void *arg)
+{
+	struct rxe_feth *hdr = arg;
+	u32 word = be32_to_cpu(hdr->bits);
+
+	return (word & FETH_SEL_MASK) >> FETH_SEL_SHIFT;
+}
+
+/* PLT field of the FETH header located inside @pkt for its opcode. */
+static inline u32 feth_plt(struct rxe_pkt_info *pkt)
+{
+	void *feth = pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH];
+
+	return __feth_plt(feth);
+}
+
+/* SEL field of the FETH header located inside @pkt for its opcode. */
+static inline u32 feth_sel(struct rxe_pkt_info *pkt)
+{
+	void *feth = pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH];
+
+	return __feth_sel(feth);
+}
+
+/*
+ * Write the FETH header of @pkt: @type goes into the PLT field and @level
+ * into the SEL field; all remaining bits are stored as zero.
+ */
+static inline void feth_init(struct rxe_pkt_info *pkt, u8 type, u8 level)
+{
+	struct rxe_feth *hdr = (struct rxe_feth *)
+		(pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]);
+	u32 sel = (level << FETH_SEL_SHIFT) & FETH_SEL_MASK;
+	u32 plt = type & FETH_PLT_MASK;
+
+	hdr->bits = cpu_to_be32(sel | plt);
+}
+
+/******************************************************************************
* Atomic Extended Transport Header
******************************************************************************/
struct rxe_atmeth {
@@ -742,7 +788,6 @@ enum aeth_syndrome {
AETH_NAK_INVALID_REQ = 0x61,
AETH_NAK_REM_ACC_ERR = 0x62,
AETH_NAK_REM_OP_ERR = 0x63,
- AETH_NAK_INV_RD_REQ = 0x64,
};
static inline u8 __aeth_syn(void *arg)
@@ -910,6 +955,7 @@ enum rxe_hdr_length {
RXE_ATMETH_BYTES = sizeof(struct rxe_atmeth),
RXE_IETH_BYTES = sizeof(struct rxe_ieth),
RXE_RDETH_BYTES = sizeof(struct rxe_rdeth),
+ RXE_FETH_BYTES = sizeof(struct rxe_feth),
};
static inline size_t header_size(struct rxe_pkt_info *pkt)
diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c
index 46bb07c5c4df25..71bc2c1895888f 100644
--- a/drivers/infiniband/sw/rxe/rxe_icrc.c
+++ b/drivers/infiniband/sw/rxe/rxe_icrc.c
@@ -21,7 +21,7 @@ int rxe_icrc_init(struct rxe_dev *rxe)
tfm = crypto_alloc_shash("crc32", 0, 0);
if (IS_ERR(tfm)) {
- pr_warn("failed to init crc32 algorithm err:%ld\n",
+ rxe_dbg(rxe, "failed to init crc32 algorithm err: %ld\n",
PTR_ERR(tfm));
return PTR_ERR(tfm);
}
@@ -51,7 +51,7 @@ static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len)
*(__be32 *)shash_desc_ctx(shash) = crc;
err = crypto_shash_update(shash, next, len);
if (unlikely(err)) {
- pr_warn_ratelimited("failed crc calculation, err: %d\n", err);
+ rxe_dbg(rxe, "failed crc calculation, err: %d\n", err);
return (__force __be32)crc32_le((__force u32)crc, next, len);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index c2a5c8814a48bb..948ce4902b10fe 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -9,16 +9,12 @@
/* rxe_av.c */
void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av);
-
-int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr);
-
+int rxe_av_chk_attr(struct rxe_qp *qp, struct rdma_ah_attr *attr);
+int rxe_ah_chk_attr(struct rxe_ah *ah, struct rdma_ah_attr *attr);
void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
struct rdma_ah_attr *attr);
-
void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr);
-
void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr);
-
struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp);
/* rxe_cq.c */
@@ -68,6 +64,7 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr);
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
int access, struct rxe_mr *mr);
int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
+int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length);
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
enum rxe_mr_copy_dir dir);
int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index 9149b609542960..a47d72dbc5376d 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -79,7 +79,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
/* Don't allow a mmap larger than the object. */
if (size > ip->info.size) {
- pr_err("mmap region is larger than the object!\n");
+ rxe_dbg(rxe, "mmap region is larger than the object!\n");
spin_unlock_bh(&rxe->pending_lock);
ret = -EINVAL;
goto done;
@@ -87,7 +87,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
goto found_it;
}
- pr_warn("unable to find pending mmap info\n");
+ rxe_dbg(rxe, "unable to find pending mmap info\n");
spin_unlock_bh(&rxe->pending_lock);
ret = -EINVAL;
goto done;
@@ -98,7 +98,7 @@ found_it:
ret = remap_vmalloc_range(vma, ip->obj, 0);
if (ret) {
- pr_err("err %d from remap_vmalloc_range\n", ret);
+ rxe_dbg(rxe, "err %d from remap_vmalloc_range\n", ret);
goto done;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 502e9ada99b307..072eac4b65d296 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -4,6 +4,8 @@
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*/
+#include <linux/libnvdimm.h>
+
#include "rxe.h"
#include "rxe_loc.h"
@@ -26,7 +28,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
- switch (mr->type) {
+ switch (mr->ibmr.type) {
case IB_MR_TYPE_DMA:
return 0;
@@ -38,8 +40,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
return 0;
default:
- pr_warn("%s: mr type (%d) not supported\n",
- __func__, mr->type);
+ rxe_dbg_mr(mr, "type (%d) not supported\n", mr->ibmr.type);
return -EFAULT;
}
}
@@ -62,7 +63,6 @@ static void rxe_mr_init(int access, struct rxe_mr *mr)
mr->rkey = mr->ibmr.rkey = rkey;
mr->state = RXE_MR_STATE_INVALID;
- mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
@@ -99,6 +99,7 @@ err2:
kfree(mr->map[i]);
kfree(mr->map);
+ mr->map = NULL;
err1:
return -ENOMEM;
}
@@ -109,7 +110,16 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr)
mr->access = access;
mr->state = RXE_MR_STATE_VALID;
- mr->type = IB_MR_TYPE_DMA;
+ mr->ibmr.type = IB_MR_TYPE_DMA;
+}
+
+static bool is_pmem_page(struct page *pg)
+{
+ unsigned long paddr = page_to_phys(pg);
+
+ return REGION_INTERSECTS ==
+ region_intersects(paddr, PAGE_SIZE, IORESOURCE_MEM,
+ IORES_DESC_PERSISTENT_MEMORY);
}
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
@@ -122,12 +132,11 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
int num_buf;
void *vaddr;
int err;
- int i;
umem = ib_umem_get(&rxe->ib_dev, start, length, access);
if (IS_ERR(umem)) {
- pr_warn("%s: Unable to pin memory region err = %d\n",
- __func__, (int)PTR_ERR(umem));
+ rxe_dbg_mr(mr, "Unable to pin memory region err = %d\n",
+ (int)PTR_ERR(umem));
err = PTR_ERR(umem);
goto err_out;
}
@@ -138,8 +147,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
err = rxe_mr_alloc(mr, num_buf);
if (err) {
- pr_warn("%s: Unable to allocate memory for map\n",
- __func__);
+ rxe_dbg_mr(mr, "Unable to allocate memory for map\n");
goto err_release_umem;
}
@@ -149,23 +157,30 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
num_buf = 0;
map = mr->map;
if (length > 0) {
- buf = map[0]->buf;
+ bool persistent_access = access & IB_ACCESS_FLUSH_PERSISTENT;
+ buf = map[0]->buf;
for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
+ struct page *pg = sg_page_iter_page(&sg_iter);
+
+ if (persistent_access && !is_pmem_page(pg)) {
+ rxe_dbg_mr(mr, "Unable to register persistent access to non-pmem device\n");
+ err = -EINVAL;
+ goto err_release_umem;
+ }
+
if (num_buf >= RXE_BUF_PER_MAP) {
map++;
buf = map[0]->buf;
num_buf = 0;
}
- vaddr = page_address(sg_page_iter_page(&sg_iter));
+ vaddr = page_address(pg);
if (!vaddr) {
- pr_warn("%s: Unable to get virtual address\n",
- __func__);
+ rxe_dbg_mr(mr, "Unable to get virtual address\n");
err = -ENOMEM;
- goto err_cleanup_map;
+ goto err_release_umem;
}
-
buf->addr = (uintptr_t)vaddr;
buf->size = PAGE_SIZE;
num_buf++;
@@ -178,14 +193,11 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
mr->access = access;
mr->offset = ib_umem_offset(umem);
mr->state = RXE_MR_STATE_VALID;
- mr->type = IB_MR_TYPE_USER;
+ mr->ibmr.type = IB_MR_TYPE_USER;
+ mr->ibmr.page_size = PAGE_SIZE;
return 0;
-err_cleanup_map:
- for (i = 0; i < mr->num_map; i++)
- kfree(mr->map[i]);
- kfree(mr->map);
err_release_umem:
ib_umem_release(umem);
err_out:
@@ -205,7 +217,7 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
mr->max_buf = max_pages;
mr->state = RXE_MR_STATE_FREE;
- mr->type = IB_MR_TYPE_MEM_REG;
+ mr->ibmr.type = IB_MR_TYPE_MEM_REG;
return 0;
@@ -256,7 +268,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
void *addr;
if (mr->state != RXE_MR_STATE_VALID) {
- pr_warn("mr not in valid state\n");
+ rxe_dbg_mr(mr, "Not in valid state\n");
addr = NULL;
goto out;
}
@@ -267,7 +279,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
}
if (mr_check_range(mr, iova, length)) {
- pr_warn("range violation\n");
+ rxe_dbg_mr(mr, "Range violation\n");
addr = NULL;
goto out;
}
@@ -275,7 +287,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
lookup_iova(mr, iova, &m, &n, &offset);
if (offset + length > mr->map[m]->buf[n].size) {
- pr_warn("crosses page boundary\n");
+ rxe_dbg_mr(mr, "Crosses page boundary\n");
addr = NULL;
goto out;
}
@@ -286,6 +298,39 @@ out:
return addr;
}
+int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length)
+{
+ size_t offset;
+
+ if (length == 0)
+ return 0;
+
+ if (mr->ibmr.type == IB_MR_TYPE_DMA)
+ return -EFAULT;
+
+ offset = (iova - mr->ibmr.iova + mr->offset) & mr->page_mask;
+ while (length > 0) {
+ u8 *va;
+ int bytes;
+
+ bytes = mr->ibmr.page_size - offset;
+ if (bytes > length)
+ bytes = length;
+
+ va = iova_to_vaddr(mr, iova, length);
+ if (!va)
+ return -EFAULT;
+
+ arch_wb_cache_pmem(va, bytes);
+
+ length -= bytes;
+ iova += bytes;
+ offset = 0;
+ }
+
+ return 0;
+}
+
/* copy data from a range (vaddr, vaddr+length-1) to or from
* a mr object starting at iova.
*/
@@ -304,7 +349,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
if (length == 0)
return 0;
- if (mr->type == IB_MR_TYPE_DMA) {
+ if (mr->ibmr.type == IB_MR_TYPE_DMA) {
u8 *src, *dest;
src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
@@ -511,7 +556,7 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
(type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
- mr_pd(mr) != pd || (access && !(access & mr->access)) ||
+ mr_pd(mr) != pd || ((access & mr->access) != access) ||
mr->state != RXE_MR_STATE_VALID)) {
rxe_put(mr);
mr = NULL;
@@ -528,27 +573,26 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
if (!mr) {
- pr_err("%s: No MR for key %#x\n", __func__, key);
+ rxe_dbg_qp(qp, "No MR for key %#x\n", key);
ret = -EINVAL;
goto err;
}
if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
- pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
- __func__, key, (mr->rkey ? mr->rkey : mr->lkey));
+ rxe_dbg_mr(mr, "wr key (%#x) doesn't match mr key (%#x)\n",
+ key, (mr->rkey ? mr->rkey : mr->lkey));
ret = -EINVAL;
goto err_drop_ref;
}
if (atomic_read(&mr->num_mw) > 0) {
- pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
- __func__);
+ rxe_dbg_mr(mr, "Attempt to invalidate an MR while bound to MWs\n");
ret = -EINVAL;
goto err_drop_ref;
}
- if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
- pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
+ if (unlikely(mr->ibmr.type != IB_MR_TYPE_MEM_REG)) {
+ rxe_dbg_mr(mr, "Type (%d) is wrong\n", mr->ibmr.type);
ret = -EINVAL;
goto err_drop_ref;
}
@@ -577,22 +621,20 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
/* user can only register MR in free state */
if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
- pr_warn("%s: mr->lkey = 0x%x not free\n",
- __func__, mr->lkey);
+ rxe_dbg_mr(mr, "mr->lkey = 0x%x not free\n", mr->lkey);
return -EINVAL;
}
/* user can only register mr with qp in same protection domain */
if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
- pr_warn("%s: qp->pd and mr->pd don't match\n",
- __func__);
+ rxe_dbg_mr(mr, "qp->pd and mr->pd don't match\n");
return -EINVAL;
}
/* user is only allowed to change key portion of l/rkey */
if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
- pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
- __func__, key, mr->lkey);
+ rxe_dbg_mr(mr, "key = 0x%x has wrong index mr->lkey = 0x%x\n",
+ key, mr->lkey);
return -EINVAL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 902b7df7aaedb6..afa5ce1a711667 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -52,14 +52,14 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
{
if (mw->ibmw.type == IB_MW_TYPE_1) {
if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to bind a type 1 MW not in the valid state\n");
return -EINVAL;
}
/* o10-36.2.2 */
if (unlikely((mw->access & IB_ZERO_BASED))) {
- pr_err_once("attempt to bind a zero based type 1 MW\n");
+ rxe_dbg_mw(mw, "attempt to bind a zero based type 1 MW\n");
return -EINVAL;
}
}
@@ -67,21 +67,21 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (mw->ibmw.type == IB_MW_TYPE_2) {
/* o10-37.2.30 */
if (unlikely(mw->state != RXE_MW_STATE_FREE)) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to bind a type 2 MW not in the free state\n");
return -EINVAL;
}
/* C10-72 */
if (unlikely(qp->pd != to_rpd(mw->ibmw.pd))) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to bind type 2 MW with qp with different PD\n");
return -EINVAL;
}
/* o10-37.2.40 */
if (unlikely(!mr || wqe->wr.wr.mw.length == 0)) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to invalidate type 2 MW by binding with NULL or zero length MR\n");
return -EINVAL;
}
@@ -92,13 +92,13 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
return 0;
if (unlikely(mr->access & IB_ZERO_BASED)) {
- pr_err_once("attempt to bind MW to zero based MR\n");
+ rxe_dbg_mw(mw, "attempt to bind MW to zero based MR\n");
return -EINVAL;
}
/* C10-73 */
if (unlikely(!(mr->access & IB_ACCESS_MW_BIND))) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to bind an MW to an MR without bind access\n");
return -EINVAL;
}
@@ -107,7 +107,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (unlikely((mw->access &
(IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
!(mr->access & IB_ACCESS_LOCAL_WRITE))) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to bind an Writable MW to an MR without local write access\n");
return -EINVAL;
}
@@ -115,7 +115,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
/* C10-75 */
if (mw->access & IB_ZERO_BASED) {
if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to bind a ZB MW outside of the MR\n");
return -EINVAL;
}
@@ -123,7 +123,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (unlikely((wqe->wr.wr.mw.addr < mr->ibmr.iova) ||
((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) >
(mr->ibmr.iova + mr->ibmr.length)))) {
- pr_err_once(
+ rxe_dbg_mw(mw,
"attempt to bind a VA MW outside of the MR\n");
return -EINVAL;
}
@@ -293,8 +293,7 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey)
if (unlikely((mw->rkey != rkey) || rxe_mw_pd(mw) != pd ||
(mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) ||
- (mw->length == 0) ||
- (access && !(access & mw->access)) ||
+ (mw->length == 0) || ((access & mw->access) != access) ||
mw->state != RXE_MW_STATE_VALID)) {
rxe_put(mw);
return NULL;
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 35f327b9d4b8ec..e02e1624bcf4db 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -20,9 +20,10 @@
static struct rxe_recv_sockets recv_sockets;
-static struct dst_entry *rxe_find_route4(struct net_device *ndev,
- struct in_addr *saddr,
- struct in_addr *daddr)
+static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
+ struct net_device *ndev,
+ struct in_addr *saddr,
+ struct in_addr *daddr)
{
struct rtable *rt;
struct flowi4 fl = { { 0 } };
@@ -35,7 +36,7 @@ static struct dst_entry *rxe_find_route4(struct net_device *ndev,
rt = ip_route_output_key(&init_net, &fl);
if (IS_ERR(rt)) {
- pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr);
+ rxe_dbg_qp(qp, "no route to %pI4\n", &daddr->s_addr);
return NULL;
}
@@ -43,7 +44,8 @@ static struct dst_entry *rxe_find_route4(struct net_device *ndev,
}
#if IS_ENABLED(CONFIG_IPV6)
-static struct dst_entry *rxe_find_route6(struct net_device *ndev,
+static struct dst_entry *rxe_find_route6(struct rxe_qp *qp,
+ struct net_device *ndev,
struct in6_addr *saddr,
struct in6_addr *daddr)
{
@@ -60,12 +62,12 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
recv_sockets.sk6->sk, &fl6,
NULL);
if (IS_ERR(ndst)) {
- pr_err_ratelimited("no route to %pI6\n", daddr);
+ rxe_dbg_qp(qp, "no route to %pI6\n", daddr);
return NULL;
}
if (unlikely(ndst->error)) {
- pr_err("no route to %pI6\n", daddr);
+ rxe_dbg_qp(qp, "no route to %pI6\n", daddr);
goto put;
}
@@ -77,7 +79,8 @@ put:
#else
-static struct dst_entry *rxe_find_route6(struct net_device *ndev,
+static struct dst_entry *rxe_find_route6(struct rxe_qp *qp,
+ struct net_device *ndev,
struct in6_addr *saddr,
struct in6_addr *daddr)
{
@@ -105,14 +108,14 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev,
saddr = &av->sgid_addr._sockaddr_in.sin_addr;
daddr = &av->dgid_addr._sockaddr_in.sin_addr;
- dst = rxe_find_route4(ndev, saddr, daddr);
+ dst = rxe_find_route4(qp, ndev, saddr, daddr);
} else if (av->network_type == RXE_NETWORK_TYPE_IPV6) {
struct in6_addr *saddr6;
struct in6_addr *daddr6;
saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr;
daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr;
- dst = rxe_find_route6(ndev, saddr6, daddr6);
+ dst = rxe_find_route6(qp, ndev, saddr6, daddr6);
#if IS_ENABLED(CONFIG_IPV6)
if (dst)
qp->dst_cookie =
@@ -282,7 +285,7 @@ static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt,
dst = rxe_find_route(skb->dev, qp, av);
if (!dst) {
- pr_err("Host not reachable\n");
+ rxe_dbg_qp(qp, "Host not reachable\n");
return -EHOSTUNREACH;
}
@@ -306,7 +309,7 @@ static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt,
dst = rxe_find_route(skb->dev, qp, av);
if (!dst) {
- pr_err("Host not reachable\n");
+ rxe_dbg_qp(qp, "Host not reachable\n");
return -EHOSTUNREACH;
}
@@ -345,7 +348,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)
if (unlikely(qp->need_req_skb &&
skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
rxe_put(qp);
}
@@ -365,7 +368,8 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
} else if (skb->protocol == htons(ETH_P_IPV6)) {
err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
} else {
- pr_err("Unknown layer 3 protocol: %d\n", skb->protocol);
+ rxe_dbg_qp(pkt->qp, "Unknown layer 3 protocol: %d\n",
+ skb->protocol);
atomic_dec(&pkt->qp->skb_out);
rxe_put(pkt->qp);
kfree_skb(skb);
@@ -373,7 +377,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
}
if (unlikely(net_xmit_eval(err))) {
- pr_debug("error sending packet: %d\n", err);
+ rxe_dbg_qp(pkt->qp, "error sending packet: %d\n", err);
return -EAGAIN;
}
@@ -411,7 +415,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
if ((is_request && (qp->req.state != QP_STATE_READY)) ||
(!is_request && (qp->resp.state != QP_STATE_READY))) {
- pr_info("Packet dropped. QP is not in ready state\n");
+ rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n");
goto drop;
}
@@ -429,7 +433,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
if ((qp_type(qp) != IB_QPT_RC) &&
(pkt->mask & RXE_END_MASK)) {
pkt->wqe->state = wqe_state_done;
- rxe_run_task(&qp->comp.task, 1);
+ rxe_sched_task(&qp->comp.task);
}
rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
@@ -592,7 +596,7 @@ static int rxe_notify(struct notifier_block *not_blk,
rxe_port_down(rxe);
break;
case NETDEV_CHANGEMTU:
- pr_info("%s changed mtu to %d\n", ndev->name, ndev->mtu);
+ rxe_dbg(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu);
rxe_set_mtu(rxe, ndev->mtu);
break;
case NETDEV_CHANGE:
@@ -604,7 +608,7 @@ static int rxe_notify(struct notifier_block *not_blk,
case NETDEV_CHANGENAME:
case NETDEV_FEAT_CHANGE:
default:
- pr_info("ignoring netdev event = %ld for %s\n",
+ rxe_dbg(rxe, "ignoring netdev event = %ld for %s\n",
event, ndev->name);
break;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index d4ba4d506f1760..5c0d5c6ffda4f0 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -101,6 +101,18 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
[IB_QPT_UC] = WR_LOCAL_OP_MASK,
},
},
+ [IB_WR_FLUSH] = {
+ .name = "IB_WR_FLUSH",
+ .mask = {
+ [IB_QPT_RC] = WR_FLUSH_MASK,
+ },
+ },
+ [IB_WR_ATOMIC_WRITE] = {
+ .name = "IB_WR_ATOMIC_WRITE",
+ .mask = {
+ [IB_QPT_RC] = WR_ATOMIC_WRITE_MASK,
+ },
+ },
};
struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
@@ -378,6 +390,29 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
RXE_IETH_BYTES,
}
},
+ [IB_OPCODE_RC_FLUSH] = {
+ .name = "IB_OPCODE_RC_FLUSH",
+ .mask = RXE_FETH_MASK | RXE_RETH_MASK | RXE_FLUSH_MASK |
+ RXE_START_MASK | RXE_END_MASK | RXE_REQ_MASK,
+ .length = RXE_BTH_BYTES + RXE_FETH_BYTES + RXE_RETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_FETH] = RXE_BTH_BYTES,
+ [RXE_RETH] = RXE_BTH_BYTES + RXE_FETH_BYTES,
+ }
+ },
+ [IB_OPCODE_RC_ATOMIC_WRITE] = {
+ .name = "IB_OPCODE_RC_ATOMIC_WRITE",
+ .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_ATOMIC_WRITE_MASK | RXE_START_MASK |
+ RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_RETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES + RXE_RETH_BYTES,
+ }
+ },
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = {
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
index 8f9aaaf260f293..cea4e0a639199c 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.h
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -20,6 +20,8 @@ enum rxe_wr_mask {
WR_READ_MASK = BIT(3),
WR_WRITE_MASK = BIT(4),
WR_LOCAL_OP_MASK = BIT(5),
+ WR_FLUSH_MASK = BIT(6),
+ WR_ATOMIC_WRITE_MASK = BIT(7),
WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK,
WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK,
@@ -47,6 +49,7 @@ enum rxe_hdr_type {
RXE_RDETH,
RXE_DETH,
RXE_IMMDT,
+ RXE_FETH,
RXE_PAYLOAD,
NUM_HDR_TYPES
};
@@ -63,6 +66,7 @@ enum rxe_hdr_mask {
RXE_IETH_MASK = BIT(RXE_IETH),
RXE_RDETH_MASK = BIT(RXE_RDETH),
RXE_DETH_MASK = BIT(RXE_DETH),
+ RXE_FETH_MASK = BIT(RXE_FETH),
RXE_PAYLOAD_MASK = BIT(RXE_PAYLOAD),
RXE_REQ_MASK = BIT(NUM_HDR_TYPES + 0),
@@ -71,16 +75,19 @@ enum rxe_hdr_mask {
RXE_WRITE_MASK = BIT(NUM_HDR_TYPES + 3),
RXE_READ_MASK = BIT(NUM_HDR_TYPES + 4),
RXE_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5),
+ RXE_FLUSH_MASK = BIT(NUM_HDR_TYPES + 6),
- RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 6),
- RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 7),
+ RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 7),
+ RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 8),
- RXE_START_MASK = BIT(NUM_HDR_TYPES + 8),
- RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9),
- RXE_END_MASK = BIT(NUM_HDR_TYPES + 10),
+ RXE_START_MASK = BIT(NUM_HDR_TYPES + 9),
+ RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 10),
+ RXE_END_MASK = BIT(NUM_HDR_TYPES + 11),
RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12),
+ RXE_ATOMIC_WRITE_MASK = BIT(NUM_HDR_TYPES + 14),
+
RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK),
RXE_WRITE_OR_SEND_MASK = (RXE_WRITE_MASK | RXE_SEND_MASK),
RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK),
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 86c7a8bf3cbbd8..a754fc902e3d19 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -51,7 +51,14 @@ enum rxe_device_param {
| IB_DEVICE_SRQ_RESIZE
| IB_DEVICE_MEM_MGT_EXTENSIONS
| IB_DEVICE_MEM_WINDOW
+ | IB_DEVICE_FLUSH_GLOBAL
+ | IB_DEVICE_FLUSH_PERSISTENT
+#ifdef CONFIG_64BIT
+ | IB_DEVICE_MEM_WINDOW_TYPE_2B
+ | IB_DEVICE_ATOMIC_WRITE,
+#else
| IB_DEVICE_MEM_WINDOW_TYPE_2B,
+#endif /* CONFIG_64BIT */
RXE_MAX_SGE = 32,
RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) +
sizeof(struct ib_sge) * RXE_MAX_SGE,
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index a62bab88415cbf..ab72db68b58f69 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -19,33 +19,33 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
int has_srq)
{
if (cap->max_send_wr > rxe->attr.max_qp_wr) {
- pr_debug("invalid send wr = %u > %d\n",
+ rxe_dbg(rxe, "invalid send wr = %u > %d\n",
cap->max_send_wr, rxe->attr.max_qp_wr);
goto err1;
}
if (cap->max_send_sge > rxe->attr.max_send_sge) {
- pr_debug("invalid send sge = %u > %d\n",
+ rxe_dbg(rxe, "invalid send sge = %u > %d\n",
cap->max_send_sge, rxe->attr.max_send_sge);
goto err1;
}
if (!has_srq) {
if (cap->max_recv_wr > rxe->attr.max_qp_wr) {
- pr_debug("invalid recv wr = %u > %d\n",
+ rxe_dbg(rxe, "invalid recv wr = %u > %d\n",
cap->max_recv_wr, rxe->attr.max_qp_wr);
goto err1;
}
if (cap->max_recv_sge > rxe->attr.max_recv_sge) {
- pr_debug("invalid recv sge = %u > %d\n",
+ rxe_dbg(rxe, "invalid recv sge = %u > %d\n",
cap->max_recv_sge, rxe->attr.max_recv_sge);
goto err1;
}
}
if (cap->max_inline_data > rxe->max_inline_data) {
- pr_debug("invalid max inline data = %u > %d\n",
+ rxe_dbg(rxe, "invalid max inline data = %u > %d\n",
cap->max_inline_data, rxe->max_inline_data);
goto err1;
}
@@ -73,7 +73,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
}
if (!init->recv_cq || !init->send_cq) {
- pr_debug("missing cq\n");
+ rxe_dbg(rxe, "missing cq\n");
goto err1;
}
@@ -82,14 +82,14 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
if (init->qp_type == IB_QPT_GSI) {
if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) {
- pr_debug("invalid port = %d\n", port_num);
+ rxe_dbg(rxe, "invalid port = %d\n", port_num);
goto err1;
}
port = &rxe->port;
if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) {
- pr_debug("GSI QP exists for port %d\n", port_num);
+ rxe_dbg(rxe, "GSI QP exists for port %d\n", port_num);
goto err1;
}
}
@@ -172,10 +172,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
spin_lock_init(&qp->state_lock);
- spin_lock_init(&qp->req.task.state_lock);
- spin_lock_init(&qp->resp.task.state_lock);
- spin_lock_init(&qp->comp.task.state_lock);
-
spin_lock_init(&qp->sq.sq_lock);
spin_lock_init(&qp->rq.producer_lock);
spin_lock_init(&qp->rq.consumer_lock);
@@ -242,10 +238,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
skb_queue_head_init(&qp->req_pkts);
- rxe_init_task(&qp->req.task, qp,
- rxe_requester, "req");
- rxe_init_task(&qp->comp.task, qp,
- rxe_completer, "comp");
+ rxe_init_task(&qp->req.task, qp, rxe_requester);
+ rxe_init_task(&qp->comp.task, qp, rxe_completer);
qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
if (init->qp_type == IB_QPT_RC) {
@@ -270,9 +264,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
wqe_size = rcv_wqe_size(qp->rq.max_sge);
- pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n",
- qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size);
-
type = QUEUE_TYPE_FROM_CLIENT;
qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr,
wqe_size, type);
@@ -292,8 +283,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
skb_queue_head_init(&qp->resp_pkts);
- rxe_init_task(&qp->resp.task, qp,
- rxe_responder, "resp");
+ rxe_init_task(&qp->resp.task, qp, rxe_responder);
qp->resp.opcode = OPCODE_NONE;
qp->resp.msn = 0;
@@ -402,7 +392,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) {
- pr_debug("invalid mask or state for qp\n");
+ rxe_dbg_qp(qp, "invalid mask or state\n");
goto err1;
}
@@ -416,7 +406,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_PORT) {
if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) {
- pr_debug("invalid port %d\n", attr->port_num);
+ rxe_dbg_qp(qp, "invalid port %d\n", attr->port_num);
goto err1;
}
}
@@ -424,18 +414,18 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq))
goto err1;
- if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr))
+ if (mask & IB_QP_AV && rxe_av_chk_attr(qp, &attr->ah_attr))
goto err1;
if (mask & IB_QP_ALT_PATH) {
- if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))
+ if (rxe_av_chk_attr(qp, &attr->alt_ah_attr))
goto err1;
if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) {
- pr_debug("invalid alt port %d\n", attr->alt_port_num);
+ rxe_dbg_qp(qp, "invalid alt port %d\n", attr->alt_port_num);
goto err1;
}
if (attr->alt_timeout > 31) {
- pr_debug("invalid QP alt timeout %d > 31\n",
+ rxe_dbg_qp(qp, "invalid alt timeout %d > 31\n",
attr->alt_timeout);
goto err1;
}
@@ -448,7 +438,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
enum ib_mtu mtu = attr->path_mtu;
if (mtu > max_mtu) {
- pr_debug("invalid mtu (%d) > (%d)\n",
+ rxe_dbg_qp(qp, "invalid mtu (%d) > (%d)\n",
ib_mtu_enum_to_int(mtu),
ib_mtu_enum_to_int(max_mtu));
goto err1;
@@ -457,7 +447,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) {
- pr_debug("invalid max_rd_atomic %d > %d\n",
+ rxe_dbg_qp(qp, "invalid max_rd_atomic %d > %d\n",
attr->max_rd_atomic,
rxe->attr.max_qp_rd_atom);
goto err1;
@@ -466,7 +456,8 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_TIMEOUT) {
if (attr->timeout > 31) {
- pr_debug("invalid QP timeout %d > 31\n", attr->timeout);
+ rxe_dbg_qp(qp, "invalid timeout %d > 31\n",
+ attr->timeout);
goto err1;
}
}
@@ -543,10 +534,10 @@ static void rxe_qp_drain(struct rxe_qp *qp)
if (qp->req.state != QP_STATE_DRAINED) {
qp->req.state = QP_STATE_DRAIN;
if (qp_type(qp) == IB_QPT_RC)
- rxe_run_task(&qp->comp.task, 1);
+ rxe_sched_task(&qp->comp.task);
else
__rxe_do_task(&qp->comp.task);
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
}
}
}
@@ -560,13 +551,13 @@ void rxe_qp_error(struct rxe_qp *qp)
qp->attr.qp_state = IB_QPS_ERR;
/* drain work and packet queues */
- rxe_run_task(&qp->resp.task, 1);
+ rxe_sched_task(&qp->resp.task);
if (qp_type(qp) == IB_QPT_RC)
- rxe_run_task(&qp->comp.task, 1);
+ rxe_sched_task(&qp->comp.task);
else
__rxe_do_task(&qp->comp.task);
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
}
/* called by the modify qp verb */
@@ -644,27 +635,24 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
if (mask & IB_QP_RETRY_CNT) {
qp->attr.retry_cnt = attr->retry_cnt;
qp->comp.retry_cnt = attr->retry_cnt;
- pr_debug("qp#%d set retry count = %d\n", qp_num(qp),
- attr->retry_cnt);
+ rxe_dbg_qp(qp, "set retry count = %d\n", attr->retry_cnt);
}
if (mask & IB_QP_RNR_RETRY) {
qp->attr.rnr_retry = attr->rnr_retry;
qp->comp.rnr_retry = attr->rnr_retry;
- pr_debug("qp#%d set rnr retry count = %d\n", qp_num(qp),
- attr->rnr_retry);
+ rxe_dbg_qp(qp, "set rnr retry count = %d\n", attr->rnr_retry);
}
if (mask & IB_QP_RQ_PSN) {
qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK);
qp->resp.psn = qp->attr.rq_psn;
- pr_debug("qp#%d set resp psn = 0x%x\n", qp_num(qp),
- qp->resp.psn);
+ rxe_dbg_qp(qp, "set resp psn = 0x%x\n", qp->resp.psn);
}
if (mask & IB_QP_MIN_RNR_TIMER) {
qp->attr.min_rnr_timer = attr->min_rnr_timer;
- pr_debug("qp#%d set min rnr timer = 0x%x\n", qp_num(qp),
+ rxe_dbg_qp(qp, "set min rnr timer = 0x%x\n",
attr->min_rnr_timer);
}
@@ -672,7 +660,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK);
qp->req.psn = qp->attr.sq_psn;
qp->comp.psn = qp->attr.sq_psn;
- pr_debug("qp#%d set req psn = 0x%x\n", qp_num(qp), qp->req.psn);
+ rxe_dbg_qp(qp, "set req psn = 0x%x\n", qp->req.psn);
}
if (mask & IB_QP_PATH_MIG_STATE)
@@ -686,40 +674,40 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
switch (attr->qp_state) {
case IB_QPS_RESET:
- pr_debug("qp#%d state -> RESET\n", qp_num(qp));
+ rxe_dbg_qp(qp, "state -> RESET\n");
rxe_qp_reset(qp);
break;
case IB_QPS_INIT:
- pr_debug("qp#%d state -> INIT\n", qp_num(qp));
+ rxe_dbg_qp(qp, "state -> INIT\n");
qp->req.state = QP_STATE_INIT;
qp->resp.state = QP_STATE_INIT;
qp->comp.state = QP_STATE_INIT;
break;
case IB_QPS_RTR:
- pr_debug("qp#%d state -> RTR\n", qp_num(qp));
+ rxe_dbg_qp(qp, "state -> RTR\n");
qp->resp.state = QP_STATE_READY;
break;
case IB_QPS_RTS:
- pr_debug("qp#%d state -> RTS\n", qp_num(qp));
+ rxe_dbg_qp(qp, "state -> RTS\n");
qp->req.state = QP_STATE_READY;
qp->comp.state = QP_STATE_READY;
break;
case IB_QPS_SQD:
- pr_debug("qp#%d state -> SQD\n", qp_num(qp));
+ rxe_dbg_qp(qp, "state -> SQD\n");
rxe_qp_drain(qp);
break;
case IB_QPS_SQE:
- pr_warn("qp#%d state -> SQE !!?\n", qp_num(qp));
+ rxe_dbg_qp(qp, "state -> SQE !!?\n");
/* Not possible from modify_qp. */
break;
case IB_QPS_ERR:
- pr_debug("qp#%d state -> ERR\n", qp_num(qp));
+ rxe_dbg_qp(qp, "state -> ERR\n");
rxe_qp_error(qp);
break;
}
@@ -759,7 +747,7 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
attr->sq_draining = 0;
}
- pr_debug("attr->sq_draining = %d\n", attr->sq_draining);
+ rxe_dbg_qp(qp, "attr->sq_draining = %d\n", attr->sq_draining);
return 0;
}
@@ -771,7 +759,7 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp)
* will fail immediately.
*/
if (atomic_read(&qp->mcg_num)) {
- pr_debug("Attempt to destroy QP while attached to multicast group\n");
+ rxe_dbg_qp(qp, "Attempt to destroy while attached to multicast group\n");
return -EBUSY;
}
@@ -829,12 +817,12 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
if (qp->resp.mr)
rxe_put(qp->resp.mr);
- if (qp_type(qp) == IB_QPT_RC)
- sk_dst_reset(qp->sk->sk);
-
free_rd_atomic_resources(qp);
if (qp->sk) {
+ if (qp_type(qp) == IB_QPT_RC)
+ sk_dst_reset(qp->sk->sk);
+
kernel_sock_shutdown(qp->sk, SHUT_RDWR);
sock_release(qp->sk);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index f637712079705f..899c8779f8001c 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -100,12 +100,12 @@ void rnr_nak_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
- pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp));
+ rxe_dbg_qp(qp, "nak timer fired\n");
/* request a send queue retry */
qp->req.need_retry = 1;
qp->req.wait_for_rnr_timer = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
}
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
@@ -241,6 +241,9 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE :
IB_OPCODE_RC_SEND_FIRST;
+ case IB_WR_FLUSH:
+ return IB_OPCODE_RC_FLUSH;
+
case IB_WR_RDMA_READ:
return IB_OPCODE_RC_RDMA_READ_REQUEST;
@@ -258,6 +261,10 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
else
return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
IB_OPCODE_RC_SEND_FIRST;
+
+ case IB_WR_ATOMIC_WRITE:
+ return IB_OPCODE_RC_ATOMIC_WRITE;
+
case IB_WR_REG_MR:
case IB_WR_LOCAL_INV:
return opcode;
@@ -421,11 +428,18 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
/* init optional headers */
if (pkt->mask & RXE_RETH_MASK) {
- reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
+ if (pkt->mask & RXE_FETH_MASK)
+ reth_set_rkey(pkt, ibwr->wr.flush.rkey);
+ else
+ reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
reth_set_va(pkt, wqe->iova);
reth_set_len(pkt, wqe->dma.resid);
}
+ /* Fill Flush Extension Transport Header */
+ if (pkt->mask & RXE_FETH_MASK)
+ feth_init(pkt, ibwr->wr.flush.type, ibwr->wr.flush.level);
+
if (pkt->mask & RXE_IMMDT_MASK)
immdt_set_imm(pkt, ibwr->ex.imm_data);
@@ -484,6 +498,14 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
memset(pad, 0, bth_pad(pkt));
}
+ } else if (pkt->mask & RXE_FLUSH_MASK) {
+ /* oA19-2: shall have no payload. */
+ wqe->dma.resid = 0;
+ }
+
+ if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
+ memcpy(payload_addr(pkt), wqe->dma.atomic_wr, payload);
+ wqe->dma.resid -= payload;
}
return 0;
@@ -595,7 +617,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
}
break;
default:
- pr_err("Unexpected send wqe opcode %d\n", opcode);
+ rxe_dbg_qp(qp, "Unexpected send wqe opcode %d\n", opcode);
wqe->status = IB_WC_LOC_QP_OP_ERR;
return -EINVAL;
}
@@ -608,7 +630,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
* which can lead to a deadlock. So go ahead and complete
* it now.
*/
- rxe_run_task(&qp->comp.task, 1);
+ rxe_sched_task(&qp->comp.task);
return 0;
}
@@ -709,13 +731,15 @@ int rxe_requester(void *arg)
}
mask = rxe_opcode[opcode].mask;
- if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) {
+ if (unlikely(mask & (RXE_READ_OR_ATOMIC_MASK |
+ RXE_ATOMIC_WRITE_MASK))) {
if (check_init_depth(qp, wqe))
goto exit;
}
mtu = get_mtu(qp);
- payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0;
+ payload = (mask & (RXE_WRITE_OR_SEND_MASK | RXE_ATOMIC_WRITE_MASK)) ?
+ wqe->dma.resid : 0;
if (payload > mtu) {
if (qp_type(qp) == IB_QPT_UD) {
/* C10-93.1.1: If the total sum of all the buffer lengths specified for a
@@ -733,7 +757,7 @@ int rxe_requester(void *arg)
qp->req.wqe_index);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
- rxe_run_task(&qp->comp.task, 0);
+ rxe_run_task(&qp->comp.task);
goto done;
}
payload = mtu;
@@ -748,14 +772,14 @@ int rxe_requester(void *arg)
av = rxe_get_av(&pkt, &ah);
if (unlikely(!av)) {
- pr_err("qp#%d Failed no address vector\n", qp_num(qp));
+ rxe_dbg_qp(qp, "Failed no address vector\n");
wqe->status = IB_WC_LOC_QP_OP_ERR;
goto err;
}
skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
if (unlikely(!skb)) {
- pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
+ rxe_dbg_qp(qp, "Failed allocating skb\n");
wqe->status = IB_WC_LOC_QP_OP_ERR;
if (ah)
rxe_put(ah);
@@ -764,7 +788,7 @@ int rxe_requester(void *arg)
err = finish_packet(qp, av, wqe, &pkt, skb, payload);
if (unlikely(err)) {
- pr_debug("qp#%d Error during finish packet\n", qp_num(qp));
+ rxe_dbg_qp(qp, "Error during finish packet\n");
if (err == -EFAULT)
wqe->status = IB_WC_LOC_PROT_ERR;
else
@@ -795,7 +819,7 @@ int rxe_requester(void *arg)
rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
if (err == -EAGAIN) {
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
goto exit;
}
@@ -817,7 +841,7 @@ err:
qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
wqe->state = wqe_state_error;
qp->req.state = QP_STATE_ERROR;
- rxe_run_task(&qp->comp.task, 0);
+ rxe_run_task(&qp->comp.task);
exit:
ret = -EAGAIN;
out:
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 693081e813ec08..7a60c7709da045 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -22,6 +22,8 @@ enum resp_states {
RESPST_EXECUTE,
RESPST_READ_REPLY,
RESPST_ATOMIC_REPLY,
+ RESPST_ATOMIC_WRITE_REPLY,
+ RESPST_PROCESS_FLUSH,
RESPST_COMPLETE,
RESPST_ACKNOWLEDGE,
RESPST_CLEANUP,
@@ -57,6 +59,8 @@ static char *resp_state_name[] = {
[RESPST_EXECUTE] = "EXECUTE",
[RESPST_READ_REPLY] = "READ_REPLY",
[RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
+ [RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY",
+ [RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH",
[RESPST_COMPLETE] = "COMPLETE",
[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
[RESPST_CLEANUP] = "CLEANUP",
@@ -91,7 +95,10 @@ void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
(skb_queue_len(&qp->req_pkts) > 1);
- rxe_run_task(&qp->resp.task, must_sched);
+ if (must_sched)
+ rxe_sched_task(&qp->resp.task);
+ else
+ rxe_run_task(&qp->resp.task);
}
static inline enum resp_states get_req(struct rxe_qp *qp,
@@ -253,19 +260,37 @@ static enum resp_states check_op_seq(struct rxe_qp *qp,
}
}
+static bool check_qp_attr_access(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ if (((pkt->mask & RXE_READ_MASK) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
+ ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
+ ((pkt->mask & RXE_ATOMIC_MASK) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ return false;
+
+ if (pkt->mask & RXE_FLUSH_MASK) {
+ u32 flush_type = feth_plt(pkt);
+
+ if ((flush_type & IB_FLUSH_GLOBAL &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
+ (flush_type & IB_FLUSH_PERSISTENT &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
+ return false;
+ }
+
+ return true;
+}
+
static enum resp_states check_op_valid(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
switch (qp_type(qp)) {
case IB_QPT_RC:
- if (((pkt->mask & RXE_READ_MASK) &&
- !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
- ((pkt->mask & RXE_WRITE_MASK) &&
- !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
- ((pkt->mask & RXE_ATOMIC_MASK) &&
- !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
+ if (!check_qp_attr_access(qp, pkt))
return RESPST_ERR_UNSUPPORTED_OPCODE;
- }
break;
@@ -314,7 +339,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
/* don't trust user space data */
if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
- pr_warn("%s: invalid num_sge in SRQ entry\n", __func__);
+ rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
return RESPST_ERR_MALFORMED_WQE;
}
size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
@@ -364,7 +389,7 @@ static enum resp_states check_resource(struct rxe_qp *qp,
}
}
- if (pkt->mask & RXE_READ_OR_ATOMIC_MASK) {
+ if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
/* it is the requesters job to not send
* too many read/atomic ops, we just
* recycle the responder resource queue
@@ -387,19 +412,66 @@ static enum resp_states check_resource(struct rxe_qp *qp,
return RESPST_CHK_LENGTH;
}
-static enum resp_states check_length(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt)
+static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
{
- switch (qp_type(qp)) {
- case IB_QPT_RC:
- return RESPST_CHK_RKEY;
-
- case IB_QPT_UC:
- return RESPST_CHK_RKEY;
+ /*
+ * See IBA C9-92
+ * For UD QPs we only check if the packet will fit in the
+ * receive buffer later. For rdma operations additional
+ * length checks are performed in check_rkey.
+ */
+ if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
+ (qp_type(qp) == IB_QPT_UC))) {
+ unsigned int mtu = qp->mtu;
+ unsigned int payload = payload_size(pkt);
+
+ if ((pkt->mask & RXE_START_MASK) &&
+ (pkt->mask & RXE_END_MASK)) {
+ if (unlikely(payload > mtu)) {
+ rxe_dbg_qp(qp, "only packet too long");
+ return RESPST_ERR_LENGTH;
+ }
+ } else if ((pkt->mask & RXE_START_MASK) ||
+ (pkt->mask & RXE_MIDDLE_MASK)) {
+ if (unlikely(payload != mtu)) {
+ rxe_dbg_qp(qp, "first or middle packet not mtu");
+ return RESPST_ERR_LENGTH;
+ }
+ } else if (pkt->mask & RXE_END_MASK) {
+ if (unlikely((payload == 0) || (payload > mtu))) {
+ rxe_dbg_qp(qp, "last packet zero or too long");
+ return RESPST_ERR_LENGTH;
+ }
+ }
+ }
- default:
- return RESPST_CHK_RKEY;
+ /* See IBA C9-94 */
+ if (pkt->mask & RXE_RETH_MASK) {
+ if (reth_len(pkt) > (1U << 31)) {
+ rxe_dbg_qp(qp, "dma length too long");
+ return RESPST_ERR_LENGTH;
+ }
}
+
+ return RESPST_CHK_RKEY;
+}
+
+static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+ qp->resp.va = reth_va(pkt);
+ qp->resp.offset = 0;
+ qp->resp.rkey = reth_rkey(pkt);
+ qp->resp.resid = reth_len(pkt);
+ qp->resp.length = reth_len(pkt);
+}
+
+static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+ qp->resp.va = atmeth_va(pkt);
+ qp->resp.offset = 0;
+ qp->resp.rkey = atmeth_rkey(pkt);
+ qp->resp.resid = sizeof(u64);
}
static enum resp_states check_rkey(struct rxe_qp *qp,
@@ -413,29 +485,32 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
u32 pktlen;
int mtu = qp->mtu;
enum resp_states state;
- int access;
-
- if (pkt->mask & RXE_READ_OR_WRITE_MASK) {
- if (pkt->mask & RXE_RETH_MASK) {
- qp->resp.va = reth_va(pkt);
- qp->resp.offset = 0;
- qp->resp.rkey = reth_rkey(pkt);
- qp->resp.resid = reth_len(pkt);
- qp->resp.length = reth_len(pkt);
- }
+ int access = 0;
+
+ if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
+ if (pkt->mask & RXE_RETH_MASK)
+ qp_resp_from_reth(qp, pkt);
+
access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
: IB_ACCESS_REMOTE_WRITE;
+ } else if (pkt->mask & RXE_FLUSH_MASK) {
+ u32 flush_type = feth_plt(pkt);
+
+ if (pkt->mask & RXE_RETH_MASK)
+ qp_resp_from_reth(qp, pkt);
+
+ if (flush_type & IB_FLUSH_GLOBAL)
+ access |= IB_ACCESS_FLUSH_GLOBAL;
+ if (flush_type & IB_FLUSH_PERSISTENT)
+ access |= IB_ACCESS_FLUSH_PERSISTENT;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
- qp->resp.va = atmeth_va(pkt);
- qp->resp.offset = 0;
- qp->resp.rkey = atmeth_rkey(pkt);
- qp->resp.resid = sizeof(u64);
+ qp_resp_from_atmeth(qp, pkt);
access = IB_ACCESS_REMOTE_ATOMIC;
} else {
return RESPST_EXECUTE;
}
- /* A zero-byte op is not required to set an addr or rkey. */
+ /* A zero-byte op is not required to set an addr or rkey. See C9-88 */
if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
(pkt->mask & RXE_RETH_MASK) &&
reth_len(pkt) == 0) {
@@ -450,15 +525,14 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (rkey_is_mw(rkey)) {
mw = rxe_lookup_mw(qp, access, rkey);
if (!mw) {
- pr_debug("%s: no MW matches rkey %#x\n",
- __func__, rkey);
+ rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
mr = mw->mr;
if (!mr) {
- pr_err("%s: MW doesn't have an MR\n", __func__);
+ rxe_dbg_qp(qp, "MW doesn't have an MR\n");
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
@@ -471,19 +545,27 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
} else {
mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
if (!mr) {
- pr_debug("%s: no MR matches rkey %#x\n",
- __func__, rkey);
+ rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
}
+ if (pkt->mask & RXE_FLUSH_MASK) {
+ /* FLUSH MR may not set va or resid
+ * no need to check range since we will flush whole mr
+ */
+ if (feth_sel(pkt) == IB_FLUSH_MR)
+ goto skip_check_range;
+ }
+
if (mr_check_range(mr, va + qp->resp.offset, resid)) {
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
- if (pkt->mask & RXE_WRITE_MASK) {
+skip_check_range:
+ if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
if (resid > mtu) {
if (pktlen != mtu || bth_pad(pkt)) {
state = RESPST_ERR_LENGTH;
@@ -583,15 +665,66 @@ static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
res->state = rdatm_res_state_new;
break;
case RXE_ATOMIC_MASK:
+ case RXE_ATOMIC_WRITE_MASK:
res->first_psn = pkt->psn;
res->last_psn = pkt->psn;
res->cur_psn = pkt->psn;
break;
+ case RXE_FLUSH_MASK:
+ res->flush.va = qp->resp.va + qp->resp.offset;
+ res->flush.length = qp->resp.length;
+ res->flush.type = feth_plt(pkt);
+ res->flush.level = feth_sel(pkt);
}
return res;
}
+static enum resp_states process_flush(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ u64 length, start;
+ struct rxe_mr *mr = qp->resp.mr;
+ struct resp_res *res = qp->resp.res;
+
+ /* oA19-14, oA19-15 */
+ if (res && res->replay)
+ return RESPST_ACKNOWLEDGE;
+ else if (!res) {
+ res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
+ qp->resp.res = res;
+ }
+
+ if (res->flush.level == IB_FLUSH_RANGE) {
+ start = res->flush.va;
+ length = res->flush.length;
+ } else { /* level == IB_FLUSH_MR */
+ start = mr->ibmr.iova;
+ length = mr->ibmr.length;
+ }
+
+ if (res->flush.type & IB_FLUSH_PERSISTENT) {
+ if (rxe_flush_pmem_iova(mr, start, length))
+ return RESPST_ERR_RKEY_VIOLATION;
+ /* Make data persistent. */
+ wmb();
+ } else if (res->flush.type & IB_FLUSH_GLOBAL) {
+ /* Make data globally visible. */
+ wmb();
+ }
+
+ qp->resp.msn++;
+
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
+
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
+
+ return RESPST_ACKNOWLEDGE;
+}
+
/* Guarantee atomicity of atomic operations at the machine level. */
static DEFINE_SPINLOCK(atomic_ops_lock);
@@ -652,6 +785,55 @@ out:
return ret;
}
+static enum resp_states atomic_write_reply(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ u64 src, *dst;
+ struct resp_res *res = qp->resp.res;
+ struct rxe_mr *mr = qp->resp.mr;
+ int payload = payload_size(pkt);
+
+ if (!res) {
+ res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
+ qp->resp.res = res;
+ }
+
+ if (!res->replay) {
+#ifdef CONFIG_64BIT
+ if (mr->state != RXE_MR_STATE_VALID)
+ return RESPST_ERR_RKEY_VIOLATION;
+
+ memcpy(&src, payload_addr(pkt), payload);
+
+ dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
+ /* check vaddr is 8 bytes aligned. */
+ if (!dst || (uintptr_t)dst & 7)
+ return RESPST_ERR_MISALIGNED_ATOMIC;
+
+ /* Do atomic write after all prior operations have completed */
+ smp_store_release(dst, src);
+
+ /* decrease resp.resid to zero */
+ qp->resp.resid -= sizeof(payload);
+
+ qp->resp.msn++;
+
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
+
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
+
+ return RESPST_ACKNOWLEDGE;
+#else
+ return RESPST_ERR_UNSUPPORTED_OPCODE;
+#endif /* CONFIG_64BIT */
+ }
+
+ return RESPST_ACKNOWLEDGE;
+}
+
static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
struct rxe_pkt_info *ack,
int opcode,
@@ -807,14 +989,19 @@ static enum resp_states read_reply(struct rxe_qp *qp,
skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
res->cur_psn, AETH_ACK_UNLIMITED);
if (!skb) {
- rxe_put(mr);
+ if (mr)
+ rxe_put(mr);
return RESPST_ERR_RNR;
}
- rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
- payload, RXE_FROM_MR_OBJ);
+ err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
+ payload, RXE_FROM_MR_OBJ);
if (mr)
rxe_put(mr);
+ if (err) {
+ kfree_skb(skb);
+ return RESPST_ERR_RKEY_VIOLATION;
+ }
if (bth_pad(&ack_pkt)) {
u8 *pad = payload_addr(&ack_pkt) + payload;
@@ -890,6 +1077,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
return RESPST_READ_REPLY;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
return RESPST_ATOMIC_REPLY;
+ } else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
+ return RESPST_ATOMIC_WRITE_REPLY;
+ } else if (pkt->mask & RXE_FLUSH_MASK) {
+ return RESPST_PROCESS_FLUSH;
} else {
/* Unreachable */
WARN_ON_ONCE(1);
@@ -1040,7 +1231,7 @@ static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
err = rxe_xmit_packet(qp, &ack_pkt, skb);
if (err)
- pr_err_ratelimited("Failed sending %s\n", msg);
+ rxe_dbg_qp(qp, "Failed sending %s\n", msg);
return err;
}
@@ -1063,6 +1254,19 @@ static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
return ret;
}
+static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
+{
+ int ret = send_common_ack(qp, syndrome, psn,
+ IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
+ "RDMA READ response of length zero ACK");
+
+ /* have to clear this since it is used to trigger
+ * long read replies
+ */
+ qp->resp.res = NULL;
+ return ret;
+}
+
static enum resp_states acknowledge(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
@@ -1073,6 +1277,8 @@ static enum resp_states acknowledge(struct rxe_qp *qp,
send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
else if (pkt->mask & RXE_ATOMIC_MASK)
send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
+ else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
+ send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
else if (bth_ack(pkt))
send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
@@ -1129,6 +1335,22 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
/* SEND. Ack again and cleanup. C9-105. */
send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
return RESPST_CLEANUP;
+ } else if (pkt->mask & RXE_FLUSH_MASK) {
+ struct resp_res *res;
+
+ /* Find the operation in our list of responder resources. */
+ res = find_resource(qp, pkt->psn);
+ if (res) {
+ res->replay = 1;
+ res->cur_psn = pkt->psn;
+ qp->resp.res = res;
+ rc = RESPST_PROCESS_FLUSH;
+ goto out;
+ }
+
+ /* Resource not found. Class D error. Drop the request. */
+ rc = RESPST_CLEANUP;
+ goto out;
} else if (pkt->mask & RXE_READ_MASK) {
struct resp_res *res;
@@ -1184,7 +1406,9 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
res->replay = 1;
res->cur_psn = pkt->psn;
qp->resp.res = res;
- rc = RESPST_ATOMIC_REPLY;
+ rc = pkt->mask & RXE_ATOMIC_MASK ?
+ RESPST_ATOMIC_REPLY :
+ RESPST_ATOMIC_WRITE_REPLY;
goto out;
}
@@ -1286,8 +1510,7 @@ int rxe_responder(void *arg)
}
while (1) {
- pr_debug("qp#%d state = %s\n", qp_num(qp),
- resp_state_name[state]);
+ rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
switch (state) {
case RESPST_GET_REQ:
state = get_req(qp, &pkt);
@@ -1305,7 +1528,7 @@ int rxe_responder(void *arg)
state = check_resource(qp, pkt);
break;
case RESPST_CHK_LENGTH:
- state = check_length(qp, pkt);
+ state = rxe_resp_check_length(qp, pkt);
break;
case RESPST_CHK_RKEY:
state = check_rkey(qp, pkt);
@@ -1322,6 +1545,12 @@ int rxe_responder(void *arg)
case RESPST_ATOMIC_REPLY:
state = atomic_reply(qp, pkt);
break;
+ case RESPST_ATOMIC_WRITE_REPLY:
+ state = atomic_write_reply(qp, pkt);
+ break;
+ case RESPST_PROCESS_FLUSH:
+ state = process_flush(qp, pkt);
+ break;
case RESPST_ACKNOWLEDGE:
state = acknowledge(qp, pkt);
break;
@@ -1444,7 +1673,7 @@ int rxe_responder(void *arg)
case RESPST_ERROR:
qp->resp.goto_error = 0;
- pr_debug("qp#%d moved to error state\n", qp_num(qp));
+ rxe_dbg_qp(qp, "moved to error state\n");
rxe_qp_error(qp);
goto exit;
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 02b39498c370d2..82e37a41ced40d 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -13,13 +13,13 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init)
struct ib_srq_attr *attr = &init->attr;
if (attr->max_wr > rxe->attr.max_srq_wr) {
- pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
+ rxe_dbg(rxe, "max_wr(%d) > max_srq_wr(%d)\n",
attr->max_wr, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->max_wr <= 0) {
- pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
+ rxe_dbg(rxe, "max_wr(%d) <= 0\n", attr->max_wr);
goto err1;
}
@@ -27,7 +27,7 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init)
attr->max_wr = RXE_MIN_SRQ_WR;
if (attr->max_sge > rxe->attr.max_srq_sge) {
- pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
+ rxe_dbg(rxe, "max_sge(%d) > max_srq_sge(%d)\n",
attr->max_sge, rxe->attr.max_srq_sge);
goto err1;
}
@@ -65,7 +65,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
type = QUEUE_TYPE_FROM_CLIENT;
q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type);
if (!q) {
- pr_warn("unable to allocate queue for srq\n");
+ rxe_dbg_srq(srq, "Unable to allocate queue\n");
return -ENOMEM;
}
@@ -94,24 +94,24 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
{
if (srq->error) {
- pr_warn("srq in error state\n");
+ rxe_dbg_srq(srq, "in error state\n");
goto err1;
}
if (mask & IB_SRQ_MAX_WR) {
if (attr->max_wr > rxe->attr.max_srq_wr) {
- pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
+ rxe_dbg_srq(srq, "max_wr(%d) > max_srq_wr(%d)\n",
attr->max_wr, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->max_wr <= 0) {
- pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
+ rxe_dbg_srq(srq, "max_wr(%d) <= 0\n", attr->max_wr);
goto err1;
}
if (srq->limit && (attr->max_wr < srq->limit)) {
- pr_warn("max_wr (%d) < srq->limit (%d)\n",
+ rxe_dbg_srq(srq, "max_wr (%d) < srq->limit (%d)\n",
attr->max_wr, srq->limit);
goto err1;
}
@@ -122,13 +122,13 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
if (mask & IB_SRQ_LIMIT) {
if (attr->srq_limit > rxe->attr.max_srq_wr) {
- pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",
+ rxe_dbg_srq(srq, "srq_limit(%d) > max_srq_wr(%d)\n",
attr->srq_limit, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->srq_limit > srq->rq.queue->buf->index_mask) {
- pr_warn("srq_limit (%d) > cur limit(%d)\n",
+ rxe_dbg_srq(srq, "srq_limit (%d) > cur limit(%d)\n",
attr->srq_limit,
srq->rq.queue->buf->index_mask);
goto err1;
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index ec2b7de1c49721..60b90e33a88496 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -4,10 +4,6 @@
* Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*/
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/hardirq.h>
-
#include "rxe.h"
int __rxe_do_task(struct rxe_task *task)
@@ -28,30 +24,31 @@ int __rxe_do_task(struct rxe_task *task)
* a second caller finds the task already running
* but looks just after the last call to func
*/
-void rxe_do_task(struct tasklet_struct *t)
+static void do_task(struct tasklet_struct *t)
{
int cont;
int ret;
struct rxe_task *task = from_tasklet(task, t, tasklet);
+ struct rxe_qp *qp = (struct rxe_qp *)task->arg;
unsigned int iterations = RXE_MAX_ITERATIONS;
- spin_lock_bh(&task->state_lock);
+ spin_lock_bh(&task->lock);
switch (task->state) {
case TASK_STATE_START:
task->state = TASK_STATE_BUSY;
- spin_unlock_bh(&task->state_lock);
+ spin_unlock_bh(&task->lock);
break;
case TASK_STATE_BUSY:
task->state = TASK_STATE_ARMED;
fallthrough;
case TASK_STATE_ARMED:
- spin_unlock_bh(&task->state_lock);
+ spin_unlock_bh(&task->lock);
return;
default:
- spin_unlock_bh(&task->state_lock);
- pr_warn("%s failed with bad state %d\n", __func__, task->state);
+ spin_unlock_bh(&task->lock);
+ rxe_dbg_qp(qp, "failed with bad state %d\n", task->state);
return;
}
@@ -59,7 +56,7 @@ void rxe_do_task(struct tasklet_struct *t)
cont = 0;
ret = task->func(task->arg);
- spin_lock_bh(&task->state_lock);
+ spin_lock_bh(&task->lock);
switch (task->state) {
case TASK_STATE_BUSY:
if (ret) {
@@ -85,27 +82,25 @@ void rxe_do_task(struct tasklet_struct *t)
break;
default:
- pr_warn("%s failed with bad state %d\n", __func__,
- task->state);
+ rxe_dbg_qp(qp, "failed with bad state %d\n",
+ task->state);
}
- spin_unlock_bh(&task->state_lock);
+ spin_unlock_bh(&task->lock);
} while (cont);
task->ret = ret;
}
-int rxe_init_task(struct rxe_task *task,
- void *arg, int (*func)(void *), char *name)
+int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *))
{
task->arg = arg;
task->func = func;
- snprintf(task->name, sizeof(task->name), "%s", name);
task->destroyed = false;
- tasklet_setup(&task->tasklet, rxe_do_task);
+ tasklet_setup(&task->tasklet, do_task);
task->state = TASK_STATE_START;
- spin_lock_init(&task->state_lock);
+ spin_lock_init(&task->lock);
return 0;
}
@@ -121,23 +116,28 @@ void rxe_cleanup_task(struct rxe_task *task)
task->destroyed = true;
do {
- spin_lock_bh(&task->state_lock);
+ spin_lock_bh(&task->lock);
idle = (task->state == TASK_STATE_START);
- spin_unlock_bh(&task->state_lock);
+ spin_unlock_bh(&task->lock);
} while (!idle);
tasklet_kill(&task->tasklet);
}
-void rxe_run_task(struct rxe_task *task, int sched)
+void rxe_run_task(struct rxe_task *task)
+{
+ if (task->destroyed)
+ return;
+
+ do_task(&task->tasklet);
+}
+
+void rxe_sched_task(struct rxe_task *task)
{
if (task->destroyed)
return;
- if (sched)
- tasklet_schedule(&task->tasklet);
- else
- rxe_do_task(&task->tasklet);
+ tasklet_schedule(&task->tasklet);
}
void rxe_disable_task(struct rxe_task *task)
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index 7f612a1c68a7ba..7b88129702ac6d 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -21,11 +21,10 @@ enum {
struct rxe_task {
struct tasklet_struct tasklet;
int state;
- spinlock_t state_lock; /* spinlock for task state */
+ spinlock_t lock;
void *arg;
int (*func)(void *arg);
int ret;
- char name[16];
bool destroyed;
};
@@ -34,8 +33,7 @@ struct rxe_task {
* arg => parameter to pass to fcn
* func => function to call until it returns != 0
*/
-int rxe_init_task(struct rxe_task *task,
- void *arg, int (*func)(void *), char *name);
+int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *));
/* cleanup task */
void rxe_cleanup_task(struct rxe_task *task);
@@ -46,18 +44,9 @@ void rxe_cleanup_task(struct rxe_task *task);
*/
int __rxe_do_task(struct rxe_task *task);
-/*
- * common function called by any of the main tasklets
- * If there is any chance that there is additional
- * work to do someone must reschedule the task before
- * leaving
- */
-void rxe_do_task(struct tasklet_struct *t);
+void rxe_run_task(struct rxe_task *task);
-/* run a task, else schedule it to run as a tasklet, The decision
- * to run or schedule tasklet is based on the parameter sched.
- */
-void rxe_run_task(struct rxe_task *task, int sched);
+void rxe_sched_task(struct rxe_task *task);
/* keep a task from scheduling */
void rxe_disable_task(struct rxe_task *task);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 88825edc7dce19..025b35bf014e2a 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -172,10 +172,6 @@ static int rxe_create_ah(struct ib_ah *ibah,
ah->is_user = false;
}
- err = rxe_av_chk_attr(rxe, init_attr->ah_attr);
- if (err)
- return err;
-
err = rxe_add_to_pool_ah(&rxe->ah_pool, ah,
init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
if (err)
@@ -184,6 +180,12 @@ static int rxe_create_ah(struct ib_ah *ibah,
/* create index > 0 */
ah->ah_num = ah->elem.index;
+ err = rxe_ah_chk_attr(ah, init_attr->ah_attr);
+ if (err) {
+ rxe_cleanup(ah);
+ return err;
+ }
+
if (uresp) {
/* only if new user provider */
err = copy_to_user(&uresp->ah_num, &ah->ah_num,
@@ -206,10 +208,9 @@ static int rxe_create_ah(struct ib_ah *ibah,
static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
int err;
- struct rxe_dev *rxe = to_rdev(ibah->device);
struct rxe_ah *ah = to_rah(ibah);
- err = rxe_av_chk_attr(rxe, attr);
+ err = rxe_ah_chk_attr(ah, attr);
if (err)
return err;
@@ -238,7 +239,6 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
{
- int err;
int i;
u32 length;
struct rxe_recv_wqe *recv_wqe;
@@ -246,15 +246,11 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
int full;
full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER);
- if (unlikely(full)) {
- err = -ENOMEM;
- goto err1;
- }
+ if (unlikely(full))
+ return -ENOMEM;
- if (unlikely(num_sge > rq->max_sge)) {
- err = -EINVAL;
- goto err1;
- }
+ if (unlikely(num_sge > rq->max_sge))
+ return -EINVAL;
length = 0;
for (i = 0; i < num_sge; i++)
@@ -275,9 +271,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER);
return 0;
-
-err1:
- return err;
}
static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
@@ -343,10 +336,7 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
if (err)
return err;
- err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
- if (err)
- return err;
- return 0;
+ return rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
}
static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
@@ -453,11 +443,11 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
err = rxe_qp_chk_attr(rxe, qp, attr, mask);
if (err)
- goto err1;
+ return err;
err = rxe_qp_from_attr(qp, attr, mask, udata);
if (err)
- goto err1;
+ return err;
if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH))
qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
@@ -465,9 +455,6 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp->attr.dest_qp_num);
return 0;
-
-err1:
- return err;
}
static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -501,24 +488,21 @@ static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
struct rxe_sq *sq = &qp->sq;
if (unlikely(num_sge > sq->max_sge))
- goto err1;
+ return -EINVAL;
if (unlikely(mask & WR_ATOMIC_MASK)) {
if (length < 8)
- goto err1;
+ return -EINVAL;
if (atomic_wr(ibwr)->remote_addr & 0x7)
- goto err1;
+ return -EINVAL;
}
if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
(length > sq->max_inline)))
- goto err1;
+ return -EINVAL;
return 0;
-
-err1:
- return -EINVAL;
}
static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
@@ -695,9 +679,9 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
wr = next;
}
- rxe_run_task(&qp->req.task, 1);
+ rxe_sched_task(&qp->req.task);
if (unlikely(qp->req.state == QP_STATE_ERROR))
- rxe_run_task(&qp->comp.task, 1);
+ rxe_sched_task(&qp->comp.task);
return err;
}
@@ -719,7 +703,7 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
if (qp->is_user) {
/* Utilize process context to do protocol processing */
- rxe_run_task(&qp->req.task, 0);
+ rxe_run_task(&qp->req.task);
return 0;
} else
return rxe_post_send_kernel(qp, wr, bad_wr);
@@ -735,14 +719,12 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
*bad_wr = wr;
- err = -EINVAL;
- goto err1;
+ return -EINVAL;
}
if (unlikely(qp->srq)) {
*bad_wr = wr;
- err = -EINVAL;
- goto err1;
+ return -EINVAL;
}
spin_lock_irqsave(&rq->producer_lock, flags);
@@ -759,9 +741,8 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
spin_unlock_irqrestore(&rq->producer_lock, flags);
if (qp->resp.state == QP_STATE_ERROR)
- rxe_run_task(&qp->resp.task, 1);
+ rxe_sched_task(&qp->resp.task);
-err1:
return err;
}
@@ -826,16 +807,9 @@ static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
if (err)
- goto err1;
-
- err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
- if (err)
- goto err1;
-
- return 0;
+ return err;
-err1:
- return err;
+ return rxe_cq_resize_queue(cq, cqe, uresp, udata);
}
static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
@@ -902,6 +876,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
rxe_get(pd);
mr->ibmr.pd = ibpd;
+ mr->ibmr.device = ibpd->device;
rxe_mr_init_dma(access, mr);
rxe_finalize(mr);
@@ -921,26 +896,23 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
struct rxe_mr *mr;
mr = rxe_alloc(&rxe->mr_pool);
- if (!mr) {
- err = -ENOMEM;
- goto err2;
- }
-
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
rxe_get(pd);
mr->ibmr.pd = ibpd;
+ mr->ibmr.device = ibpd->device;
err = rxe_mr_init_user(rxe, start, length, iova, access, mr);
if (err)
- goto err3;
+ goto err1;
rxe_finalize(mr);
return &mr->ibmr;
-err3:
+err1:
rxe_cleanup(mr);
-err2:
return ERR_PTR(err);
}
@@ -956,25 +928,23 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
return ERR_PTR(-EINVAL);
mr = rxe_alloc(&rxe->mr_pool);
- if (!mr) {
- err = -ENOMEM;
- goto err1;
- }
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
rxe_get(pd);
mr->ibmr.pd = ibpd;
+ mr->ibmr.device = ibpd->device;
err = rxe_mr_init_fast(max_num_sg, mr);
if (err)
- goto err2;
+ goto err1;
rxe_finalize(mr);
return &mr->ibmr;
-err2:
- rxe_cleanup(mr);
err1:
+ rxe_cleanup(mr);
return ERR_PTR(err);
}
@@ -1134,7 +1104,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
err = ib_register_device(dev, ibdev_name, NULL);
if (err)
- pr_warn("%s failed with error %d\n", __func__, err);
+ rxe_dbg(rxe, "failed with error %d\n", err);
/*
* Note that rxe may be invalid at this point if another thread
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 5f5cbfcb35695b..19ddfa89048035 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -165,6 +165,12 @@ struct resp_res {
u64 va;
u32 resid;
} read;
+ struct {
+ u32 length;
+ u64 va;
+ u8 type;
+ u8 level;
+ } flush;
};
};
@@ -304,7 +310,6 @@ struct rxe_mr {
u32 lkey;
u32 rkey;
enum rxe_mr_state state;
- enum ib_mr_type type;
u32 offset;
int access;
diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c
index d68e37859e73bf..403029de6b92d4 100644
--- a/drivers/infiniband/sw/siw/siw_cq.c
+++ b/drivers/infiniband/sw/siw/siw_cq.c
@@ -56,8 +56,6 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) {
memset(wc, 0, sizeof(*wc));
wc->wr_id = cqe->id;
- wc->status = map_cqe_status[cqe->status].ib;
- wc->opcode = map_wc_opcode[cqe->opcode];
wc->byte_len = cqe->bytes;
/*
@@ -71,10 +69,32 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
wc->wc_flags = IB_WC_WITH_INVALIDATE;
}
wc->qp = cqe->base_qp;
+ wc->opcode = map_wc_opcode[cqe->opcode];
+ wc->status = map_cqe_status[cqe->status].ib;
siw_dbg_cq(cq,
"idx %u, type %d, flags %2x, id 0x%pK\n",
cq->cq_get % cq->num_cqe, cqe->opcode,
cqe->flags, (void *)(uintptr_t)cqe->id);
+ } else {
+ /*
+ * A malicious user may set invalid opcode or
+ * status in the user mmapped CQE array.
+ * Sanity check and correct values in that case
+ * to avoid out-of-bounds access to global arrays
+ * for opcode and status mapping.
+ */
+ u8 opcode = cqe->opcode;
+ u16 status = cqe->status;
+
+ if (opcode >= SIW_NUM_OPCODES) {
+ opcode = 0;
+ status = SIW_WC_GENERAL_ERR;
+ } else if (status >= SIW_NUM_WC_STATUS) {
+ status = SIW_WC_GENERAL_ERR;
+ }
+ wc->opcode = map_wc_opcode[opcode];
+ wc->status = map_cqe_status[status].ib;
+
}
WRITE_ONCE(cqe->flags, 0);
cq->cq_get++;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 3e814cfb298cf8..906fde1a2a0de2 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -676,13 +676,45 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr,
static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
{
- struct siw_sqe sqe = {};
int rv = 0;
while (wr) {
- sqe.id = wr->wr_id;
- sqe.opcode = wr->opcode;
- rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR);
+ struct siw_sqe sqe = {};
+
+ switch (wr->opcode) {
+ case IB_WR_RDMA_WRITE:
+ sqe.opcode = SIW_OP_WRITE;
+ break;
+ case IB_WR_RDMA_READ:
+ sqe.opcode = SIW_OP_READ;
+ break;
+ case IB_WR_RDMA_READ_WITH_INV:
+ sqe.opcode = SIW_OP_READ_LOCAL_INV;
+ break;
+ case IB_WR_SEND:
+ sqe.opcode = SIW_OP_SEND;
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ sqe.opcode = SIW_OP_SEND_WITH_IMM;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ sqe.opcode = SIW_OP_SEND_REMOTE_INV;
+ break;
+ case IB_WR_LOCAL_INV:
+ sqe.opcode = SIW_OP_INVAL_STAG;
+ break;
+ case IB_WR_REG_MR:
+ sqe.opcode = SIW_OP_REG_MR;
+ break;
+ default:
+ rv = -EINVAL;
+ break;
+ }
+ if (!rv) {
+ sqe.id = wr->wr_id;
+ rv = siw_sqe_complete(qp, &sqe, 0,
+ SIW_WC_WR_FLUSH_ERR);
+ }
if (rv) {
if (bad_wr)
*bad_wr = wr;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index ea16ba5d8da6c8..9ad8d985627524 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -41,6 +41,11 @@ static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
[IFLA_IPOIB_UMCAST] = { .type = NLA_U16 },
};
+static unsigned int ipoib_get_max_num_queues(void)
+{
+ return min_t(unsigned int, num_possible_cpus(), 128);
+}
+
static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -172,6 +177,8 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
.changelink = ipoib_changelink,
.get_size = ipoib_get_size,
.fill_info = ipoib_fill_info,
+ .get_num_rx_queues = ipoib_get_max_num_queues,
+ .get_num_tx_queues = ipoib_get_max_num_queues,
};
struct rtnl_link_ops *ipoib_get_link_ops(void)
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index a00ca117303a98..1b8eda0dae4e0c 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -347,22 +347,6 @@ static void iser_device_try_release(struct iser_device *device)
mutex_unlock(&ig.device_list_mutex);
}
-/*
- * Called with state mutex held
- */
-static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
- enum iser_conn_state comp,
- enum iser_conn_state exch)
-{
- int ret;
-
- ret = (iser_conn->state == comp);
- if (ret)
- iser_conn->state = exch;
-
- return ret;
-}
-
void iser_release_work(struct work_struct *work)
{
struct iser_conn *iser_conn;
@@ -464,11 +448,13 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
struct ib_conn *ib_conn = &iser_conn->ib_conn;
int err = 0;
+ lockdep_assert_held(&iser_conn->state_mutex);
+
/* terminate the iser conn only if the conn state is UP */
- if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
- ISER_CONN_TERMINATING))
+ if (iser_conn->state != ISER_CONN_UP)
return 0;
+ iser_conn->state = ISER_CONN_TERMINATING;
iser_info("iser_conn %p state %d\n", iser_conn, iser_conn->state);
/* suspend queuing of new iscsi commands */
@@ -498,9 +484,10 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
*/
static void iser_connect_error(struct rdma_cm_id *cma_id)
{
- struct iser_conn *iser_conn;
+ struct iser_conn *iser_conn = cma_id->context;
+
+ lockdep_assert_held(&iser_conn->state_mutex);
- iser_conn = cma_id->context;
iser_conn->state = ISER_CONN_TERMINATING;
}
@@ -542,12 +529,13 @@ static void iser_calc_scsi_params(struct iser_conn *iser_conn,
*/
static void iser_addr_handler(struct rdma_cm_id *cma_id)
{
+ struct iser_conn *iser_conn = cma_id->context;
struct iser_device *device;
- struct iser_conn *iser_conn;
struct ib_conn *ib_conn;
int ret;
- iser_conn = cma_id->context;
+ lockdep_assert_held(&iser_conn->state_mutex);
+
if (iser_conn->state != ISER_CONN_PENDING)
/* bailout */
return;
@@ -597,6 +585,8 @@ static void iser_route_handler(struct rdma_cm_id *cma_id)
struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct ib_device *ib_dev = ib_conn->device->ib_device;
+ lockdep_assert_held(&iser_conn->state_mutex);
+
if (iser_conn->state != ISER_CONN_PENDING)
/* bailout */
return;
@@ -629,14 +619,18 @@ failure:
iser_connect_error(cma_id);
}
+/*
+ * Called with state mutex held
+ */
static void iser_connected_handler(struct rdma_cm_id *cma_id,
const void *private_data)
{
- struct iser_conn *iser_conn;
+ struct iser_conn *iser_conn = cma_id->context;
struct ib_qp_attr attr;
struct ib_qp_init_attr init_attr;
- iser_conn = cma_id->context;
+ lockdep_assert_held(&iser_conn->state_mutex);
+
if (iser_conn->state != ISER_CONN_PENDING)
/* bailout */
return;
@@ -657,30 +651,27 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id,
complete(&iser_conn->up_completion);
}
-static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
-{
- struct iser_conn *iser_conn = cma_id->context;
-
- if (iser_conn_terminate(iser_conn)) {
- if (iser_conn->iscsi_conn)
- iscsi_conn_failure(iser_conn->iscsi_conn,
- ISCSI_ERR_CONN_FAILED);
- else
- iser_err("iscsi_iser connection isn't bound\n");
- }
-}
-
+/*
+ * Called with state mutex held
+ */
static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
bool destroy)
{
struct iser_conn *iser_conn = cma_id->context;
+ lockdep_assert_held(&iser_conn->state_mutex);
/*
* We are not guaranteed that we visited disconnected_handler
* by now, call it here to be safe that we handle CM drep
* and flush errors.
*/
- iser_disconnected_handler(cma_id);
+ if (iser_conn_terminate(iser_conn)) {
+ if (iser_conn->iscsi_conn)
+ iscsi_conn_failure(iser_conn->iscsi_conn,
+ ISCSI_ERR_CONN_FAILED);
+ else
+ iser_err("iscsi_iser connection isn't bound\n");
+ }
iser_free_ib_conn_res(iser_conn, destroy);
complete(&iser_conn->ib_completion);
}
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index b360a1527cd102..75404885cf9810 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -993,9 +993,8 @@ isert_rx_login_req(struct isert_conn *isert_conn)
* login request PDU.
*/
login->leading_connection = (!login_req->tsih) ? 1 : 0;
- login->current_stage =
- (login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK)
- >> 2;
+ login->current_stage = ISCSI_LOGIN_CURRENT_STAGE(
+ login_req->flags);
login->version_min = login_req->min_version;
login->version_max = login_req->max_version;
memcpy(login->isid, login_req->isid, 6);
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 205fd44a4727a8..80abf45a197ac0 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -1064,10 +1064,8 @@ static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count)
/* Align the MR to a 4K page size to match the block virt boundary */
nr = ib_map_mr_sg(req->mr, req->sglist, count, NULL, SZ_4K);
- if (nr < 0)
- return nr;
- if (nr < req->sg_cnt)
- return -EINVAL;
+ if (nr != count)
+ return nr < 0 ? nr : -EINVAL;
ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
return nr;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
index a2420eecaf5a10..ab25619261d28b 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
@@ -68,10 +68,7 @@ enum {
struct rtrs_ib_dev;
struct rtrs_rdma_dev_pd_ops {
- struct rtrs_ib_dev *(*alloc)(void);
- void (*free)(struct rtrs_ib_dev *dev);
int (*init)(struct rtrs_ib_dev *dev);
- void (*deinit)(struct rtrs_ib_dev *dev);
};
struct rtrs_rdma_dev_pd {
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
index 2a3c9ac64a42e2..c76ba29da1e206 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
@@ -203,7 +203,6 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_path *srv_path)
mutex_lock(&srv->paths_mutex);
if (!--srv->dev_ref) {
- kobject_del(srv->kobj_paths);
kobject_put(srv->kobj_paths);
mutex_unlock(&srv->paths_mutex);
device_del(&srv->dev);
@@ -304,12 +303,18 @@ destroy_root:
void rtrs_srv_destroy_path_files(struct rtrs_srv_path *srv_path)
{
- if (srv_path->kobj.state_in_sysfs) {
+ if (srv_path->stats->kobj_stats.state_in_sysfs) {
+ sysfs_remove_group(&srv_path->stats->kobj_stats,
+ &rtrs_srv_stats_attr_group);
kobject_del(&srv_path->stats->kobj_stats);
kobject_put(&srv_path->stats->kobj_stats);
+ }
+
+ if (srv_path->kobj.state_in_sysfs) {
sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group);
+ kobject_del(&srv_path->kobj);
kobject_put(&srv_path->kobj);
-
- rtrs_srv_destroy_once_sysfs_root_folders(srv_path);
}
+
+ rtrs_srv_destroy_once_sysfs_root_folders(srv_path);
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index 22d7ba05e9fe83..d1703e2c0b82fc 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -561,9 +561,11 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
{
struct rtrs_srv_sess *srv = srv_path->srv;
struct rtrs_path *ss = &srv_path->s;
- int i, mri, err, mrs_num;
+ int i, err, mrs_num;
unsigned int chunk_bits;
int chunks_per_mr = 1;
+ struct ib_mr *mr;
+ struct sg_table *sgt;
/*
* Here we map queue_depth chunks to MR. Firstly we have to
@@ -586,16 +588,14 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
if (!srv_path->mrs)
return -ENOMEM;
- srv_path->mrs_num = mrs_num;
-
- for (mri = 0; mri < mrs_num; mri++) {
- struct rtrs_srv_mr *srv_mr = &srv_path->mrs[mri];
- struct sg_table *sgt = &srv_mr->sgt;
+ for (srv_path->mrs_num = 0; srv_path->mrs_num < mrs_num;
+ srv_path->mrs_num++) {
+ struct rtrs_srv_mr *srv_mr = &srv_path->mrs[srv_path->mrs_num];
struct scatterlist *s;
- struct ib_mr *mr;
int nr, nr_sgt, chunks;
- chunks = chunks_per_mr * mri;
+ sgt = &srv_mr->sgt;
+ chunks = chunks_per_mr * srv_path->mrs_num;
if (!always_invalidate)
chunks_per_mr = min_t(int, chunks_per_mr,
srv->queue_depth - chunks);
@@ -622,7 +622,7 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
}
nr = ib_map_mr_sg(mr, sgt->sgl, nr_sgt,
NULL, max_chunk_size);
- if (nr < 0 || nr < sgt->nents) {
+ if (nr != nr_sgt) {
err = nr < 0 ? nr : -EINVAL;
goto dereg_mr;
}
@@ -644,31 +644,24 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
srv_mr->mr = mr;
-
- continue;
-err:
- while (mri--) {
- srv_mr = &srv_path->mrs[mri];
- sgt = &srv_mr->sgt;
- mr = srv_mr->mr;
- rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1);
-dereg_mr:
- ib_dereg_mr(mr);
-unmap_sg:
- ib_dma_unmap_sg(srv_path->s.dev->ib_dev, sgt->sgl,
- sgt->nents, DMA_BIDIRECTIONAL);
-free_sg:
- sg_free_table(sgt);
- }
- kfree(srv_path->mrs);
-
- return err;
}
chunk_bits = ilog2(srv->queue_depth - 1) + 1;
srv_path->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits);
return 0;
+
+dereg_mr:
+ ib_dereg_mr(mr);
+unmap_sg:
+ ib_dma_unmap_sg(srv_path->s.dev->ib_dev, sgt->sgl,
+ sgt->nents, DMA_BIDIRECTIONAL);
+free_sg:
+ sg_free_table(sgt);
+err:
+ unmap_cont_bufs(srv_path);
+
+ return err;
}
static void rtrs_srv_hb_err_handler(struct rtrs_con *c)
@@ -1678,12 +1671,6 @@ static int create_con(struct rtrs_srv_path *srv_path,
srv->queue_depth * (1 + 2) + 1);
max_recv_wr = srv->queue_depth + 1;
- /*
- * If we have all receive requests posted and
- * all write requests posted and each read request
- * requires an invalidate request + drain
- * and qp gets into error state.
- */
}
cq_num = max_send_wr + max_recv_wr;
atomic_set(&con->c.sq_wr_avail, max_send_wr);
@@ -1950,22 +1937,21 @@ static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id,
{
struct rtrs_srv_path *srv_path = NULL;
struct rtrs_path *s = NULL;
+ struct rtrs_con *c = NULL;
- if (ev->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
- struct rtrs_con *c = cm_id->context;
-
- s = c->path;
- srv_path = to_srv_path(s);
- }
-
- switch (ev->event) {
- case RDMA_CM_EVENT_CONNECT_REQUEST:
+ if (ev->event == RDMA_CM_EVENT_CONNECT_REQUEST)
/*
* In case of error cma.c will destroy cm_id,
* see cma_process_remove()
*/
return rtrs_rdma_connect(cm_id, ev->param.conn.private_data,
ev->param.conn.private_data_len);
+
+ c = cm_id->context;
+ s = c->path;
+ srv_path = to_srv_path(s);
+
+ switch (ev->event) {
case RDMA_CM_EVENT_ESTABLISHED:
/* Nothing here */
break;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c
index ed324b47d93ae4..4bf9d868cc522b 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs.c
@@ -557,7 +557,6 @@ EXPORT_SYMBOL(rtrs_addr_to_sockaddr);
void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
struct rtrs_rdma_dev_pd *pool)
{
- WARN_ON(pool->ops && (!pool->ops->alloc ^ !pool->ops->free));
INIT_LIST_HEAD(&pool->list);
mutex_init(&pool->mutex);
pool->pd_flags = pd_flags;
@@ -583,15 +582,8 @@ static void dev_free(struct kref *ref)
list_del(&dev->entry);
mutex_unlock(&pool->mutex);
- if (pool->ops && pool->ops->deinit)
- pool->ops->deinit(dev);
-
ib_dealloc_pd(dev->ib_pd);
-
- if (pool->ops && pool->ops->free)
- pool->ops->free(dev);
- else
- kfree(dev);
+ kfree(dev);
}
int rtrs_ib_dev_put(struct rtrs_ib_dev *dev)
@@ -618,11 +610,8 @@ rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
goto out_unlock;
}
mutex_unlock(&pool->mutex);
- if (pool->ops && pool->ops->alloc)
- dev = pool->ops->alloc();
- else
- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
- if (IS_ERR_OR_NULL(dev))
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
goto out_err;
kref_init(&dev->ref);
@@ -644,10 +633,7 @@ out_unlock:
out_free_pd:
ib_dealloc_pd(dev->ib_pd);
out_free_dev:
- if (pool->ops && pool->ops->free)
- pool->ops->free(dev);
- else
- kfree(dev);
+ kfree(dev);
out_err:
return NULL;
}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 1075c2ac8fe209..b4d6a4a5ae81e6 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3410,7 +3410,8 @@ static int srp_parse_options(struct net *net, const char *buf,
break;
case SRP_OPT_PKEY:
- if (match_hex(args, &token)) {
+ ret = match_hex(args, &token);
+ if (ret) {
pr_warn("bad P_Key parameter '%s'\n", p);
goto out;
}
@@ -3470,7 +3471,8 @@ static int srp_parse_options(struct net *net, const char *buf,
break;
case SRP_OPT_MAX_SECT:
- if (match_int(args, &token)) {
+ ret = match_int(args, &token);
+ if (ret) {
pr_warn("bad max sect parameter '%s'\n", p);
goto out;
}
@@ -3478,8 +3480,15 @@ static int srp_parse_options(struct net *net, const char *buf,
break;
case SRP_OPT_QUEUE_SIZE:
- if (match_int(args, &token) || token < 1) {
+ ret = match_int(args, &token);
+ if (ret) {
+ pr_warn("match_int() failed for queue_size parameter '%s', Error %d\n",
+ p, ret);
+ goto out;
+ }
+ if (token < 1) {
pr_warn("bad queue_size parameter '%s'\n", p);
+ ret = -EINVAL;
goto out;
}
target->scsi_host->can_queue = token;
@@ -3490,25 +3499,40 @@ static int srp_parse_options(struct net *net, const char *buf,
break;
case SRP_OPT_MAX_CMD_PER_LUN:
- if (match_int(args, &token) || token < 1) {
+ ret = match_int(args, &token);
+ if (ret) {
+ pr_warn("match_int() failed for max cmd_per_lun parameter '%s', Error %d\n",
+ p, ret);
+ goto out;
+ }
+ if (token < 1) {
pr_warn("bad max cmd_per_lun parameter '%s'\n",
p);
+ ret = -EINVAL;
goto out;
}
target->scsi_host->cmd_per_lun = token;
break;
case SRP_OPT_TARGET_CAN_QUEUE:
- if (match_int(args, &token) || token < 1) {
+ ret = match_int(args, &token);
+ if (ret) {
+ pr_warn("match_int() failed for max target_can_queue parameter '%s', Error %d\n",
+ p, ret);
+ goto out;
+ }
+ if (token < 1) {
pr_warn("bad max target_can_queue parameter '%s'\n",
p);
+ ret = -EINVAL;
goto out;
}
target->target_can_queue = token;
break;
case SRP_OPT_IO_CLASS:
- if (match_hex(args, &token)) {
+ ret = match_hex(args, &token);
+ if (ret) {
pr_warn("bad IO class parameter '%s'\n", p);
goto out;
}
@@ -3517,6 +3541,7 @@ static int srp_parse_options(struct net *net, const char *buf,
pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
token, SRP_REV10_IB_IO_CLASS,
SRP_REV16A_IB_IO_CLASS);
+ ret = -EINVAL;
goto out;
}
target->io_class = token;
@@ -3539,16 +3564,24 @@ static int srp_parse_options(struct net *net, const char *buf,
break;
case SRP_OPT_CMD_SG_ENTRIES:
- if (match_int(args, &token) || token < 1 || token > 255) {
+ ret = match_int(args, &token);
+ if (ret) {
+ pr_warn("match_int() failed for max cmd_sg_entries parameter '%s', Error %d\n",
+ p, ret);
+ goto out;
+ }
+ if (token < 1 || token > 255) {
pr_warn("bad max cmd_sg_entries parameter '%s'\n",
p);
+ ret = -EINVAL;
goto out;
}
target->cmd_sg_cnt = token;
break;
case SRP_OPT_ALLOW_EXT_SG:
- if (match_int(args, &token)) {
+ ret = match_int(args, &token);
+ if (ret) {
pr_warn("bad allow_ext_sg parameter '%s'\n", p);
goto out;
}
@@ -3556,43 +3589,77 @@ static int srp_parse_options(struct net *net, const char *buf,
break;
case SRP_OPT_SG_TABLESIZE:
- if (match_int(args, &token) || token < 1 ||
- token > SG_MAX_SEGMENTS) {
+ ret = match_int(args, &token);
+ if (ret) {
+ pr_warn("match_int() failed for max sg_tablesize parameter '%s', Error %d\n",
+ p, ret);
+ goto out;
+ }
+ if (token < 1 || token > SG_MAX_SEGMENTS) {
pr_warn("bad max sg_tablesize parameter '%s'\n",
p);
+ ret = -EINVAL;
goto out;
}
target->sg_tablesize = token;
break;
case SRP_OPT_COMP_VECTOR:
- if (match_int(args, &token) || token < 0) {
+ ret = match_int(args, &token);
+ if (ret) {
+ pr_warn("match_int() failed for comp_vector parameter '%s', Error %d\n",
+ p, ret);
+ goto out;
+ }
+ if (token < 0) {
pr_warn("bad comp_vector parameter '%s'\n", p);
+ ret = -EINVAL;
goto out;
}
target->comp_vector = token;
break;
case SRP_OPT_TL_RETRY_COUNT:
- if (match_int(args, &token) || token < 2 || token > 7) {
+ ret = match_int(args, &token);
+ if (ret) {
+ pr_warn("match_int() failed for tl_retry_count parameter '%s', Error %d\n",
+ p, ret);
+ goto out;
+ }
+ if (token < 2 || token > 7) {
pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
p);
+ ret = -EINVAL;
goto out;
}
target->tl_retry_count = token;
break;
case SRP_OPT_MAX_IT_IU_SIZE:
- if (match_int(args, &token) || token < 0) {
+ ret = match_int(args, &token);
+ if (ret) {
+ pr_warn("match_int() failed for max it_iu_size parameter '%s', Error %d\n",
+ p, ret);
+ goto out;
+ }
+ if (token < 0) {
pr_warn("bad maximum initiator to target IU size '%s'\n", p);
+ ret = -EINVAL;
goto out;
}
target->max_it_iu_size = token;
break;
case SRP_OPT_CH_COUNT:
- if (match_int(args, &token) || token < 1) {
+ ret = match_int(args, &token);
+ if (ret) {
+ pr_warn("match_int() failed for channel count parameter '%s', Error %d\n",
+ p, ret);
+ goto out;
+ }
+ if (token < 1) {
pr_warn("bad channel count %s\n", p);
+ ret = -EINVAL;
goto out;
}
target->ch_count = token;
@@ -3601,6 +3668,7 @@ static int srp_parse_options(struct net *net, const char *buf,
default:
pr_warn("unknown parameter or missing value '%s' in target creation request\n",
p);
+ ret = -EINVAL;
goto out;
}
}
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 690b69cae4e3a8..e708c2d049839f 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -671,8 +671,7 @@ free_q:
return err;
}
-int mana_gd_destroy_dma_region(struct gdma_context *gc,
- gdma_obj_handle_t dma_region_handle)
+int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle)
{
struct gdma_destroy_dma_region_req req = {};
struct gdma_general_resp resp = {};
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index d80c78506f19b4..b3ba04615caa32 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -65,8 +65,6 @@ enum {
GDMA_DEVICE_MANA = 2,
};
-typedef u64 gdma_obj_handle_t;
-
struct gdma_resource {
/* Protect the bitmap */
spinlock_t lock;
@@ -200,7 +198,7 @@ struct gdma_mem_info {
u64 length;
/* Allocated by the PF driver */
- gdma_obj_handle_t dma_region_handle;
+ u64 dma_region_handle;
};
#define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8
@@ -632,7 +630,7 @@ struct gdma_create_queue_req {
u32 reserved1;
u32 pdid;
u32 doolbell_id;
- gdma_obj_handle_t gdma_region;
+ u64 gdma_region;
u32 reserved2;
u32 queue_size;
u32 log2_throttle_limit;
@@ -707,14 +705,14 @@ struct gdma_create_dma_region_req {
struct gdma_create_dma_region_resp {
struct gdma_resp_hdr hdr;
- gdma_obj_handle_t dma_region_handle;
+ u64 dma_region_handle;
}; /* HW DATA */
/* GDMA_DMA_REGION_ADD_PAGES */
struct gdma_dma_region_add_pages_req {
struct gdma_req_hdr hdr;
- gdma_obj_handle_t dma_region_handle;
+ u64 dma_region_handle;
u32 page_addr_list_len;
u32 reserved3;
@@ -726,7 +724,7 @@ struct gdma_dma_region_add_pages_req {
struct gdma_destroy_dma_region_req {
struct gdma_req_hdr hdr;
- gdma_obj_handle_t dma_region_handle;
+ u64 dma_region_handle;
}; /* HW DATA */
enum gdma_pd_flags {
@@ -741,14 +739,14 @@ struct gdma_create_pd_req {
struct gdma_create_pd_resp {
struct gdma_resp_hdr hdr;
- gdma_obj_handle_t pd_handle;
+ u64 pd_handle;
u32 pd_id;
u32 reserved;
};/* HW DATA */
struct gdma_destroy_pd_req {
struct gdma_req_hdr hdr;
- gdma_obj_handle_t pd_handle;
+ u64 pd_handle;
};/* HW DATA */
struct gdma_destory_pd_resp {
@@ -764,11 +762,11 @@ enum gdma_mr_type {
};
struct gdma_create_mr_params {
- gdma_obj_handle_t pd_handle;
+ u64 pd_handle;
enum gdma_mr_type mr_type;
union {
struct {
- gdma_obj_handle_t dma_region_handle;
+ u64 dma_region_handle;
u64 virtual_address;
enum gdma_mr_access_flags access_flags;
} gva;
@@ -777,13 +775,13 @@ struct gdma_create_mr_params {
struct gdma_create_mr_request {
struct gdma_req_hdr hdr;
- gdma_obj_handle_t pd_handle;
+ u64 pd_handle;
enum gdma_mr_type mr_type;
u32 reserved_1;
union {
struct {
- gdma_obj_handle_t dma_region_handle;
+ u64 dma_region_handle;
u64 virtual_address;
enum gdma_mr_access_flags access_flags;
} gva;
@@ -794,14 +792,14 @@ struct gdma_create_mr_request {
struct gdma_create_mr_response {
struct gdma_resp_hdr hdr;
- gdma_obj_handle_t mr_handle;
+ u64 mr_handle;
u32 lkey;
u32 rkey;
};/* HW DATA */
struct gdma_destroy_mr_request {
struct gdma_req_hdr hdr;
- gdma_obj_handle_t mr_handle;
+ u64 mr_handle;
};/* HW DATA */
struct gdma_destroy_mr_response {
@@ -835,7 +833,6 @@ void mana_gd_free_memory(struct gdma_mem_info *gmi);
int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req,
u32 resp_len, void *resp);
-int mana_gd_destroy_dma_region(struct gdma_context *gc,
- gdma_obj_handle_t dma_region_handle);
+int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle);
#endif /* _GDMA_H */
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 575ea36ce606bd..3bb579962a14aa 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -412,6 +412,9 @@ int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
extern const struct ethtool_ops mana_ethtool_ops;
+/* A CQ can be created not associated with any EQ */
+#define GDMA_CQ_NO_EQ 0xffff
+
struct mana_obj_spec {
u32 queue_index;
u64 gdma_region;
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index a9162f25beaf53..b8c56d7dc35dab 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -84,6 +84,8 @@ enum {
/* opcode 0x15 is reserved */
IB_OPCODE_SEND_LAST_WITH_INVALIDATE = 0x16,
IB_OPCODE_SEND_ONLY_WITH_INVALIDATE = 0x17,
+ IB_OPCODE_FLUSH = 0x1C,
+ IB_OPCODE_ATOMIC_WRITE = 0x1D,
/* real constants follow -- see comment about above IB_OPCODE()
macro for more details */
@@ -112,6 +114,8 @@ enum {
IB_OPCODE(RC, FETCH_ADD),
IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE),
IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE),
+ IB_OPCODE(RC, FLUSH),
+ IB_OPCODE(RC, ATOMIC_WRITE),
/* UC */
IB_OPCODE(UC, SEND_FIRST),
@@ -149,6 +153,7 @@ enum {
IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE),
IB_OPCODE(RD, COMPARE_SWAP),
IB_OPCODE(RD, FETCH_ADD),
+ IB_OPCODE(RD, FLUSH),
/* UD */
IB_OPCODE(UD, SEND_ONLY),
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 975d6e9efbcb44..a9a429172c0a11 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -270,6 +270,10 @@ enum ib_device_cap_flags {
/* The device supports padding incoming writes to cacheline. */
IB_DEVICE_PCI_WRITE_END_PADDING =
IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING,
+ /* Placement type attributes */
+ IB_DEVICE_FLUSH_GLOBAL = IB_UVERBS_DEVICE_FLUSH_GLOBAL,
+ IB_DEVICE_FLUSH_PERSISTENT = IB_UVERBS_DEVICE_FLUSH_PERSISTENT,
+ IB_DEVICE_ATOMIC_WRITE = IB_UVERBS_DEVICE_ATOMIC_WRITE,
};
enum ib_kernel_cap_flags {
@@ -982,9 +986,11 @@ enum ib_wc_opcode {
IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW,
IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV,
IB_WC_LSO = IB_UVERBS_WC_TSO,
+ IB_WC_ATOMIC_WRITE = IB_UVERBS_WC_ATOMIC_WRITE,
IB_WC_REG_MR,
IB_WC_MASKED_COMP_SWAP,
IB_WC_MASKED_FETCH_ADD,
+ IB_WC_FLUSH = IB_UVERBS_WC_FLUSH,
/*
* Set value of IB_WC_RECV so consumers can test if a completion is a
* receive by testing (opcode & IB_WC_RECV).
@@ -1325,6 +1331,8 @@ enum ib_wr_opcode {
IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+ IB_WR_FLUSH = IB_UVERBS_WR_FLUSH,
+ IB_WR_ATOMIC_WRITE = IB_UVERBS_WR_ATOMIC_WRITE,
/* These are kernel only and can not be issued by userspace */
IB_WR_REG_MR = 0x20,
@@ -1458,10 +1466,12 @@ enum ib_access_flags {
IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING,
+ IB_ACCESS_FLUSH_GLOBAL = IB_UVERBS_ACCESS_FLUSH_GLOBAL,
+ IB_ACCESS_FLUSH_PERSISTENT = IB_UVERBS_ACCESS_FLUSH_PERSISTENT,
IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE,
IB_ACCESS_SUPPORTED =
- ((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL,
+ ((IB_ACCESS_FLUSH_PERSISTENT << 1) - 1) | IB_ACCESS_OPTIONAL,
};
/*
@@ -2203,6 +2213,7 @@ struct ib_port_data {
struct ib_port_cache cache;
struct net_device __rcu *netdev;
+ netdevice_tracker netdev_tracker;
struct hlist_node ndev_hash_link;
struct rdma_port_counter port_counter;
struct ib_port *sysfs;
@@ -4321,6 +4332,8 @@ int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata);
static inline int ib_check_mr_access(struct ib_device *ib_dev,
unsigned int flags)
{
+ u64 device_cap = ib_dev->attrs.device_cap_flags;
+
/*
* Local write permission is required if remote write or
* remote atomic permission is also requested.
@@ -4334,7 +4347,14 @@ static inline int ib_check_mr_access(struct ib_device *ib_dev,
if (flags & IB_ACCESS_ON_DEMAND &&
!(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING))
- return -EINVAL;
+ return -EOPNOTSUPP;
+
+ if ((flags & IB_ACCESS_FLUSH_GLOBAL &&
+ !(device_cap & IB_DEVICE_FLUSH_GLOBAL)) ||
+ (flags & IB_ACCESS_FLUSH_PERSISTENT &&
+ !(device_cap & IB_DEVICE_FLUSH_PERSISTENT)))
+ return -EOPNOTSUPP;
+
return 0;
}
diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h
index f3d5377b217a68..d297f084001a5d 100644
--- a/include/rdma/opa_vnic.h
+++ b/include/rdma/opa_vnic.h
@@ -51,7 +51,7 @@ static inline void *opa_vnic_dev_priv(const struct net_device *dev)
return oparn->dev_priv;
}
-/* opa_vnic skb meta data structrue */
+/* opa_vnic skb meta data structure */
struct opa_vnic_skb_mdata {
u8 vl;
u8 entropy;
diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h
index 59363a083ecb9a..d92691c78cff60 100644
--- a/include/trace/events/ib_mad.h
+++ b/include/trace/events/ib_mad.h
@@ -49,7 +49,6 @@ DECLARE_EVENT_CLASS(ib_mad_send_template,
__field(int, retries_left)
__field(int, max_retries)
__field(int, retry)
- __field(u16, pkey)
),
TP_fast_assign(
@@ -89,7 +88,7 @@ DECLARE_EVENT_CLASS(ib_mad_send_template,
"hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \
"method 0x%x status 0x%x class_specific 0x%x tid 0x%llx " \
"attr_id 0x%x attr_mod 0x%x => dlid 0x%08x sl %d "\
- "pkey 0x%x rpqn 0x%x rqpkey 0x%x",
+ "rpqn 0x%x rqpkey 0x%x",
__entry->dev_index, __entry->port_num, __entry->qp_num,
__entry->agent_priv, be64_to_cpu(__entry->wrtid),
__entry->retries_left, __entry->max_retries,
@@ -100,7 +99,7 @@ DECLARE_EVENT_CLASS(ib_mad_send_template,
be16_to_cpu(__entry->class_specific),
be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id),
be32_to_cpu(__entry->attr_mod),
- be32_to_cpu(__entry->dlid), __entry->sl, __entry->pkey,
+ be32_to_cpu(__entry->dlid), __entry->sl,
__entry->rqpn, __entry->rqkey
)
);
@@ -204,7 +203,6 @@ TRACE_EVENT(ib_mad_recv_done_handler,
__field(u16, wc_status)
__field(u32, slid)
__field(u32, dev_index)
- __field(u16, pkey)
),
TP_fast_assign(
@@ -224,9 +222,6 @@ TRACE_EVENT(ib_mad_recv_done_handler,
__entry->slid = wc->slid;
__entry->src_qp = wc->src_qp;
__entry->sl = wc->sl;
- ib_query_pkey(qp_info->port_priv->device,
- qp_info->port_priv->port_num,
- wc->pkey_index, &__entry->pkey);
__entry->wc_status = wc->status;
),
@@ -234,7 +229,7 @@ TRACE_EVENT(ib_mad_recv_done_handler,
"base_ver 0x%02x class 0x%02x class_ver 0x%02x " \
"method 0x%02x status 0x%04x class_specific 0x%04x " \
"tid 0x%016llx attr_id 0x%04x attr_mod 0x%08x " \
- "slid 0x%08x src QP%d, sl %d pkey 0x%04x",
+ "slid 0x%08x src QP%d, sl %d",
__entry->dev_index, __entry->port_num, __entry->qp_num,
__entry->wc_status,
__entry->length,
@@ -244,7 +239,7 @@ TRACE_EVENT(ib_mad_recv_done_handler,
be16_to_cpu(__entry->class_specific),
be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id),
be32_to_cpu(__entry->attr_mod),
- __entry->slid, __entry->src_qp, __entry->sl, __entry->pkey
+ __entry->slid, __entry->src_qp, __entry->sl
)
);
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index f6fde06db4b4ea..745790ce3c261e 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -85,11 +85,26 @@ struct hns_roce_ib_create_qp_resp {
__aligned_u64 dwqe_mmap_key;
};
+enum {
+ HNS_ROCE_EXSGE_FLAGS = 1 << 0,
+};
+
+enum {
+ HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0,
+};
+
struct hns_roce_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 cqe_size;
__u32 srq_tab_size;
__u32 reserved;
+ __u32 config;
+ __u32 max_inline_data;
+};
+
+struct hns_roce_ib_alloc_ucontext {
+ __u32 config;
+ __u32 reserved;
};
struct hns_roce_ib_alloc_pd_resp {
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 7dd56210226f53..d7c5aaa3274453 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -57,6 +57,8 @@ enum ib_uverbs_access_flags {
IB_UVERBS_ACCESS_ZERO_BASED = 1 << 5,
IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6,
IB_UVERBS_ACCESS_HUGETLB = 1 << 7,
+ IB_UVERBS_ACCESS_FLUSH_GLOBAL = 1 << 8,
+ IB_UVERBS_ACCESS_FLUSH_PERSISTENT = 1 << 9,
IB_UVERBS_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_OPTIONAL_FIRST,
IB_UVERBS_ACCESS_OPTIONAL_RANGE =
@@ -251,6 +253,7 @@ enum rdma_driver_id {
RDMA_DRIVER_EFA,
RDMA_DRIVER_SIW,
RDMA_DRIVER_ERDMA,
+ RDMA_DRIVER_MANA,
};
enum ib_uverbs_gid_type {
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 43672cb1fd5798..e16650f0c85dd6 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -105,6 +105,18 @@ enum {
IB_USER_VERBS_EX_CMD_MODIFY_CQ
};
+/* see IBA A19.4.1.1 Placement Types */
+enum ib_placement_type {
+ IB_FLUSH_GLOBAL = 1U << 0,
+ IB_FLUSH_PERSISTENT = 1U << 1,
+};
+
+/* see IBA A19.4.1.2 Selectivity Level */
+enum ib_selectivity_level {
+ IB_FLUSH_RANGE = 0,
+ IB_FLUSH_MR,
+};
+
/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
@@ -466,6 +478,8 @@ enum ib_uverbs_wc_opcode {
IB_UVERBS_WC_BIND_MW = 5,
IB_UVERBS_WC_LOCAL_INV = 6,
IB_UVERBS_WC_TSO = 7,
+ IB_UVERBS_WC_FLUSH = 8,
+ IB_UVERBS_WC_ATOMIC_WRITE = 9,
};
struct ib_uverbs_wc {
@@ -784,6 +798,8 @@ enum ib_uverbs_wr_opcode {
IB_UVERBS_WR_RDMA_READ_WITH_INV = 11,
IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12,
IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13,
+ IB_UVERBS_WR_FLUSH = 14,
+ IB_UVERBS_WR_ATOMIC_WRITE = 15,
/* Review enum ib_wr_opcode before modifying this */
};
@@ -1331,6 +1347,11 @@ enum ib_uverbs_device_cap_flags {
/* Deprecated. Please use IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS. */
IB_UVERBS_DEVICE_RAW_SCATTER_FCS = 1ULL << 34,
IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING = 1ULL << 36,
+ /* Flush placement types */
+ IB_UVERBS_DEVICE_FLUSH_GLOBAL = 1ULL << 38,
+ IB_UVERBS_DEVICE_FLUSH_PERSISTENT = 1ULL << 39,
+ /* Atomic write attributes */
+ IB_UVERBS_DEVICE_ATOMIC_WRITE = 1ULL << 40,
};
enum ib_uverbs_raw_packet_caps {
diff --git a/include/uapi/rdma/mana-abi.h b/include/uapi/rdma/mana-abi.h
new file mode 100644
index 00000000000000..5fcb31b37fb91a
--- /dev/null
+++ b/include/uapi/rdma/mana-abi.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef MANA_ABI_USER_H
+#define MANA_ABI_USER_H
+
+#include <linux/types.h>
+#include <rdma/ib_user_ioctl_verbs.h>
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+
+#define MANA_IB_UVERBS_ABI_VERSION 1
+
+struct mana_ib_create_cq {
+ __aligned_u64 buf_addr;
+};
+
+struct mana_ib_create_qp {
+ __aligned_u64 sq_buf_addr;
+ __u32 sq_buf_size;
+ __u32 port;
+};
+
+struct mana_ib_create_qp_resp {
+ __u32 sqid;
+ __u32 cqid;
+ __u32 tx_vp_offset;
+ __u32 reserved;
+};
+
+struct mana_ib_create_wq {
+ __aligned_u64 wq_buf_addr;
+ __u32 wq_buf_size;
+ __u32 reserved;
+};
+
+/* RX Hash function flags */
+enum mana_ib_rx_hash_function_flags {
+ MANA_IB_RX_HASH_FUNC_TOEPLITZ = 1 << 0,
+};
+
+struct mana_ib_create_qp_rss {
+ __aligned_u64 rx_hash_fields_mask;
+ __u8 rx_hash_function;
+ __u8 reserved[7];
+ __u32 rx_hash_key_len;
+ __u8 rx_hash_key[40];
+ __u32 port;
+};
+
+struct rss_resp_entry {
+ __u32 cqid;
+ __u32 wqid;
+};
+
+struct mana_ib_create_qp_rss_resp {
+ __aligned_u64 num_entries;
+ struct rss_resp_entry entries[64];
+};
+
+#endif
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index 73f679dfd2dfb1..bb092fccb813cc 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -84,6 +84,13 @@ struct rxe_send_wr {
union {
struct {
__aligned_u64 remote_addr;
+ __u32 length;
+ __u32 rkey;
+ __u8 type;
+ __u8 level;
+ } flush;
+ struct {
+ __aligned_u64 remote_addr;
__u32 rkey;
__u32 reserved;
} rdma;
@@ -146,6 +153,7 @@ struct rxe_dma_info {
__u32 reserved;
union {
__DECLARE_FLEX_ARRAY(__u8, inline_data);
+ __DECLARE_FLEX_ARRAY(__u8, atomic_wr);
__DECLARE_FLEX_ARRAY(struct rxe_sge, sge);
};
};