aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTadeusz Struk <tadeusz.struk@intel.com>2016-10-25 08:57:55 -0700
committerDoug Ledford <dledford@redhat.com>2016-11-15 16:16:41 -0500
commitacd7c8fe14938a315f0ac1b92a92375f7226c2fd (patch)
treea7c672b8310d578ed82ea0f9f353ee8826abcfd9
parentd9ac4555fb2bcd6b794aaa0b39acad81111d9f42 (diff)
downloadlinux-acd7c8fe14938a315f0ac1b92a92375f7226c2fd.tar.gz
IB/hfi1: Fix an Oops on pci device force remove
This patch fixes an Oops on device unbind, when the device is used by a PSM user process. PSM processes access device resources which are freed on device removal. Similar protection exists in uverbs in ib_core for Verbs clients, but PSM doesn't use ib_uverbs hence a separate protection is required for PSM clients. Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com> Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Dean Luick <dean.luick@intel.com> Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c5
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c19
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h4
-rw-r--r--drivers/infiniband/hw/hfi1/init.c21
4 files changed, 44 insertions, 5 deletions
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 9bf5f23544d4cf..799215255b4966 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -14691,6 +14691,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_free_cntrs;
+ init_completion(&dd->user_comp);
+
+ /* The user refcount starts with one to inidicate an active device */
+ atomic_set(&dd->user_refcount, 1);
+
goto bail;
bail_free_rcverr:
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 677efa0e8cd689..bd786b7bd30b12 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -172,6 +172,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
struct hfi1_devdata,
user_cdev);
+ if (!atomic_inc_not_zero(&dd->user_refcount))
+ return -ENXIO;
+
/* Just take a ref now. Not all opens result in a context assign */
kobject_get(&dd->kobj);
@@ -183,11 +186,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
fd->rec_cpu_num = -1; /* no cpu affinity by default */
fd->mm = current->mm;
atomic_inc(&fd->mm->mm_count);
- }
+ fp->private_data = fd;
+ } else {
+ fp->private_data = NULL;
+
+ if (atomic_dec_and_test(&dd->user_refcount))
+ complete(&dd->user_comp);
- fp->private_data = fd;
+ return -ENOMEM;
+ }
- return fd ? 0 : -ENOMEM;
+ return 0;
}
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
@@ -798,6 +807,10 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
done:
mmdrop(fdata->mm);
kobject_put(&dd->kobj);
+
+ if (atomic_dec_and_test(&dd->user_refcount))
+ complete(&dd->user_comp);
+
kfree(fdata);
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 3c06d204bafd3d..368e96c109a501 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1174,6 +1174,10 @@ struct hfi1_devdata {
spinlock_t aspm_lock;
/* Number of verbs contexts which have disabled ASPM */
atomic_t aspm_disabled_cnt;
+ /* Keeps track of user space clients */
+ atomic_t user_refcount;
+ /* Used to wait for outstanding user space clients before dev removal */
+ struct completion user_comp;
struct hfi1_affinity *affinity;
struct rhashtable sdma_rht;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 60db61536fedf3..e28a6b633ea993 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1538,12 +1538,31 @@ bail:
return ret;
}
+static void wait_for_clients(struct hfi1_devdata *dd)
+{
+ /*
+ * Remove the device init value and complete the device if there is
+ * no clients or wait for active clients to finish.
+ */
+ if (atomic_dec_and_test(&dd->user_refcount))
+ complete(&dd->user_comp);
+
+ wait_for_completion(&dd->user_comp);
+}
+
static void remove_one(struct pci_dev *pdev)
{
struct hfi1_devdata *dd = pci_get_drvdata(pdev);
/* close debugfs files before ib unregister */
hfi1_dbg_ibdev_exit(&dd->verbs_dev);
+
+ /* remove the /dev hfi1 interface */
+ hfi1_device_remove(dd);
+
+ /* wait for existing user space clients to finish */
+ wait_for_clients(dd);
+
/* unregister from IB core */
hfi1_unregister_ib_device(dd);
@@ -1558,8 +1577,6 @@ static void remove_one(struct pci_dev *pdev)
/* wait until all of our (qsfp) queue_work() calls complete */
flush_workqueue(ib_wq);
- hfi1_device_remove(dd);
-
postinit_cleanup(dd);
}