diff options
author | Jack Morgenstein <jackm@dev.mellanox.co.il> | 2007-09-20 11:22:37 -0700 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2007-09-20 11:23:50 -0700 |
commit | c45efd89ef667b30b84e4f63d8c712d1ebcabde2 (patch) | |
tree | d1c418c61fe83de7e1b27a496d5861b34a52b976 | |
parent | 338a180f3ca81d12dbc5b6587433d557769ee098 (diff) | |
download | libmlx4-c45efd89ef667b30b84e4f63d8c712d1ebcabde2.tar.gz |
Fix data corruption triggered by wrong headroom marking order
This is an addendum to commit 561da8d1 ("Handle new FW requirement for
send request prefetching"). We also need to handle prefetch marking
properly for S/G segments, or else the HCA may end up processing S/G
segments that are not fully written and end up sending the wrong data.
We write S/G segments in reverse order into the WQE, in order to
guarantee that the first dword of all cachelines containing S/G
segments is written last (overwriting the headroom invalidation
pattern). The entire cacheline will thus contain valid data when the
invalidation pattern is overwritten.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- | src/qp.c | 23 |
1 files changed, 20 insertions, 3 deletions
@@ -143,14 +143,31 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, dseg->qkey = htonl(wr->wr.ud.remote_qkey); } -static __always_inline void set_data_seg(struct mlx4_wqe_data_seg *dseg, - struct ibv_sge *sg) +static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ibv_sge *sg) { dseg->byte_count = htonl(sg->length); dseg->lkey = htonl(sg->lkey); dseg->addr = htonll(sg->addr); } +static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ibv_sge *sg) +{ + dseg->lkey = htonl(sg->lkey); + dseg->addr = htonll(sg->addr); + + /* + * Need a barrier here before writing the byte_count field to + * make sure that all the data is visible before the + * byte_count field is set. Otherwise, if the segment begins + * a new cacheline, the HCA prefetcher could grab the 64-byte + * chunk and get a valid (!= 0xffffffff) byte count but + * stale data, and end up sending the wrong data. + */ + wmb(); + + dseg->byte_count = htonl(sg->length); +} + int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, struct ibv_send_wr **bad_wr) { @@ -430,7 +447,7 @@ int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, scat = get_recv_wqe(qp, ind); for (i = 0; i < wr->num_sge; ++i) - set_data_seg(scat + i, wr->sg_list + i); + __set_data_seg(scat + i, wr->sg_list + i); if (i < qp->rq.max_gs) { scat[i].byte_count = 0; |