Merge branch 'from-tomtucker' into for-2.6.28

commit 107e0008df: 4 changed files with 710 additions and 123 deletions
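This merge teaches the svcrdma server transport to register RPC buffers with fast-register memory regions (FRMRs) whenever the device advertises IB_DEVICE_MEM_MGT_EXTENSIONS, instead of exposing a single DMA MR over all of physical memory. Before the per-file hunks, here is a condensed sketch of the FRMR life cycle the new API implies; the helpers are the ones added in this diff, but the call site is hypothetical and the error handling is trimmed, so read it as an illustration rather than kernel code:

    /* Hypothetical caller: register a single receive page via an FRMR. */
    static int frmr_lifecycle_sketch(struct svcxprt_rdma *xprt, struct page *page)
    {
        struct svc_rdma_fastreg_mr *frmr;

        frmr = svc_rdma_get_frmr(xprt);     /* pop from sc_frmr_q, or allocate */
        if (IS_ERR(frmr))
            return -ENOMEM;
        frmr->kva = page_address(page);     /* MR's virtual base address */
        frmr->direction = DMA_FROM_DEVICE;
        frmr->access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
        frmr->map_len = PAGE_SIZE;
        frmr->page_list_len = 1;
        frmr->page_list->page_list[0] =
            ib_dma_map_single(xprt->sc_cm_id->device,
                              page_address(page), PAGE_SIZE,
                              DMA_FROM_DEVICE);
        /* Posts an IB_WR_FAST_REG_MR work request on the send queue */
        if (svc_rdma_fastreg(xprt, frmr)) {
            svc_rdma_put_frmr(xprt, frmr);  /* unmaps and requeues the FRMR */
            return -EIO;
        }
        return 0;
    }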
include/linux/sunrpc/svc_rdma.h

@@ -72,6 +72,7 @@ extern atomic_t rdma_stat_sq_prod;
  */
 struct svc_rdma_op_ctxt {
     struct svc_rdma_op_ctxt *read_hdr;
+    struct svc_rdma_fastreg_mr *frmr;
     int hdr_count;
     struct xdr_buf arg;
     struct list_head dto_q;
@@ -103,16 +104,30 @@ struct svc_rdma_chunk_sge {
     int start;      /* sge no for this chunk */
     int count;      /* sge count for this chunk */
 };
+struct svc_rdma_fastreg_mr {
+    struct ib_mr *mr;
+    void *kva;
+    struct ib_fast_reg_page_list *page_list;
+    int page_list_len;
+    unsigned long access_flags;
+    unsigned long map_len;
+    enum dma_data_direction direction;
+    struct list_head frmr_list;
+};
 struct svc_rdma_req_map {
+    struct svc_rdma_fastreg_mr *frmr;
     unsigned long count;
     union {
         struct kvec sge[RPCSVC_MAXPAGES];
         struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
     };
 };

 #define RDMACTXT_F_FAST_UNREG    1
 #define RDMACTXT_F_LAST_CTXT     2

+#define SVCRDMA_DEVCAP_FAST_REG     1    /* fast mr registration */
+#define SVCRDMA_DEVCAP_READ_W_INV   2    /* read w/ invalidate */
+
 struct svcxprt_rdma {
     struct svc_xprt      sc_xprt;       /* SVC transport structure */
     struct rdma_cm_id    *sc_cm_id;     /* RDMA connection id */
@@ -136,6 +151,11 @@ struct svcxprt_rdma {
     struct ib_cq         *sc_rq_cq;
     struct ib_cq         *sc_sq_cq;
     struct ib_mr         *sc_phys_mr;   /* MR for server memory */
+    u32                  sc_dev_caps;   /* distilled device caps */
+    u32                  sc_dma_lkey;   /* local dma key */
+    unsigned int         sc_frmr_pg_list_len;
+    struct list_head     sc_frmr_q;
+    spinlock_t           sc_frmr_q_lock;

     spinlock_t           sc_lock;       /* transport lock */

@@ -192,8 +212,13 @@ extern int svc_rdma_post_recv(struct svcxprt_rdma *);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
+extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
 extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
 extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
+extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *);
+extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
+extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
+                              struct svc_rdma_fastreg_mr *);
 extern void svc_sq_reap(struct svcxprt_rdma *);
 extern void svc_rq_reap(struct svcxprt_rdma *);
 extern struct svc_xprt_class svc_rdma_class;
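The header now carries the whole FRMR state machine: each op_ctxt and req_map can point at an FRMR, the RDMACTXT_F_FAST_UNREG flag marks the context whose completion must return it, and sc_dev_caps distills device capabilities into the SVCRDMA_DEVCAP_* bits. A two-line sketch of the contract the flag encodes (the data paths below set it; process_context() in svc_rdma_transport.c honors it):

    /* Sketch: on completion, only the flagged context returns the FRMR. */
    if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
        svc_rdma_put_frmr(xprt, ctxt->frmr);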
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c

@@ -116,7 +116,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
  *
  * Assumptions:
  * - chunk[0]->position points to pages[0] at an offset of 0
- * - pages[] is not physically or virtually contigous and consists of
+ * - pages[] is not physically or virtually contiguous and consists of
  *   PAGE_SIZE elements.
  *
  * Output:
@@ -125,7 +125,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
  *   chunk in the read list
  *
  */
-static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
+static int map_read_chunks(struct svcxprt_rdma *xprt,
                            struct svc_rqst *rqstp,
                            struct svc_rdma_op_ctxt *head,
                            struct rpcrdma_msg *rmsgp,
@@ -211,26 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
     return sge_no;
 }

-static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
-                              struct svc_rdma_op_ctxt *ctxt,
-                              struct kvec *vec,
-                              u64 *sgl_offset,
-                              int count)
+/* Map a read-chunk-list to an XDR and fast register the page-list.
+ *
+ * Assumptions:
+ * - chunk[0] position points to pages[0] at an offset of 0
+ * - pages[] will be made physically contiguous by creating a one-off memory
+ *   region using the fastreg verb.
+ * - byte_count is # of bytes in read-chunk-list
+ * - ch_count is # of chunks in read-chunk-list
+ *
+ * Output:
+ * - sge array pointing into pages[] array.
+ * - chunk_sge array specifying sge index and count for each
+ *   chunk in the read list
+ */
+static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
+                                struct svc_rqst *rqstp,
+                                struct svc_rdma_op_ctxt *head,
+                                struct rpcrdma_msg *rmsgp,
+                                struct svc_rdma_req_map *rpl_map,
+                                struct svc_rdma_req_map *chl_map,
+                                int ch_count,
+                                int byte_count)
+{
+    int page_no;
+    int ch_no;
+    u32 offset;
+    struct rpcrdma_read_chunk *ch;
+    struct svc_rdma_fastreg_mr *frmr;
+    int ret = 0;
+
+    frmr = svc_rdma_get_frmr(xprt);
+    if (IS_ERR(frmr))
+        return -ENOMEM;
+
+    head->frmr = frmr;
+    head->arg.head[0] = rqstp->rq_arg.head[0];
+    head->arg.tail[0] = rqstp->rq_arg.tail[0];
+    head->arg.pages = &head->pages[head->count];
+    head->hdr_count = head->count; /* save count of hdr pages */
+    head->arg.page_base = 0;
+    head->arg.page_len = byte_count;
+    head->arg.len = rqstp->rq_arg.len + byte_count;
+    head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
+
+    /* Fast register the page list */
+    frmr->kva = page_address(rqstp->rq_arg.pages[0]);
+    frmr->direction = DMA_FROM_DEVICE;
+    frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
+    frmr->map_len = byte_count;
+    frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
+    for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
+        frmr->page_list->page_list[page_no] =
+            ib_dma_map_single(xprt->sc_cm_id->device,
+                              page_address(rqstp->rq_arg.pages[page_no]),
+                              PAGE_SIZE, DMA_TO_DEVICE);
+        if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+                                 frmr->page_list->page_list[page_no]))
+            goto fatal_err;
+        atomic_inc(&xprt->sc_dma_used);
+        head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
+    }
+    head->count += page_no;
+
+    /* rq_respages points one past arg pages */
+    rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
+
+    /* Create the reply and chunk maps */
+    offset = 0;
+    ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+    for (ch_no = 0; ch_no < ch_count; ch_no++) {
+        rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
+        rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length;
+        chl_map->ch[ch_no].count = 1;
+        chl_map->ch[ch_no].start = ch_no;
+        offset += ch->rc_target.rs_length;
+        ch++;
+    }
+
+    ret = svc_rdma_fastreg(xprt, frmr);
+    if (ret)
+        goto fatal_err;
+
+    return ch_no;
+
+ fatal_err:
+    printk("svcrdma: error fast registering xdr for xprt %p", xprt);
+    svc_rdma_put_frmr(xprt, frmr);
+    return -EIO;
+}
+
+static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
+                             struct svc_rdma_op_ctxt *ctxt,
+                             struct svc_rdma_fastreg_mr *frmr,
+                             struct kvec *vec,
+                             u64 *sgl_offset,
+                             int count)
 {
     int i;

     ctxt->count = count;
     ctxt->direction = DMA_FROM_DEVICE;
     for (i = 0; i < count; i++) {
-        atomic_inc(&xprt->sc_dma_used);
-        ctxt->sge[i].addr =
-            ib_dma_map_single(xprt->sc_cm_id->device,
-                              vec[i].iov_base, vec[i].iov_len,
-                              DMA_FROM_DEVICE);
+        ctxt->sge[i].length = 0; /* in case map fails */
+        if (!frmr) {
+            ctxt->sge[i].addr =
+                ib_dma_map_single(xprt->sc_cm_id->device,
+                                  vec[i].iov_base,
+                                  vec[i].iov_len,
+                                  DMA_FROM_DEVICE);
+            if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+                                     ctxt->sge[i].addr))
+                return -EINVAL;
+            ctxt->sge[i].lkey = xprt->sc_dma_lkey;
+            atomic_inc(&xprt->sc_dma_used);
+        } else {
+            ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
+            ctxt->sge[i].lkey = frmr->mr->lkey;
+        }
         ctxt->sge[i].length = vec[i].iov_len;
-        ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
         *sgl_offset = *sgl_offset + vec[i].iov_len;
     }
+    return 0;
 }

 static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
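The page arithmetic in fast_reg_read_chunks() deserves a worked example. PAGE_ALIGN(byte_count) >> PAGE_SHIFT rounds the read-chunk payload up to whole pages for the page list, while map_len keeps the exact byte count; the chunk map then points each chunk at frmr->kva plus a running offset, so discontiguous pages present one contiguous window. Assumed numbers, 4 KB pages:

    /* byte_count = 10000:
     *   PAGE_ALIGN(10000) = 12288, 12288 >> 12 = 3 pages in page_list,
     *   frmr->map_len     = 10000 exact bytes registered.
     * Two chunks of 6000 and 4000 bytes then map as:
     *   rpl_map->sge[0].iov_base = frmr->kva + 0
     *   rpl_map->sge[1].iov_base = frmr->kva + 6000
     */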
@@ -278,6 +380,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
                          struct svc_rdma_op_ctxt *hdr_ctxt)
 {
     struct ib_send_wr read_wr;
+    struct ib_send_wr inv_wr;
     int err = 0;
     int ch_no;
     int ch_count;
@@ -301,9 +404,20 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
     svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
     if (ch_count > RPCSVC_MAXPAGES)
         return -EINVAL;
-    sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
-                                rpl_map, chl_map,
-                                ch_count, byte_count);
+
+    if (!xprt->sc_frmr_pg_list_len)
+        sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
+                                    rpl_map, chl_map, ch_count,
+                                    byte_count);
+    else
+        sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
+                                         rpl_map, chl_map, ch_count,
+                                         byte_count);
+    if (sge_count < 0) {
+        err = -EIO;
+        goto out;
+    }
+
     sgl_offset = 0;
     ch_no = 0;
@@ -312,13 +426,16 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 next_sge:
     ctxt = svc_rdma_get_context(xprt);
     ctxt->direction = DMA_FROM_DEVICE;
+    ctxt->frmr = hdr_ctxt->frmr;
     ctxt->read_hdr = NULL;
     clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+    clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);

     /* Prepare READ WR */
     memset(&read_wr, 0, sizeof read_wr);
-    ctxt->wr_op = IB_WR_RDMA_READ;
     read_wr.wr_id = (unsigned long)ctxt;
     read_wr.opcode = IB_WR_RDMA_READ;
+    ctxt->wr_op = read_wr.opcode;
     read_wr.send_flags = IB_SEND_SIGNALED;
     read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
     read_wr.wr.rdma.remote_addr =
@@ -327,10 +444,15 @@ next_sge:
     read_wr.sg_list = ctxt->sge;
     read_wr.num_sge =
         rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
-    rdma_set_ctxt_sge(xprt, ctxt,
-                      &rpl_map->sge[chl_map->ch[ch_no].start],
-                      &sgl_offset,
-                      read_wr.num_sge);
+    err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
+                            &rpl_map->sge[chl_map->ch[ch_no].start],
+                            &sgl_offset,
+                            read_wr.num_sge);
+    if (err) {
+        svc_rdma_unmap_dma(ctxt);
+        svc_rdma_put_context(ctxt, 0);
+        goto out;
+    }
     if (((ch+1)->rc_discrim == 0) &&
         (read_wr.num_sge == chl_map->ch[ch_no].count)) {
         /*
@@ -339,6 +461,29 @@ next_sge:
          * the client and the RPC needs to be enqueued.
          */
         set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+        if (hdr_ctxt->frmr) {
+            set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+            /*
+             * Invalidate the local MR used to map the data
+             * sink.
+             */
+            if (xprt->sc_dev_caps &
+                SVCRDMA_DEVCAP_READ_W_INV) {
+                read_wr.opcode =
+                    IB_WR_RDMA_READ_WITH_INV;
+                ctxt->wr_op = read_wr.opcode;
+                read_wr.ex.invalidate_rkey =
+                    ctxt->frmr->mr->lkey;
+            } else {
+                /* Prepare INVALIDATE WR */
+                memset(&inv_wr, 0, sizeof inv_wr);
+                inv_wr.opcode = IB_WR_LOCAL_INV;
+                inv_wr.send_flags = IB_SEND_SIGNALED;
+                inv_wr.ex.invalidate_rkey =
+                    hdr_ctxt->frmr->mr->lkey;
+                read_wr.next = &inv_wr;
+            }
+        }
         ctxt->read_hdr = hdr_ctxt;
     }
     /* Post the read */
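Two invalidation strategies are selected above. When the device advertises SVCRDMA_DEVCAP_READ_W_INV (the iWARP case), the last RDMA_READ itself invalidates the data-sink MR; otherwise a separate IB_WR_LOCAL_INV is chained behind the read. The decision, condensed from the hunk for readability:

    if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
        /* one WR: read and invalidate in a single operation */
        read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
        read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
    } else {
        /* two WRs: chain a LOCAL_INV behind the read */
        inv_wr.opcode = IB_WR_LOCAL_INV;
        inv_wr.ex.invalidate_rkey = hdr_ctxt->frmr->mr->lkey;
        read_wr.next = &inv_wr;
    }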
net/sunrpc/xprtrdma/svc_rdma_sendto.c

@@ -69,9 +69,127 @@
  * array is only concerned with the reply we are assured that we have
  * one extra page for the RPCRDMA header.
  */
-static void xdr_to_sge(struct svcxprt_rdma *xprt,
-                       struct xdr_buf *xdr,
-                       struct svc_rdma_req_map *vec)
+int fast_reg_xdr(struct svcxprt_rdma *xprt,
+                 struct xdr_buf *xdr,
+                 struct svc_rdma_req_map *vec)
+{
+    int sge_no;
+    u32 sge_bytes;
+    u32 page_bytes;
+    u32 page_off;
+    int page_no = 0;
+    u8 *frva;
+    struct svc_rdma_fastreg_mr *frmr;
+
+    frmr = svc_rdma_get_frmr(xprt);
+    if (IS_ERR(frmr))
+        return -ENOMEM;
+    vec->frmr = frmr;
+
+    /* Skip the RPCRDMA header */
+    sge_no = 1;
+
+    /* Map the head. */
+    frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
+    vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
+    vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
+    vec->count = 2;
+    sge_no++;
+
+    /* Build the FRMR */
+    frmr->kva = frva;
+    frmr->direction = DMA_TO_DEVICE;
+    frmr->access_flags = 0;
+    frmr->map_len = PAGE_SIZE;
+    frmr->page_list_len = 1;
+    frmr->page_list->page_list[page_no] =
+        ib_dma_map_single(xprt->sc_cm_id->device,
+                          (void *)xdr->head[0].iov_base,
+                          PAGE_SIZE, DMA_TO_DEVICE);
+    if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+                             frmr->page_list->page_list[page_no]))
+        goto fatal_err;
+    atomic_inc(&xprt->sc_dma_used);
+
+    page_off = xdr->page_base;
+    page_bytes = xdr->page_len + page_off;
+    if (!page_bytes)
+        goto encode_tail;
+
+    /* Map the pages */
+    vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
+    vec->sge[sge_no].iov_len = page_bytes;
+    sge_no++;
+    while (page_bytes) {
+        struct page *page;
+
+        page = xdr->pages[page_no++];
+        sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
+        page_bytes -= sge_bytes;
+
+        frmr->page_list->page_list[page_no] =
+            ib_dma_map_page(xprt->sc_cm_id->device, page, 0,
+                            PAGE_SIZE, DMA_TO_DEVICE);
+        if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+                                 frmr->page_list->page_list[page_no]))
+            goto fatal_err;
+
+        atomic_inc(&xprt->sc_dma_used);
+        page_off = 0; /* reset for next time through loop */
+        frmr->map_len += PAGE_SIZE;
+        frmr->page_list_len++;
+    }
+    vec->count++;
+
+ encode_tail:
+    /* Map tail */
+    if (0 == xdr->tail[0].iov_len)
+        goto done;
+
+    vec->count++;
+    vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
+
+    if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
+        ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
+        /*
+         * If head and tail use the same page, we don't need
+         * to map it again.
+         */
+        vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
+    } else {
+        void *va;
+
+        /* Map another page for the tail */
+        page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+        va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
+        vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
+
+        frmr->page_list->page_list[page_no] =
+            ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE,
+                              DMA_TO_DEVICE);
+        if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+                                 frmr->page_list->page_list[page_no]))
+            goto fatal_err;
+        atomic_inc(&xprt->sc_dma_used);
+        frmr->map_len += PAGE_SIZE;
+        frmr->page_list_len++;
+    }
+
+ done:
+    if (svc_rdma_fastreg(xprt, frmr))
+        goto fatal_err;
+
+    return 0;
+
+ fatal_err:
+    printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
+    svc_rdma_put_frmr(xprt, frmr);
+    return -EIO;
+}
+
+static int map_xdr(struct svcxprt_rdma *xprt,
+                   struct xdr_buf *xdr,
+                   struct svc_rdma_req_map *vec)
 {
     int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
     int sge_no;
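A note on the addressing above: fast_reg_xdr() anchors the FRMR at frva, the page-aligned base of the XDR head, and then computes each later iov_base as frva + frmr->map_len + page_off. The result is that head, pagelist, and tail appear back to back inside the MR's virtual window even though the underlying pages are scattered. Worked offsets, with assumed addresses and 4 KB pages:

    /* head iov_base = 0x...1100  ->  frva = 0x...1000, map_len = 4096.
     * A pagelist with xdr->page_base = 256 then lands at
     *   iov_base = frva + 4096 + 256,
     * i.e. contiguous in the registered window, wherever the page lives.
     */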
@@ -83,6 +201,9 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
     BUG_ON(xdr->len !=
            (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));

+    if (xprt->sc_frmr_pg_list_len)
+        return fast_reg_xdr(xprt, xdr, vec);
+
     /* Skip the first sge, this is for the RPCRDMA header */
     sge_no = 1;
@@ -116,9 +237,12 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,

     BUG_ON(sge_no > sge_max);
     vec->count = sge_no;
+    return 0;
 }

+/* Assumptions:
+ * - We are using FRMR
+ *     - or -
+ * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
+ */
 static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
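The assumption block above is load-bearing for the chunk writers that follow: behind an FRMR the whole reply is virtually contiguous under one lkey, so a single RDMA_WRITE may cover up to vec->frmr->map_len bytes; without one, a write is bounded by sc_max_sge page-sized scatter/gather entries. Restated as the bound both send_write_chunks() and send_reply_chunks() pick below (a sketch of the diff's own logic):

    if (vec->frmr)
        max_write = vec->frmr->map_len;           /* one FRMR spans the reply */
    else
        max_write = xprt->sc_max_sge * PAGE_SIZE; /* limited by the SGE list */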
@@ -158,30 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
     sge_no = 0;

     /* Copy the remaining SGE */
-    while (bc != 0 && xdr_sge_no < vec->count) {
-        sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
-        sge_bytes = min((size_t)bc,
-                        (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
+    while (bc != 0) {
+        sge_bytes = min_t(size_t,
+                          bc, vec->sge[xdr_sge_no].iov_len-sge_off);
         sge[sge_no].length = sge_bytes;
-        atomic_inc(&xprt->sc_dma_used);
-        sge[sge_no].addr =
-            ib_dma_map_single(xprt->sc_cm_id->device,
-                              (void *)
-                              vec->sge[xdr_sge_no].iov_base + sge_off,
-                              sge_bytes, DMA_TO_DEVICE);
-        if (dma_mapping_error(xprt->sc_cm_id->device->dma_device,
-                              sge[sge_no].addr))
-            goto err;
+        if (!vec->frmr) {
+            sge[sge_no].addr =
+                ib_dma_map_single(xprt->sc_cm_id->device,
+                                  (void *)
+                                  vec->sge[xdr_sge_no].iov_base + sge_off,
+                                  sge_bytes, DMA_TO_DEVICE);
+            if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+                                     sge[sge_no].addr))
+                goto err;
+            atomic_inc(&xprt->sc_dma_used);
+            sge[sge_no].lkey = xprt->sc_dma_lkey;
+        } else {
+            sge[sge_no].addr = (unsigned long)
+                vec->sge[xdr_sge_no].iov_base + sge_off;
+            sge[sge_no].lkey = vec->frmr->mr->lkey;
+        }
+        ctxt->count++;
+        ctxt->frmr = vec->frmr;
         sge_off = 0;
         sge_no++;
-        ctxt->count++;
         xdr_sge_no++;
+        BUG_ON(xdr_sge_no > vec->count);
         bc -= sge_bytes;
     }

     BUG_ON(bc != 0);
-    BUG_ON(xdr_sge_no > vec->count);

     /* Prepare WRITE WR */
     memset(&write_wr, 0, sizeof write_wr);
     ctxt->wr_op = IB_WR_RDMA_WRITE;
@@ -226,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
     res_ary = (struct rpcrdma_write_array *)
         &rdma_resp->rm_body.rm_chunks[1];

-    max_write = xprt->sc_max_sge * PAGE_SIZE;
+    if (vec->frmr)
+        max_write = vec->frmr->map_len;
+    else
+        max_write = xprt->sc_max_sge * PAGE_SIZE;

     /* Write chunks start at the pagelist */
     for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
@@ -297,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
     res_ary = (struct rpcrdma_write_array *)
         &rdma_resp->rm_body.rm_chunks[2];

-    max_write = xprt->sc_max_sge * PAGE_SIZE;
+    if (vec->frmr)
+        max_write = vec->frmr->map_len;
+    else
+        max_write = xprt->sc_max_sge * PAGE_SIZE;

     /* xdr offset starts at RPC message */
     for (xdr_off = 0, chunk_no = 0;
@@ -307,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
         ch = &arg_ary->wc_array[chunk_no].wc_target;
         write_len = min(xfer_len, ch->rs_length);

-
         /* Prepare the reply chunk given the length actually
          * written */
         rs_offset = get_unaligned(&(ch->rs_offset));
@@ -366,6 +500,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
                       int byte_count)
 {
     struct ib_send_wr send_wr;
+    struct ib_send_wr inv_wr;
     int sge_no;
     int sge_bytes;
     int page_no;
@@ -385,27 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma,
     /* Prepare the context */
     ctxt->pages[0] = page;
     ctxt->count = 1;
+    ctxt->frmr = vec->frmr;
+    if (vec->frmr)
+        set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+    else
+        clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);

     /* Prepare the SGE for the RPCRDMA Header */
-    atomic_inc(&rdma->sc_dma_used);
     ctxt->sge[0].addr =
         ib_dma_map_page(rdma->sc_cm_id->device,
                         page, 0, PAGE_SIZE, DMA_TO_DEVICE);
+    if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
+        goto err;
+    atomic_inc(&rdma->sc_dma_used);
+
     ctxt->direction = DMA_TO_DEVICE;
+
     ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
-    ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
+    ctxt->sge[0].lkey = rdma->sc_dma_lkey;

     /* Determine how many of our SGE are to be transmitted */
     for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
         sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
         byte_count -= sge_bytes;
-        atomic_inc(&rdma->sc_dma_used);
-        ctxt->sge[sge_no].addr =
-            ib_dma_map_single(rdma->sc_cm_id->device,
-                              vec->sge[sge_no].iov_base,
-                              sge_bytes, DMA_TO_DEVICE);
+        if (!vec->frmr) {
+            ctxt->sge[sge_no].addr =
+                ib_dma_map_single(rdma->sc_cm_id->device,
+                                  vec->sge[sge_no].iov_base,
+                                  sge_bytes, DMA_TO_DEVICE);
+            if (ib_dma_mapping_error(rdma->sc_cm_id->device,
+                                     ctxt->sge[sge_no].addr))
+                goto err;
+            atomic_inc(&rdma->sc_dma_used);
+            ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
+        } else {
+            ctxt->sge[sge_no].addr = (unsigned long)
+                vec->sge[sge_no].iov_base;
+            ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
+        }
         ctxt->sge[sge_no].length = sge_bytes;
-        ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
     }
     BUG_ON(byte_count != 0);
@@ -417,11 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma,
         ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
         ctxt->count++;
         rqstp->rq_respages[page_no] = NULL;
-        /* If there are more pages than SGE, terminate SGE list */
+        /*
+         * If there are more pages than SGE, terminate SGE
+         * list so that svc_rdma_unmap_dma doesn't attempt to
+         * unmap garbage.
+         */
         if (page_no+1 >= sge_no)
             ctxt->sge[page_no+1].length = 0;
     }
-    BUG_ON(sge_no > rdma->sc_max_sge);
+    BUG_ON(sge_no > ctxt->count);
     memset(&send_wr, 0, sizeof send_wr);
     ctxt->wr_op = IB_WR_SEND;
     send_wr.wr_id = (unsigned long)ctxt;
@@ -429,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma,
     send_wr.num_sge = sge_no;
     send_wr.opcode = IB_WR_SEND;
     send_wr.send_flags = IB_SEND_SIGNALED;
+    if (vec->frmr) {
+        /* Prepare INVALIDATE WR */
+        memset(&inv_wr, 0, sizeof inv_wr);
+        inv_wr.opcode = IB_WR_LOCAL_INV;
+        inv_wr.send_flags = IB_SEND_SIGNALED;
+        inv_wr.ex.invalidate_rkey =
+            vec->frmr->mr->lkey;
+        send_wr.next = &inv_wr;
+    }

     ret = svc_rdma_send(rdma, &send_wr);
     if (ret)
-        svc_rdma_put_context(ctxt, 1);
+        goto err;

-    return ret;
+    return 0;
+
+ err:
+    svc_rdma_put_frmr(rdma, vec->frmr);
+    svc_rdma_put_context(ctxt, 1);
+    return -EIO;
 }

 void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
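Because the reply FRMR must not stay registered once the RPC is on the wire, send_reply() chains an IB_WR_LOCAL_INV behind the SEND whenever vec->frmr is set. Both WRs are signaled, and the reworked svc_rdma_send() (later in this diff) counts the whole chain, so SQ slots and transport references stay balanced. The posted chain, condensed:

    /* send_wr -> inv_wr, posted as one chain; wr_count == 2 in svc_rdma_send() */
    inv_wr.opcode = IB_WR_LOCAL_INV;
    inv_wr.send_flags = IB_SEND_SIGNALED;
    inv_wr.ex.invalidate_rkey = vec->frmr->mr->lkey;
    send_wr.next = &inv_wr;
    ret = svc_rdma_send(rdma, &send_wr);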
@@ -477,8 +649,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
     ctxt = svc_rdma_get_context(rdma);
     ctxt->direction = DMA_TO_DEVICE;
     vec = svc_rdma_get_req_map();
-    xdr_to_sge(rdma, &rqstp->rq_res, vec);
-
+    ret = map_xdr(rdma, &rqstp->rq_res, vec);
+    if (ret)
+        goto err0;
     inline_bytes = rqstp->rq_res.len;

     /* Create the RDMA response header */
@@ -498,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
     if (ret < 0) {
         printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
                ret);
-        goto error;
+        goto err1;
     }
     inline_bytes -= ret;
@@ -508,7 +681,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
     if (ret < 0) {
         printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
                ret);
-        goto error;
+        goto err1;
     }
     inline_bytes -= ret;
@@ -517,9 +690,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
     svc_rdma_put_req_map(vec);
     dprintk("svcrdma: send_reply returns %d\n", ret);
     return ret;
- error:
+
+ err1:
+    put_page(res_page);
+ err0:
     svc_rdma_put_req_map(vec);
     svc_rdma_put_context(ctxt, 0);
-    put_page(res_page);
     return ret;
 }
net/sunrpc/xprtrdma/svc_rdma_transport.c

@@ -100,20 +100,29 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
     ctxt->xprt = xprt;
     INIT_LIST_HEAD(&ctxt->dto_q);
     ctxt->count = 0;
+    ctxt->frmr = NULL;
     atomic_inc(&xprt->sc_ctxt_used);
     return ctxt;
 }

-static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
+void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 {
     struct svcxprt_rdma *xprt = ctxt->xprt;
     int i;
     for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
-        atomic_dec(&xprt->sc_dma_used);
-        ib_dma_unmap_single(xprt->sc_cm_id->device,
-                            ctxt->sge[i].addr,
-                            ctxt->sge[i].length,
-                            ctxt->direction);
+        /*
+         * Unmap the DMA addr in the SGE if the lkey matches
+         * the sc_dma_lkey, otherwise, ignore it since it is
+         * an FRMR lkey and will be unmapped later when the
+         * last WR that uses it completes.
+         */
+        if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
+            atomic_dec(&xprt->sc_dma_used);
+            ib_dma_unmap_single(xprt->sc_cm_id->device,
+                                ctxt->sge[i].addr,
+                                ctxt->sge[i].length,
+                                ctxt->direction);
+        }
     }
 }
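The lkey test above is how ownership of a DMA mapping is tracked from here on: only SGEs mapped with sc_dma_lkey were created one at a time by this context and may be unmapped here, while FRMR-backed SGEs carry the MR's lkey and are unmapped in bulk by frmr_unmap_dma() when the FRMR is put back. The invariant, stated as a comment:

    /* Exactly one owner per mapping (sketch):
     *   sge.lkey == xprt->sc_dma_lkey -> unmapped by svc_rdma_unmap_dma()
     *   sge.lkey == frmr->mr->lkey    -> unmapped by frmr_unmap_dma()
     */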
@@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
         schedule_timeout_uninterruptible(msecs_to_jiffies(500));
     }
     map->count = 0;
+    map->frmr = NULL;
     return map;
 }
@@ -315,6 +325,50 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
     svc_xprt_enqueue(&xprt->sc_xprt);
 }

+/*
+ * Process a completion context
+ */
+static void process_context(struct svcxprt_rdma *xprt,
+                            struct svc_rdma_op_ctxt *ctxt)
+{
+    svc_rdma_unmap_dma(ctxt);
+
+    switch (ctxt->wr_op) {
+    case IB_WR_SEND:
+        if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
+            svc_rdma_put_frmr(xprt, ctxt->frmr);
+        svc_rdma_put_context(ctxt, 1);
+        break;
+
+    case IB_WR_RDMA_WRITE:
+        svc_rdma_put_context(ctxt, 0);
+        break;
+
+    case IB_WR_RDMA_READ:
+    case IB_WR_RDMA_READ_WITH_INV:
+        if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
+            struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
+            BUG_ON(!read_hdr);
+            if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
+                svc_rdma_put_frmr(xprt, ctxt->frmr);
+            spin_lock_bh(&xprt->sc_rq_dto_lock);
+            set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+            list_add_tail(&read_hdr->dto_q,
+                          &xprt->sc_read_complete_q);
+            spin_unlock_bh(&xprt->sc_rq_dto_lock);
+            svc_xprt_enqueue(&xprt->sc_xprt);
+        }
+        svc_rdma_put_context(ctxt, 0);
+        break;
+
+    default:
+        printk(KERN_ERR "svcrdma: unexpected completion type, "
+               "opcode=%d\n",
+               ctxt->wr_op);
+        break;
+    }
+}
+
 /*
  * Send Queue Completion Handler - potentially called on interrupt context.
  *
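Factoring the completion switch out of sq_cq_reap() into process_context() keeps all of the put-context/put-frmr policy in one place. A compact summary of the dispatch, for reference while reading the sq_cq_reap() hunks that follow:

    /* process_context() policy (summary):
     *   IB_WR_SEND                  -> put frmr if FAST_UNREG, free ctxt+pages
     *   IB_WR_RDMA_WRITE            -> free ctxt
     *   IB_WR_RDMA_READ{,_WITH_INV} -> on LAST_CTXT: put frmr if FAST_UNREG,
     *                                  queue read_hdr on sc_read_complete_q,
     *                                  enqueue the xprt; then free ctxt
     */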
@@ -327,17 +381,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
     struct ib_cq *cq = xprt->sc_sq_cq;
     int ret;

     if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
         return;

     ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
     atomic_inc(&rdma_stat_sq_poll);
     while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
-        ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
-        xprt = ctxt->xprt;
-
-        svc_rdma_unmap_dma(ctxt);
         if (wc.status != IB_WC_SUCCESS)
             /* Close the transport */
             set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -346,35 +395,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
         atomic_dec(&xprt->sc_sq_count);
         wake_up(&xprt->sc_send_wait);

-        switch (ctxt->wr_op) {
-        case IB_WR_SEND:
-            svc_rdma_put_context(ctxt, 1);
-            break;
+        ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
+        if (ctxt)
+            process_context(xprt, ctxt);

-        case IB_WR_RDMA_WRITE:
-            svc_rdma_put_context(ctxt, 0);
-            break;
-
-        case IB_WR_RDMA_READ:
-            if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
-                struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
-                BUG_ON(!read_hdr);
-                spin_lock_bh(&xprt->sc_rq_dto_lock);
-                set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-                list_add_tail(&read_hdr->dto_q,
-                              &xprt->sc_read_complete_q);
-                spin_unlock_bh(&xprt->sc_rq_dto_lock);
-                svc_xprt_enqueue(&xprt->sc_xprt);
-            }
-            svc_rdma_put_context(ctxt, 0);
-            break;
-
-        default:
-            printk(KERN_ERR "svcrdma: unexpected completion type, "
-                   "opcode=%d, status=%d\n",
-                   wc.opcode, wc.status);
-            break;
-        }
         svc_xprt_put(&xprt->sc_xprt);
     }
@@ -425,10 +449,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
     INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
     INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
     INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
+    INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
     init_waitqueue_head(&cma_xprt->sc_send_wait);

     spin_lock_init(&cma_xprt->sc_lock);
     spin_lock_init(&cma_xprt->sc_rq_dto_lock);
+    spin_lock_init(&cma_xprt->sc_frmr_q_lock);

     cma_xprt->sc_ord = svcrdma_ord;
@@ -462,7 +488,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
     struct ib_recv_wr recv_wr, *bad_recv_wr;
     struct svc_rdma_op_ctxt *ctxt;
     struct page *page;
-    unsigned long pa;
+    dma_addr_t pa;
     int sge_no;
     int buflen;
     int ret;
@@ -474,13 +500,15 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
         BUG_ON(sge_no >= xprt->sc_max_sge);
         page = svc_rdma_get_page();
         ctxt->pages[sge_no] = page;
-        atomic_inc(&xprt->sc_dma_used);
         pa = ib_dma_map_page(xprt->sc_cm_id->device,
                              page, 0, PAGE_SIZE,
                              DMA_FROM_DEVICE);
+        if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
+            goto err_put_ctxt;
+        atomic_inc(&xprt->sc_dma_used);
         ctxt->sge[sge_no].addr = pa;
         ctxt->sge[sge_no].length = PAGE_SIZE;
-        ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+        ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
         buflen += PAGE_SIZE;
     }
     ctxt->count = sge_no;
@@ -496,6 +524,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
         svc_rdma_put_context(ctxt, 1);
     }
     return ret;
+
+ err_put_ctxt:
+    svc_rdma_put_context(ctxt, 1);
+    return -ENOMEM;
 }

 /*
@@ -566,7 +598,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
         dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
                 "event=%d\n", cma_id, cma_id->context, event->event);
         handle_connect_req(cma_id,
-                           event->param.conn.responder_resources);
+                           event->param.conn.initiator_depth);
         break;

     case RDMA_CM_EVENT_ESTABLISHED:
@@ -686,6 +718,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
     return ERR_PTR(ret);
 }

+static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
+{
+    struct ib_mr *mr;
+    struct ib_fast_reg_page_list *pl;
+    struct svc_rdma_fastreg_mr *frmr;
+
+    frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
+    if (!frmr)
+        goto err;
+
+    mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
+    if (!mr)
+        goto err_free_frmr;
+
+    pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
+                                     RPCSVC_MAXPAGES);
+    if (!pl)
+        goto err_free_mr;
+
+    frmr->mr = mr;
+    frmr->page_list = pl;
+    INIT_LIST_HEAD(&frmr->frmr_list);
+    return frmr;
+
+ err_free_mr:
+    ib_dereg_mr(mr);
+ err_free_frmr:
+    kfree(frmr);
+ err:
+    return ERR_PTR(-ENOMEM);
+}
+
+static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
+{
+    struct svc_rdma_fastreg_mr *frmr;
+
+    while (!list_empty(&xprt->sc_frmr_q)) {
+        frmr = list_entry(xprt->sc_frmr_q.next,
+                          struct svc_rdma_fastreg_mr, frmr_list);
+        list_del_init(&frmr->frmr_list);
+        ib_dereg_mr(frmr->mr);
+        ib_free_fast_reg_page_list(frmr->page_list);
+        kfree(frmr);
+    }
+}
+
+struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
+{
+    struct svc_rdma_fastreg_mr *frmr = NULL;
+
+    spin_lock_bh(&rdma->sc_frmr_q_lock);
+    if (!list_empty(&rdma->sc_frmr_q)) {
+        frmr = list_entry(rdma->sc_frmr_q.next,
+                          struct svc_rdma_fastreg_mr, frmr_list);
+        list_del_init(&frmr->frmr_list);
+        frmr->map_len = 0;
+        frmr->page_list_len = 0;
+    }
+    spin_unlock_bh(&rdma->sc_frmr_q_lock);
+    if (frmr)
+        return frmr;
+
+    return rdma_alloc_frmr(rdma);
+}
+
+static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
+                           struct svc_rdma_fastreg_mr *frmr)
+{
+    int page_no;
+    for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
+        dma_addr_t addr = frmr->page_list->page_list[page_no];
+        if (ib_dma_mapping_error(frmr->mr->device, addr))
+            continue;
+        atomic_dec(&xprt->sc_dma_used);
+        ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE,
+                            frmr->direction);
+    }
+}
+
+void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
+                       struct svc_rdma_fastreg_mr *frmr)
+{
+    if (frmr) {
+        frmr_unmap_dma(rdma, frmr);
+        spin_lock_bh(&rdma->sc_frmr_q_lock);
+        BUG_ON(!list_empty(&frmr->frmr_list));
+        list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
+        spin_unlock_bh(&rdma->sc_frmr_q_lock);
+    }
+}
+
 /*
  * This is the xpo_recvfrom function for listening endpoints. Its
  * purpose is to accept incoming connections. The CMA callback handler
@@ -704,6 +827,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
     struct rdma_conn_param conn_param;
     struct ib_qp_init_attr qp_attr;
     struct ib_device_attr devattr;
+    int dma_mr_acc;
+    int need_dma_mr;
     int ret;
     int i;
@@ -819,15 +944,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
     }
     newxprt->sc_qp = newxprt->sc_cm_id->qp;

-    /* Register all of physical memory */
-    newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd,
-                                        IB_ACCESS_LOCAL_WRITE |
-                                        IB_ACCESS_REMOTE_WRITE);
-    if (IS_ERR(newxprt->sc_phys_mr)) {
-        dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret);
-        goto errout;
-    }
+    /*
+     * Use the most secure set of MR resources based on the
+     * transport type and available memory management features in
+     * the device. Here's the table implemented below:
+     *
+     *        Fast    Global    DMA      Remote WR
+     *        Reg     LKEY      MR       Access
+     *        Sup'd   Sup'd     Needed   Needed
+     *
+     * IWARP  N       N         Y        Y
+     *        N       Y         Y        Y
+     *        Y       N         Y        N
+     *        Y       Y         N        -
+     *
+     * IB     N       N         Y        N
+     *        N       Y         N        -
+     *        Y       N         Y        N
+     *        Y       Y         N        -
+     *
+     * NB: iWARP requires remote write access for the data sink
+     * of an RDMA_READ. IB does not.
+     */
+    if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+        newxprt->sc_frmr_pg_list_len =
+            devattr.max_fast_reg_page_list_len;
+        newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
+    }
+
+    /*
+     * Determine if a DMA MR is required and if so, what privs are required
+     */
+    switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) {
+    case RDMA_TRANSPORT_IWARP:
+        newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
+        if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
+            need_dma_mr = 1;
+            dma_mr_acc =
+                (IB_ACCESS_LOCAL_WRITE |
+                 IB_ACCESS_REMOTE_WRITE);
+        } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+            need_dma_mr = 1;
+            dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+        } else
+            need_dma_mr = 0;
+        break;
+    case RDMA_TRANSPORT_IB:
+        if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+            need_dma_mr = 1;
+            dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+        } else
+            need_dma_mr = 0;
+        break;
+    default:
+        goto errout;
+    }
+
+    /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
+    if (need_dma_mr) {
+        /* Register all of physical memory */
+        newxprt->sc_phys_mr =
+            ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
+        if (IS_ERR(newxprt->sc_phys_mr)) {
+            dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
+                    ret);
+            goto errout;
+        }
+        newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
+    } else
+        newxprt->sc_dma_lkey =
+            newxprt->sc_cm_id->device->local_dma_lkey;

     /* Post receive buffers */
     for (i = 0; i < newxprt->sc_max_requests; i++) {
         ret = svc_rdma_post_recv(newxprt);
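Two concrete traces through the table above, for hypothetical devices (the device classes are illustrative, not named in the diff):

    /* iWARP RNIC, FAST_REG=Y, LOCAL_DMA_LKEY=N:
     *   sc_dev_caps = FAST_REG | READ_W_INV; need_dma_mr = 1;
     *   dma_mr_acc  = IB_ACCESS_LOCAL_WRITE       (table row "Y N -> Y N")
     * IB HCA, FAST_REG=N, LOCAL_DMA_LKEY=Y:
     *   need_dma_mr = 0;
     *   sc_dma_lkey = device->local_dma_lkey      (table row "N Y -> N -")
     */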
@@ -961,6 +1148,9 @@ static void __svc_rdma_free(struct work_struct *work)
     WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
     WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);

+    /* De-allocate fastreg mr */
+    rdma_dealloc_frmr_q(rdma);
+
     /* Destroy the QP if present (not a listener) */
     if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
         ib_destroy_qp(rdma->sc_qp);
@@ -1014,21 +1204,59 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
     return 1;
 }

+/*
+ * Attempt to register the kvec representing the RPC memory with the
+ * device.
+ *
+ * Returns:
+ *  NULL : The device does not support fastreg or there were no more
+ *         fastreg mr.
+ *  frmr : The kvec register request was successfully posted.
+ *  <0   : An error was encountered attempting to register the kvec.
+ */
+int svc_rdma_fastreg(struct svcxprt_rdma *xprt,
+                     struct svc_rdma_fastreg_mr *frmr)
+{
+    struct ib_send_wr fastreg_wr;
+    u8 key;
+
+    /* Bump the key */
+    key = (u8)(frmr->mr->lkey & 0x000000FF);
+    ib_update_fast_reg_key(frmr->mr, ++key);
+
+    /* Prepare FASTREG WR */
+    memset(&fastreg_wr, 0, sizeof fastreg_wr);
+    fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+    fastreg_wr.send_flags = IB_SEND_SIGNALED;
+    fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
+    fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
+    fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
+    fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+    fastreg_wr.wr.fast_reg.length = frmr->map_len;
+    fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
+    fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
+    return svc_rdma_send(xprt, &fastreg_wr);
+}
+
 int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 {
-    struct ib_send_wr *bad_wr;
+    struct ib_send_wr *bad_wr, *n_wr;
+    int wr_count;
+    int i;
     int ret;

     if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
         return -ENOTCONN;

     BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
-    BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
-           wr->opcode);
+    wr_count = 1;
+    for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
+        wr_count++;

     /* If the SQ is full, wait until an SQ entry is available */
     while (1) {
         spin_lock_bh(&xprt->sc_lock);
-        if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
+        if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
             spin_unlock_bh(&xprt->sc_lock);
             atomic_inc(&rdma_stat_sq_starve);

@@ -1043,19 +1271,26 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
                 return 0;
             continue;
         }
-        /* Bumped used SQ WR count and post */
-        svc_xprt_get(&xprt->sc_xprt);
+        /* Take a transport ref for each WR posted */
+        for (i = 0; i < wr_count; i++)
+            svc_xprt_get(&xprt->sc_xprt);
+
+        /* Bump used SQ WR count and post */
+        atomic_add(wr_count, &xprt->sc_sq_count);
         ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
-        if (!ret)
-            atomic_inc(&xprt->sc_sq_count);
-        else {
-            svc_xprt_put(&xprt->sc_xprt);
+        if (ret) {
+            set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+            atomic_sub(wr_count, &xprt->sc_sq_count);
+            for (i = 0; i < wr_count; i++)
+                svc_xprt_put(&xprt->sc_xprt);
             dprintk("svcrdma: failed to post SQ WR rc=%d, "
                     "sc_sq_count=%d, sc_sq_depth=%d\n",
                     ret, atomic_read(&xprt->sc_sq_count),
                     xprt->sc_sq_depth);
         }
         spin_unlock_bh(&xprt->sc_lock);
+        if (ret)
+            wake_up(&xprt->sc_send_wait);
         break;
     }
     return ret;
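The key bump in svc_rdma_fastreg() is subtle enough to spell out: the low 8 bits of an MR key are the consumer-owned "key" portion, and incrementing them on every registration ensures a stale rkey/lkey from the FRMR's previous use can no longer hit the new mapping. A worked example of the arithmetic, with an assumed key value:

    /* lkey 0xABCD1203: key byte = 0x03, bumped to 0x04, so the MR is
     * re-registered as 0xABCD1204; the upper 24 bits (the MR index)
     * never change, only the 8-bit consumer key rotates. */
    key = (u8)(frmr->mr->lkey & 0x000000FF);
    ib_update_fast_reg_key(frmr->mr, ++key);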
@@ -1079,10 +1314,14 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
     length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);

     /* Prepare SGE for local address */
-    atomic_inc(&xprt->sc_dma_used);
     sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
                                p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
-    sge.lkey = xprt->sc_phys_mr->lkey;
+    if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
+        put_page(p);
+        return;
+    }
+    atomic_inc(&xprt->sc_dma_used);
+    sge.lkey = xprt->sc_dma_lkey;
     sge.length = length;

     ctxt = svc_rdma_get_context(xprt);
@@ -1103,6 +1342,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
     if (ret) {
         dprintk("svcrdma: Error %d posting send for protocol error\n",
                 ret);
+        ib_dma_unmap_page(xprt->sc_cm_id->device,
+                          sge.addr, PAGE_SIZE,
+                          DMA_FROM_DEVICE);
         svc_rdma_put_context(ctxt, 1);
     }
 }