RDMA/ocrdma: Change macros to inline functions
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
/*******************************************************************
 * This file is part of the Emulex RoCE Device Driver for          *
 * RoCE (RDMA over Converged Ethernet) adapters.                   *
 * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
 * EMULEX and SLI are trademarks of Emulex.                        *
 * www.emulex.com                                                  *
 *                                                                 *
 * This program is free software; you can redistribute it and/or   *
 * modify it under the terms of version 2 of the GNU General       *
 * Public License as published by the Free Software Foundation.    *
 * This program is distributed in the hope that it will be useful. *
 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
 * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
 * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
 * more details, a copy of which can be found in the file COPYING  *
 * included with this package.                                     *
 *
 * Contact Information:
 * linux-drivers@emulex.com
 *
 * Emulex
 * 3333 Susan Street
 * Costa Mesa, CA 92626
 *******************************************************************/

#include <linux/dma-mapping.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>

#include "ocrdma.h"
#include "ocrdma_hw.h"
#include "ocrdma_verbs.h"
#include "ocrdma_abi.h"

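/*
 * Note: as a RoCE device, ocrdma exposes a single P_Key table entry
 * containing the default partition key; the adapter has no partition
 * support, so the value is fixed at 0xffff.
 */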
int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
        /* only the default P_Key at index 0 exists (pkey_tbl_len is 1) */
        if (index > 0)
                return -EINVAL;

        *pkey = 0xffff;
        return 0;
}

int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
                     int index, union ib_gid *sgid)
{
        struct ocrdma_dev *dev;

        dev = get_ocrdma_dev(ibdev);
        memset(sgid, 0, sizeof(*sgid));
        if (index >= OCRDMA_MAX_SGID)
                return -EINVAL;

        memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid));

        return 0;
}

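/*
 * Report device capabilities from the attributes cached in dev->attr at
 * probe time. Features the adapter does not implement (memory windows,
 * FMRs, fast-register MRs, atomics) are reported as zero.
 */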
int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
{
        struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);

        memset(attr, 0, sizeof *attr);
        memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
               min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
        ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
        attr->max_mr_size = ~0ull;
        attr->page_size_cap = 0xffff000;
        attr->vendor_id = dev->nic_info.pdev->vendor;
        attr->vendor_part_id = dev->nic_info.pdev->device;
        attr->hw_ver = 0;
        attr->max_qp = dev->attr.max_qp;
        attr->max_ah = dev->attr.max_qp;
        attr->max_qp_wr = dev->attr.max_wqe;

        attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
                                        IB_DEVICE_RC_RNR_NAK_GEN |
                                        IB_DEVICE_SHUTDOWN_PORT |
                                        IB_DEVICE_SYS_IMAGE_GUID |
                                        IB_DEVICE_LOCAL_DMA_LKEY;
        attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge);
        attr->max_sge_rd = 0;
        attr->max_cq = dev->attr.max_cq;
        attr->max_cqe = dev->attr.max_cqe;
        attr->max_mr = dev->attr.max_mr;
        attr->max_mw = 0;
        attr->max_pd = dev->attr.max_pd;
        attr->atomic_cap = 0;
        attr->max_fmr = 0;
        attr->max_map_per_fmr = 0;
        attr->max_qp_rd_atom =
            min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
        attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
        attr->max_srq = (dev->attr.max_qp - 1);
        attr->max_srq_sge = dev->attr.max_srq_sge;
        attr->max_srq_wr = dev->attr.max_rqe;
        attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
        attr->max_fast_reg_page_list_len = 0;
        attr->max_pkeys = 1;
        return 0;
}

int ocrdma_query_port(struct ib_device *ibdev,
                      u8 port, struct ib_port_attr *props)
{
        enum ib_port_state port_state;
        struct ocrdma_dev *dev;
        struct net_device *netdev;

        dev = get_ocrdma_dev(ibdev);
        if (port > 1) {
                pr_err("%s(%d) invalid_port=0x%x\n", __func__,
                       dev->id, port);
                return -EINVAL;
        }
        netdev = dev->nic_info.netdev;
        if (netif_running(netdev) && netif_oper_up(netdev)) {
                port_state = IB_PORT_ACTIVE;
                props->phys_state = 5;          /* PortPhysState: LinkUp */
        } else {
                port_state = IB_PORT_DOWN;
                props->phys_state = 3;          /* PortPhysState: Disabled */
        }
        props->max_mtu = IB_MTU_4096;
        props->active_mtu = iboe_get_mtu(netdev->mtu);
        props->lid = 0;
        props->lmc = 0;
        props->sm_lid = 0;
        props->sm_sl = 0;
        props->state = port_state;
        props->port_cap_flags =
            IB_PORT_CM_SUP |
            IB_PORT_REINIT_SUP |
            IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP;
        props->gid_tbl_len = OCRDMA_MAX_SGID;
        props->pkey_tbl_len = 1;
        props->bad_pkey_cntr = 0;
        props->qkey_viol_cntr = 0;
        props->active_width = IB_WIDTH_1X;
        props->active_speed = 4;                /* 4 encodes QDR */
        props->max_msg_sz = 0x80000000;
        props->max_vl_num = 4;
        return 0;
}

int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
                       struct ib_port_modify *props)
{
        struct ocrdma_dev *dev;

        dev = get_ocrdma_dev(ibdev);
        if (port > 1) {
                pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port);
                return -EINVAL;
        }
        return 0;
}

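/*
 * Bookkeeping for userspace mappings: every (physical address, length)
 * pair that userspace may legitimately mmap() is recorded on the
 * ucontext's mm_head list, and ocrdma_mmap() below rejects any request
 * that does not match a recorded entry.
 */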
static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
                           unsigned long len)
{
        struct ocrdma_mm *mm;

        mm = kzalloc(sizeof(*mm), GFP_KERNEL);
        if (mm == NULL)
                return -ENOMEM;
        mm->key.phy_addr = phy_addr;
        mm->key.len = len;
        INIT_LIST_HEAD(&mm->entry);

        mutex_lock(&uctx->mm_list_lock);
        list_add_tail(&mm->entry, &uctx->mm_head);
        mutex_unlock(&uctx->mm_list_lock);
        return 0;
}

static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
                            unsigned long len)
{
        struct ocrdma_mm *mm, *tmp;

        mutex_lock(&uctx->mm_list_lock);
        list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
                if (len != mm->key.len || phy_addr != mm->key.phy_addr)
                        continue;

                list_del(&mm->entry);
                kfree(mm);
                break;
        }
        mutex_unlock(&uctx->mm_list_lock);
}

static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
                               unsigned long len)
{
        bool found = false;
        struct ocrdma_mm *mm;

        mutex_lock(&uctx->mm_list_lock);
        list_for_each_entry(mm, &uctx->mm_head, entry) {
                if (len != mm->key.len || phy_addr != mm->key.phy_addr)
                        continue;

                found = true;
                break;
        }
        mutex_unlock(&uctx->mm_list_lock);
        return found;
}

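/*
 * Allocate a user context. A page-aligned, DMA-coherent address-handle
 * table is carved out per context and advertised to userspace (its
 * address, length and the queue geometry travel back in the
 * alloc_ucontext response) so the library can mmap() it.
 */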
struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
                                          struct ib_udata *udata)
{
        int status;
        struct ocrdma_ucontext *ctx;
        struct ocrdma_alloc_ucontext_resp resp;
        struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
        struct pci_dev *pdev = dev->nic_info.pdev;
        u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE);

        if (!udata)
                return ERR_PTR(-EFAULT);
        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return ERR_PTR(-ENOMEM);
        ctx->dev = dev;
        INIT_LIST_HEAD(&ctx->mm_head);
        mutex_init(&ctx->mm_list_lock);

        ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len,
                                            &ctx->ah_tbl.pa, GFP_KERNEL);
        if (!ctx->ah_tbl.va) {
                kfree(ctx);
                return ERR_PTR(-ENOMEM);
        }
        memset(ctx->ah_tbl.va, 0, map_len);
        ctx->ah_tbl.len = map_len;

        /* zero the response first so no uninitialized stack bytes
         * (e.g. structure padding) leak to userspace.
         */
        memset(&resp, 0, sizeof(resp));
        resp.ah_tbl_len = ctx->ah_tbl.len;
        resp.ah_tbl_page = ctx->ah_tbl.pa;

        status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
        if (status)
                goto map_err;
        resp.dev_id = dev->id;
        resp.max_inline_data = dev->attr.max_inline_data;
        resp.wqe_size = dev->attr.wqe_size;
        resp.rqe_size = dev->attr.rqe_size;
        resp.dpp_wqe_size = dev->attr.wqe_size;

        memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver));
        status = ib_copy_to_udata(udata, &resp, sizeof(resp));
        if (status)
                goto cpy_err;
        return &ctx->ibucontext;

cpy_err:
        ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
map_err:
        dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
                          ctx->ah_tbl.pa);
        kfree(ctx);
        return ERR_PTR(status);
}

int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
{
        struct ocrdma_mm *mm, *tmp;
        struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
        struct pci_dev *pdev = uctx->dev->nic_info.pdev;

        ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
        dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
                          uctx->ah_tbl.pa);

        list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
                list_del(&mm->entry);
                kfree(mm);
        }
        kfree(uctx);
        return 0;
}

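/*
 * mmap() handler. Three kinds of regions are handed out, told apart by
 * their physical range: doorbell pages and the DPP region are device
 * BARs (the DPP pages additionally get write-combining), while queue
 * memory is ordinary DMA-coherent kernel memory.
 */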
int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
        struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context);
        struct ocrdma_dev *dev = ucontext->dev;
        unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
        u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
        unsigned long len = (vma->vm_end - vma->vm_start);
        int status = 0;
        bool found;

        if (vma->vm_start & (PAGE_SIZE - 1))
                return -EINVAL;
        found = ocrdma_search_mmap(ucontext, vm_page, len);
        if (!found)
                return -EINVAL;

        if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
                dev->nic_info.db_total_size)) &&
                (len <= dev->nic_info.db_page_size)) {
                /* doorbell mapping */
                status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
                                            len, vma->vm_page_prot);
        } else if (dev->nic_info.dpp_unmapped_len &&
                (vm_page >= (u64) dev->nic_info.dpp_unmapped_addr) &&
                (vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
                        dev->nic_info.dpp_unmapped_len)) &&
                (len <= dev->nic_info.dpp_unmapped_len)) {
                /* dpp area mapping */
                vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
                status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
                                            len, vma->vm_page_prot);
        } else {
                /* queue memory mapping */
                status = remap_pfn_range(vma, vma->vm_start,
                                         vma->vm_pgoff, len, vma->vm_page_prot);
        }
        return status;
}

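/*
 * Pass the PD's doorbell page (and its DPP page, when DPP is enabled)
 * back to userspace, registering both regions so a later mmap() of
 * them is accepted.
 */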
static int ocrdma_copy_pd_uresp(struct ocrdma_pd *pd,
                                struct ib_ucontext *ib_ctx,
                                struct ib_udata *udata)
{
        int status;
        u64 db_page_addr;
        u64 dpp_page_addr = 0;
        u32 db_page_size;
        struct ocrdma_alloc_pd_uresp rsp;
        struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);

        /* zero the response first so no uninitialized stack bytes leak */
        memset(&rsp, 0, sizeof(rsp));
        rsp.id = pd->id;
        rsp.dpp_enabled = pd->dpp_enabled;
        db_page_addr = pd->dev->nic_info.unmapped_db +
                        (pd->id * pd->dev->nic_info.db_page_size);
        db_page_size = pd->dev->nic_info.db_page_size;

        status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
        if (status)
                return status;

        if (pd->dpp_enabled) {
                dpp_page_addr = pd->dev->nic_info.dpp_unmapped_addr +
                                (pd->id * OCRDMA_DPP_PAGE_SIZE);
                status = ocrdma_add_mmap(uctx, dpp_page_addr,
                                 OCRDMA_DPP_PAGE_SIZE);
                if (status)
                        goto dpp_map_err;
                rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
                rsp.dpp_page_addr_lo = dpp_page_addr;
        }

        status = ib_copy_to_udata(udata, &rsp, sizeof(rsp));
        if (status)
                goto ucopy_err;

        pd->uctx = uctx;
        return 0;

ucopy_err:
        /* use the local uctx here: pd->uctx is only set on success */
        if (pd->dpp_enabled)
                ocrdma_del_mmap(uctx, dpp_page_addr, OCRDMA_DPP_PAGE_SIZE);
dpp_map_err:
        ocrdma_del_mmap(uctx, db_page_addr, db_page_size);
        return status;
}

struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
                              struct ib_ucontext *context,
                              struct ib_udata *udata)
{
        struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
        struct ocrdma_pd *pd;
        int status;

        pd = kzalloc(sizeof(*pd), GFP_KERNEL);
        if (!pd)
                return ERR_PTR(-ENOMEM);
        pd->dev = dev;
        if (udata && context) {
                pd->dpp_enabled =
                        (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY);
                pd->num_dpp_qp =
                        pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
        }
        status = ocrdma_mbx_alloc_pd(dev, pd);
        if (status) {
                kfree(pd);
                return ERR_PTR(status);
        }

        if (udata && context) {
                status = ocrdma_copy_pd_uresp(pd, context, udata);
                if (status)
                        goto err;
        }
        return &pd->ibpd;

err:
        ocrdma_dealloc_pd(&pd->ibpd);
        return ERR_PTR(status);
}

int ocrdma_dealloc_pd(struct ib_pd *ibpd)
{
        struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
        struct ocrdma_dev *dev = pd->dev;
        int status;
        u64 usr_db;

        status = ocrdma_mbx_dealloc_pd(dev, pd);
        if (pd->uctx) {
                u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
                    (pd->id * OCRDMA_DPP_PAGE_SIZE);
                if (pd->dpp_enabled)
                        ocrdma_del_mmap(pd->uctx, dpp_db, OCRDMA_DPP_PAGE_SIZE);
                usr_db = dev->nic_info.unmapped_db +
                    (pd->id * dev->nic_info.db_page_size);
                ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
        }
        kfree(pd);
        return status;
}

static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd,
                                           int acc, u32 num_pbls,
                                           u32 addr_check)
{
        int status;
        struct ocrdma_mr *mr;
        struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
        struct ocrdma_dev *dev = pd->dev;

        if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
                pr_err("%s(%d) leaving err, invalid access rights\n",
                       __func__, dev->id);
                return ERR_PTR(-EINVAL);
        }

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);
        mr->hwmr.dev = dev;
        mr->hwmr.fr_mr = 0;
        mr->hwmr.local_rd = 1;
        mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
        mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
        mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
        mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
        mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
        mr->hwmr.num_pbls = num_pbls;

        status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pd->id, addr_check);
        if (status) {
                kfree(mr);
                /* propagate the mailbox error instead of assuming -ENOMEM */
                return ERR_PTR(status);
        }
        mr->pd = pd;
        mr->ibmr.lkey = mr->hwmr.lkey;
        if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
                mr->ibmr.rkey = mr->hwmr.lkey;
        return mr;
}

struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc)
{
        struct ocrdma_mr *mr;

        mr = ocrdma_alloc_lkey(ibpd, acc, 0, OCRDMA_ADDR_CHECK_DISABLE);
        if (IS_ERR(mr))
                return ERR_CAST(mr);

        return &mr->ibmr;
}

static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev,
                                   struct ocrdma_hw_mr *mr)
{
        struct pci_dev *pdev = dev->nic_info.pdev;
        int i = 0;

        if (mr->pbl_table) {
                for (i = 0; i < mr->num_pbls; i++) {
                        if (!mr->pbl_table[i].va)
                                continue;
                        dma_free_coherent(&pdev->dev, mr->pbl_size,
                                          mr->pbl_table[i].va,
                                          mr->pbl_table[i].pa);
                }
                kfree(mr->pbl_table);
                mr->pbl_table = NULL;
        }
}

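/*
 * Choose a PBL (physical buffer list) geometry for num_pbes page
 * entries: start from the minimum PBL size and keep doubling it until
 * the resulting PBL count fits the device's max_num_mr_pbl limit, or
 * give up once a single PBL would exceed MAX_OCRDMA_PBL_SIZE.
 */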
static int ocrdma_get_pbl_info(struct ocrdma_mr *mr, u32 num_pbes)
{
        u32 num_pbls = 0;
        u32 idx = 0;
        int status = 0;
        u32 pbl_size;

        do {
                pbl_size = OCRDMA_MIN_HPAGE_SIZE * (1 << idx);
                if (pbl_size > MAX_OCRDMA_PBL_SIZE) {
                        status = -EFAULT;
                        break;
                }
                num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64)));
                num_pbls = num_pbls / (pbl_size / sizeof(u64));
                idx++;
        } while (num_pbls >= mr->hwmr.dev->attr.max_num_mr_pbl);

        mr->hwmr.num_pbes = num_pbes;
        mr->hwmr.num_pbls = num_pbls;
        mr->hwmr.pbl_size = pbl_size;
        return status;
}

static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
{
        int status = 0;
        int i;
        u32 dma_len = mr->pbl_size;
        struct pci_dev *pdev = dev->nic_info.pdev;
        void *va;
        dma_addr_t pa;

        mr->pbl_table = kcalloc(mr->num_pbls, sizeof(struct ocrdma_pbl),
                                GFP_KERNEL);

        if (!mr->pbl_table)
                return -ENOMEM;

        for (i = 0; i < mr->num_pbls; i++) {
                va = dma_alloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
                if (!va) {
                        ocrdma_free_mr_pbl_tbl(dev, mr);
                        status = -ENOMEM;
                        break;
                }
                memset(va, 0, dma_len);
                mr->pbl_table[i].va = va;
                mr->pbl_table[i].pa = pa;
        }
        return status;
}

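/*
 * Walk the pinned umem chunks and emit one little-endian PBE (physical
 * buffer entry) per hardware page, advancing to the next PBL whenever
 * the current one is full.
 */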
static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
                            u32 num_pbes)
{
        struct ocrdma_pbe *pbe;
        struct ib_umem_chunk *chunk;
        struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
        struct ib_umem *umem = mr->umem;
        int i, shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;

        if (!mr->hwmr.num_pbes)
                return;

        pbe = (struct ocrdma_pbe *)pbl_tbl->va;
        pbe_cnt = 0;

        shift = ilog2(umem->page_size);

        list_for_each_entry(chunk, &umem->chunk_list, list) {
                /* get all the dma regions from the chunk. */
                for (i = 0; i < chunk->nmap; i++) {
                        pages = sg_dma_len(&chunk->page_list[i]) >> shift;
                        for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
                                /* store the page address in pbe */
                                pbe->pa_lo =
                                    cpu_to_le32(sg_dma_address
                                                (&chunk->page_list[i]) +
                                                (umem->page_size * pg_cnt));
                                pbe->pa_hi =
                                    cpu_to_le32(upper_32_bits
                                                ((sg_dma_address
                                                  (&chunk->page_list[i]) +
                                                  umem->page_size * pg_cnt)));
                                pbe_cnt += 1;
                                total_num_pbes += 1;
                                pbe++;

                                /* if done building pbes, issue the mbx cmd. */
                                if (total_num_pbes == num_pbes)
                                        return;

                                /* if the given pbl is full storing the pbes,
                                 * move to next pbl.
                                 */
                                if (pbe_cnt ==
                                        (mr->hwmr.pbl_size / sizeof(u64))) {
                                        pbl_tbl++;
                                        pbe = (struct ocrdma_pbe *)pbl_tbl->va;
                                        pbe_cnt = 0;
                                }
                        }
                }
        }
}

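/*
 * Register a userspace memory region: pin the pages with ib_umem_get(),
 * size and allocate the PBL table, populate it with the page addresses
 * and finally register the MR with the adapter.
 */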
struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
                                 u64 usr_addr, int acc, struct ib_udata *udata)
{
        int status = -ENOMEM;
        struct ocrdma_dev *dev;
        struct ocrdma_mr *mr;
        struct ocrdma_pd *pd;
        u32 num_pbes;

        pd = get_ocrdma_pd(ibpd);
        dev = pd->dev;

        if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
                return ERR_PTR(-EINVAL);

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(status);
        mr->hwmr.dev = dev;
        mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
        if (IS_ERR(mr->umem)) {
                status = -EFAULT;
                goto umem_err;
        }
        num_pbes = ib_umem_page_count(mr->umem);
        status = ocrdma_get_pbl_info(mr, num_pbes);
        if (status)
                goto pbl_err;

        mr->hwmr.pbe_size = mr->umem->page_size;
        mr->hwmr.fbo = mr->umem->offset;
        mr->hwmr.va = usr_addr;
        mr->hwmr.len = len;
        mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
        mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
        mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
        mr->hwmr.local_rd = 1;
        mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
        status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
        if (status)
                goto pbl_err;
        build_user_pbes(dev, mr, num_pbes);
        status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
        if (status)
                goto mbx_err;
        mr->pd = pd;
        mr->ibmr.lkey = mr->hwmr.lkey;
        if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
                mr->ibmr.rkey = mr->hwmr.lkey;

        return &mr->ibmr;

mbx_err:
        ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
pbl_err:
        /* drop the pin taken by ib_umem_get() on the failure paths */
        ib_umem_release(mr->umem);
umem_err:
        kfree(mr);
        return ERR_PTR(status);
}

int ocrdma_dereg_mr(struct ib_mr *ib_mr)
{
        struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
        struct ocrdma_dev *dev = mr->hwmr.dev;
        int status;

        status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);

        if (mr->hwmr.fr_mr == 0)
                ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);

        /* it could be user registered memory. */
        if (mr->umem)
                ib_umem_release(mr->umem);
        kfree(mr);
        return status;
}

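/*
 * Return the CQ's queue page, doorbell page and phase information to
 * userspace, registering both regions for a later mmap().
 */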
static int ocrdma_copy_cq_uresp(struct ocrdma_cq *cq, struct ib_udata *udata,
                                struct ib_ucontext *ib_ctx)
{
        int status;
        struct ocrdma_ucontext *uctx;
        struct ocrdma_create_cq_uresp uresp;

        /* zero the response first so no uninitialized stack bytes leak */
        memset(&uresp, 0, sizeof(uresp));
        uresp.cq_id = cq->id;
        uresp.page_size = cq->len;
        uresp.num_pages = 1;
        uresp.max_hw_cqe = cq->max_hw_cqe;
        uresp.page_addr[0] = cq->pa;
        uresp.db_page_addr = cq->dev->nic_info.unmapped_db;
        uresp.db_page_size = cq->dev->nic_info.db_page_size;
        uresp.phase_change = cq->phase_change ? 1 : 0;
        status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
        if (status) {
                pr_err("%s(%d) copy error cqid=0x%x.\n",
                       __func__, cq->dev->id, cq->id);
                goto err;
        }
        uctx = get_ocrdma_ucontext(ib_ctx);
        status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
        if (status)
                goto err;
        status = ocrdma_add_mmap(uctx, uresp.page_addr[0], uresp.page_size);
        if (status) {
                ocrdma_del_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
                goto err;
        }
        cq->ucontext = uctx;
err:
        return status;
}

struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
                               struct ib_ucontext *ib_ctx,
                               struct ib_udata *udata)
{
        struct ocrdma_cq *cq;
        struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
        int status;
        struct ocrdma_create_cq_ureq ureq;

        if (udata) {
                if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
                        return ERR_PTR(-EFAULT);
        } else {
                ureq.dpp_cq = 0;
        }
        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq)
                return ERR_PTR(-ENOMEM);

        spin_lock_init(&cq->cq_lock);
        spin_lock_init(&cq->comp_handler_lock);
        INIT_LIST_HEAD(&cq->sq_head);
        INIT_LIST_HEAD(&cq->rq_head);
        cq->dev = dev;

        status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq);
        if (status) {
                kfree(cq);
                return ERR_PTR(status);
        }
        if (ib_ctx) {
                status = ocrdma_copy_cq_uresp(cq, udata, ib_ctx);
                if (status)
                        goto ctx_err;
        }
        cq->phase = OCRDMA_CQE_VALID;
        cq->arm_needed = true;
        dev->cq_tbl[cq->id] = cq;

        return &cq->ibcq;

ctx_err:
        ocrdma_mbx_destroy_cq(dev, cq);
        kfree(cq);
        return ERR_PTR(status);
}

int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
                     struct ib_udata *udata)
{
        int status = 0;
        struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);

        if (new_cnt < 1 || new_cnt > cq->max_hw_cqe) {
                status = -EINVAL;
                return status;
        }
        ibcq->cqe = new_cnt;
        return status;
}

int ocrdma_destroy_cq(struct ib_cq *ibcq)
{
        int status;
        struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
        struct ocrdma_dev *dev = cq->dev;

        status = ocrdma_mbx_destroy_cq(dev, cq);

        if (cq->ucontext) {
                ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, cq->len);
                ocrdma_del_mmap(cq->ucontext, dev->nic_info.unmapped_db,
                                dev->nic_info.db_page_size);
        }
        dev->cq_tbl[cq->id] = NULL;

        kfree(cq);
        return status;
}

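/*
 * qp_tbl maps hardware QP ids back to their ocrdma_qp so completion and
 * event handling can locate the QP; callers serialize updates (dev_lock
 * on create, the CQ locks on destroy).
 */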
static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
{
        int status = -EINVAL;

        if (qp->id < OCRDMA_MAX_QP && dev->qp_tbl[qp->id] == NULL) {
                dev->qp_tbl[qp->id] = qp;
                status = 0;
        }
        return status;
}

static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
{
        dev->qp_tbl[qp->id] = NULL;
}

static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
                                  struct ib_qp_init_attr *attrs)
{
        if (attrs->qp_type != IB_QPT_GSI &&
            attrs->qp_type != IB_QPT_RC &&
            attrs->qp_type != IB_QPT_UD) {
                pr_err("%s(%d) unsupported qp type=0x%x requested\n",
                       __func__, dev->id, attrs->qp_type);
                return -EINVAL;
        }
        if (attrs->cap.max_send_wr > dev->attr.max_wqe) {
                pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
                       __func__, dev->id, attrs->cap.max_send_wr);
                pr_err("%s(%d) supported send_wr=0x%x\n",
                       __func__, dev->id, dev->attr.max_wqe);
                return -EINVAL;
        }
        if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
                pr_err("%s(%d) unsupported recv_wr=0x%x requested\n",
                       __func__, dev->id, attrs->cap.max_recv_wr);
                pr_err("%s(%d) supported recv_wr=0x%x\n",
                       __func__, dev->id, dev->attr.max_rqe);
                return -EINVAL;
        }
        if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
                pr_err("%s(%d) unsupported inline data size=0x%x requested\n",
                       __func__, dev->id, attrs->cap.max_inline_data);
                pr_err("%s(%d) supported inline data size=0x%x\n",
                       __func__, dev->id, dev->attr.max_inline_data);
                return -EINVAL;
        }
        if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
                pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
                       __func__, dev->id, attrs->cap.max_send_sge);
                pr_err("%s(%d) supported send_sge=0x%x\n",
                       __func__, dev->id, dev->attr.max_send_sge);
                return -EINVAL;
        }
        if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
                pr_err("%s(%d) unsupported recv_sge=0x%x requested\n",
                       __func__, dev->id, attrs->cap.max_recv_sge);
                pr_err("%s(%d) supported recv_sge=0x%x\n",
                       __func__, dev->id, dev->attr.max_recv_sge);
                return -EINVAL;
        }
        /* unprivileged user space cannot create special QP */
        if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
                pr_err("%s(%d) Userspace can't create special QPs of type=0x%x\n",
                       __func__, dev->id, attrs->qp_type);
                return -EINVAL;
        }
        /* allow creating only one GSI type of QP */
        if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
                pr_err("%s(%d) GSI special QPs already created.\n",
                       __func__, dev->id);
                return -EINVAL;
        }
        /* verify consumer QPs are not trying to use GSI QP's CQ */
        if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
                if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
                    (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) ||
                    (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) ||
                    (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
                        pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
                               __func__, dev->id);
                        return -EINVAL;
                }
        }
        return 0;
}

static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
                                struct ib_udata *udata, int dpp_offset,
                                int dpp_credit_lmt, int srq)
{
        int status = 0;
        u64 usr_db;
        struct ocrdma_create_qp_uresp uresp;
        struct ocrdma_dev *dev = qp->dev;
        struct ocrdma_pd *pd = qp->pd;

        memset(&uresp, 0, sizeof(uresp));
        usr_db = dev->nic_info.unmapped_db +
                        (pd->id * dev->nic_info.db_page_size);
        uresp.qp_id = qp->id;
        uresp.sq_dbid = qp->sq.dbid;
        uresp.num_sq_pages = 1;
        uresp.sq_page_size = qp->sq.len;
        uresp.sq_page_addr[0] = qp->sq.pa;
        uresp.num_wqe_allocated = qp->sq.max_cnt;
        if (!srq) {
                uresp.rq_dbid = qp->rq.dbid;
                uresp.num_rq_pages = 1;
                uresp.rq_page_size = qp->rq.len;
                uresp.rq_page_addr[0] = qp->rq.pa;
                uresp.num_rqe_allocated = qp->rq.max_cnt;
        }
        uresp.db_page_addr = usr_db;
        uresp.db_page_size = dev->nic_info.db_page_size;
        if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
                uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
                uresp.db_rq_offset = ((qp->id & 0xFFFF) < 128) ?
                        OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET;
                uresp.db_shift = (qp->id < 128) ? 24 : 16;
        } else {
                uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET;
                uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
                uresp.db_shift = 16;
        }

        if (qp->dpp_enabled) {
                uresp.dpp_credit = dpp_credit_lmt;
                uresp.dpp_offset = dpp_offset;
        }
        status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
        if (status) {
                pr_err("%s(%d) user copy error.\n", __func__, dev->id);
                goto err;
        }
        status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
                                 uresp.sq_page_size);
        if (status)
                goto err;

        if (!srq) {
                status = ocrdma_add_mmap(pd->uctx, uresp.rq_page_addr[0],
                                         uresp.rq_page_size);
                if (status)
                        goto rq_map_err;
        }
        return status;
rq_map_err:
        ocrdma_del_mmap(pd->uctx, uresp.sq_page_addr[0], uresp.sq_page_size);
err:
        return status;
}

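/*
 * Every PD owns one doorbell page; the SQ and RQ doorbells sit at fixed
 * offsets within it. GEN2 adapters split the RQ doorbell across two
 * offsets depending on whether the QP id is below 128.
 */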
static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
                             struct ocrdma_pd *pd)
{
        if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
                qp->sq_db = dev->nic_info.db +
                        (pd->id * dev->nic_info.db_page_size) +
                        OCRDMA_DB_GEN2_SQ_OFFSET;
                qp->rq_db = dev->nic_info.db +
                        (pd->id * dev->nic_info.db_page_size) +
                        ((qp->id < 128) ?
                        OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET);
        } else {
                qp->sq_db = dev->nic_info.db +
                        (pd->id * dev->nic_info.db_page_size) +
                        OCRDMA_DB_SQ_OFFSET;
                qp->rq_db = dev->nic_info.db +
                        (pd->id * dev->nic_info.db_page_size) +
                        OCRDMA_DB_RQ_OFFSET;
        }
}

static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp *qp)
{
        qp->wqe_wr_id_tbl =
            kcalloc(qp->sq.max_cnt, sizeof(*(qp->wqe_wr_id_tbl)),
                    GFP_KERNEL);
        if (qp->wqe_wr_id_tbl == NULL)
                return -ENOMEM;
        qp->rqe_wr_id_tbl =
            kcalloc(qp->rq.max_cnt, sizeof(u64), GFP_KERNEL);
        if (qp->rqe_wr_id_tbl == NULL)
                return -ENOMEM;

        return 0;
}

static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
                                      struct ocrdma_pd *pd,
                                      struct ib_qp_init_attr *attrs)
{
        qp->pd = pd;
        spin_lock_init(&qp->q_lock);
        INIT_LIST_HEAD(&qp->sq_entry);
        INIT_LIST_HEAD(&qp->rq_entry);

        qp->qp_type = attrs->qp_type;
        qp->cap_flags = OCRDMA_QP_INB_RD | OCRDMA_QP_INB_WR;
        qp->max_inline_data = attrs->cap.max_inline_data;
        qp->sq.max_sges = attrs->cap.max_send_sge;
        qp->rq.max_sges = attrs->cap.max_recv_sge;
        qp->state = OCRDMA_QPS_RST;
}

static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
                                   struct ib_qp_init_attr *attrs)
{
        if (attrs->qp_type == IB_QPT_GSI) {
                dev->gsi_qp_created = 1;
                dev->gsi_sqcq = get_ocrdma_cq(attrs->send_cq);
                dev->gsi_rqcq = get_ocrdma_cq(attrs->recv_cq);
        }
}

struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
                               struct ib_qp_init_attr *attrs,
                               struct ib_udata *udata)
{
        int status;
        struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
        struct ocrdma_qp *qp;
        struct ocrdma_dev *dev = pd->dev;
        struct ocrdma_create_qp_ureq ureq;
        u16 dpp_credit_lmt, dpp_offset;

        status = ocrdma_check_qp_params(ibpd, dev, attrs);
        if (status)
                goto gen_err;

        memset(&ureq, 0, sizeof(ureq));
        if (udata) {
                if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
                        return ERR_PTR(-EFAULT);
        }
        qp = kzalloc(sizeof(*qp), GFP_KERNEL);
        if (!qp) {
                status = -ENOMEM;
                goto gen_err;
        }
        qp->dev = dev;
        ocrdma_set_qp_init_params(qp, pd, attrs);

        mutex_lock(&dev->dev_lock);
        status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
                                        ureq.dpp_cq_id,
                                        &dpp_offset, &dpp_credit_lmt);
        if (status)
                goto mbx_err;

        /* user-space QPs' wr_id tables are managed by the library */
        if (udata == NULL) {
                qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
                                  OCRDMA_QP_FAST_REG);
                status = ocrdma_alloc_wr_id_tbl(qp);
                if (status)
                        goto map_err;
        }

        status = ocrdma_add_qpn_map(dev, qp);
        if (status)
                goto map_err;
        ocrdma_set_qp_db(dev, qp, pd);
        if (udata) {
                status = ocrdma_copy_qp_uresp(qp, udata, dpp_offset,
                                              dpp_credit_lmt,
                                              (attrs->srq != NULL));
                if (status)
                        goto cpy_err;
        }
        ocrdma_store_gsi_qp_cq(dev, attrs);
        qp->ibqp.qp_num = qp->id;
        mutex_unlock(&dev->dev_lock);
        return &qp->ibqp;

cpy_err:
        ocrdma_del_qpn_map(dev, qp);
map_err:
        ocrdma_mbx_destroy_qp(dev, qp);
mbx_err:
        mutex_unlock(&dev->dev_lock);
        kfree(qp->wqe_wr_id_tbl);
        kfree(qp->rqe_wr_id_tbl);
        kfree(qp);
        pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
gen_err:
        return ERR_PTR(status);
}

int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                      int attr_mask)
{
        int status = 0;
        struct ocrdma_qp *qp;
        struct ocrdma_dev *dev;
        enum ib_qp_state old_qps;

        qp = get_ocrdma_qp(ibqp);
        dev = qp->dev;
        if (attr_mask & IB_QP_STATE)
                status = ocrdma_qp_state_machine(qp, attr->qp_state, &old_qps);
        /* if the new state is the same as the previous one, the HW
         * doesn't need to know about it.
         */
        if (status < 0)
                return status;
        status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps);
        return status;
}

int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                     int attr_mask, struct ib_udata *udata)
{
        unsigned long flags;
        int status = -EINVAL;
        struct ocrdma_qp *qp;
        struct ocrdma_dev *dev;
        enum ib_qp_state old_qps, new_qps;

        qp = get_ocrdma_qp(ibqp);
        dev = qp->dev;

        /* synchronize with multiple contexts trying to change or
         * retrieve the qp state.
         */
        mutex_lock(&dev->dev_lock);
        /* synchronize with wqe, rqe posting and cqe processing contexts */
        spin_lock_irqsave(&qp->q_lock, flags);
        old_qps = get_ibqp_state(qp->state);
        if (attr_mask & IB_QP_STATE)
                new_qps = attr->qp_state;
        else
                new_qps = old_qps;
        spin_unlock_irqrestore(&qp->q_lock, flags);

        if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
                pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
                       "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
                       __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
                       old_qps, new_qps);
                goto param_err;
        }

        status = _ocrdma_modify_qp(ibqp, attr, attr_mask);
        if (status > 0)
                status = 0;
param_err:
        mutex_unlock(&dev->dev_lock);
        return status;
}

static enum ib_mtu ocrdma_mtu_int_to_enum(u16 mtu)
{
        switch (mtu) {
        case 256:
                return IB_MTU_256;
        case 512:
                return IB_MTU_512;
        case 1024:
                return IB_MTU_1024;
        case 2048:
                return IB_MTU_2048;
        case 4096:
                return IB_MTU_4096;
        default:
                return IB_MTU_1024;
        }
}

static int ocrdma_to_ib_qp_acc_flags(int qp_cap_flags)
{
        int ib_qp_acc_flags = 0;

        if (qp_cap_flags & OCRDMA_QP_INB_WR)
                ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
        if (qp_cap_flags & OCRDMA_QP_INB_RD)
                ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
        return ib_qp_acc_flags;
}

int ocrdma_query_qp(struct ib_qp *ibqp,
                    struct ib_qp_attr *qp_attr,
                    int attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
        int status;
        u32 qp_state;
        struct ocrdma_qp_params params;
        struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
        struct ocrdma_dev *dev = qp->dev;

        memset(&params, 0, sizeof(params));
        mutex_lock(&dev->dev_lock);
        status = ocrdma_mbx_query_qp(dev, qp, &params);
        mutex_unlock(&dev->dev_lock);
        if (status)
                goto mbx_err;
        qp_attr->qp_state = get_ibqp_state(IB_QPS_INIT);
        qp_attr->cur_qp_state = get_ibqp_state(IB_QPS_INIT);
        /* shift the masked MTU field down before converting it, so the
         * helper sees the byte count rather than the raw register bits.
         */
        qp_attr->path_mtu =
                ocrdma_mtu_int_to_enum((params.path_mtu_pkey_indx &
                                OCRDMA_QP_PARAMS_PATH_MTU_MASK) >>
                                OCRDMA_QP_PARAMS_PATH_MTU_SHIFT);
        qp_attr->path_mig_state = IB_MIG_MIGRATED;
        qp_attr->rq_psn = params.hop_lmt_rq_psn & OCRDMA_QP_PARAMS_RQ_PSN_MASK;
        qp_attr->sq_psn = params.tclass_sq_psn & OCRDMA_QP_PARAMS_SQ_PSN_MASK;
        qp_attr->dest_qp_num =
            params.ack_to_rnr_rtc_dest_qpn & OCRDMA_QP_PARAMS_DEST_QPN_MASK;

        qp_attr->qp_access_flags = ocrdma_to_ib_qp_acc_flags(qp->cap_flags);
        qp_attr->cap.max_send_wr = qp->sq.max_cnt - 1;
        qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1;
        qp_attr->cap.max_send_sge = qp->sq.max_sges;
        qp_attr->cap.max_recv_sge = qp->rq.max_sges;
        qp_attr->cap.max_inline_data = dev->attr.max_inline_data;
        qp_init_attr->cap = qp_attr->cap;
        memcpy(&qp_attr->ah_attr.grh.dgid, &params.dgid[0],
               sizeof(params.dgid));
        qp_attr->ah_attr.grh.flow_label = params.rnt_rc_sl_fl &
            OCRDMA_QP_PARAMS_FLOW_LABEL_MASK;
        qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
        qp_attr->ah_attr.grh.hop_limit = (params.hop_lmt_rq_psn &
                                          OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
                                                OCRDMA_QP_PARAMS_HOP_LMT_SHIFT;
        /* extract the traffic class with the TCLASS field's own mask,
         * not the SQ PSN bits.
         */
        qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn &
                                              OCRDMA_QP_PARAMS_TCLASS_MASK) >>
                                                OCRDMA_QP_PARAMS_TCLASS_SHIFT;

        qp_attr->ah_attr.ah_flags = IB_AH_GRH;
        qp_attr->ah_attr.port_num = 1;
        qp_attr->ah_attr.sl = (params.rnt_rc_sl_fl &
                               OCRDMA_QP_PARAMS_SL_MASK) >>
                                OCRDMA_QP_PARAMS_SL_SHIFT;
        qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
                            OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
                                OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
        qp_attr->rnr_retry = (params.ack_to_rnr_rtc_dest_qpn &
                              OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK) >>
                                OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT;
        qp_attr->retry_cnt =
            (params.rnt_rc_sl_fl & OCRDMA_QP_PARAMS_RETRY_CNT_MASK) >>
                OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT;
        qp_attr->min_rnr_timer = 0;
        qp_attr->pkey_index = 0;
        qp_attr->port_num = 1;
        qp_attr->ah_attr.src_path_bits = 0;
        qp_attr->ah_attr.static_rate = 0;
        qp_attr->alt_pkey_index = 0;
        qp_attr->alt_port_num = 0;
        qp_attr->alt_timeout = 0;
        memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
        qp_state = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_STATE_MASK) >>
                    OCRDMA_QP_PARAMS_STATE_SHIFT;
        qp_attr->sq_draining = (qp_state == OCRDMA_QPS_SQ_DRAINING) ? 1 : 0;
        qp_attr->max_dest_rd_atomic =
            params.max_ord_ird >> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT;
        qp_attr->max_rd_atomic =
            params.max_ord_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK;
        qp_attr->en_sqd_async_notify = (params.max_sge_recv_flags &
                                OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC) ? 1 : 0;
mbx_err:
        return status;
}

static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, int idx)
{
        int i = idx / 32;
        unsigned int mask = (1 << (idx % 32));

        if (srq->idx_bit_fields[i] & mask)
                srq->idx_bit_fields[i] &= ~mask;
        else
                srq->idx_bit_fields[i] |= mask;
}

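/*
 * The work queues are circular: entries are produced at head and
 * consumed at tail, so the free count is the distance from head
 * wrapping around back to tail.
 */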
static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
{
        int free_cnt;

        if (q->head >= q->tail)
                free_cnt = (q->max_cnt - q->head) + q->tail;
        else
                free_cnt = q->tail - q->head;
        return free_cnt;
}

static int is_hw_sq_empty(struct ocrdma_qp *qp)
{
        return (qp->sq.tail == qp->sq.head) ? 1 : 0;
}

static int is_hw_rq_empty(struct ocrdma_qp *qp)
{
        return (qp->rq.tail == qp->rq.head) ? 1 : 0;
}

static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
{
        return q->va + (q->head * q->entry_size);
}

static void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info *q,
                                      u32 idx)
{
        return q->va + (idx * q->entry_size);
}

static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info *q)
{
        q->head = (q->head + 1) & q->max_wqe_idx;
}

static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info *q)
{
        q->tail = (q->tail + 1) & q->max_wqe_idx;
}

/* discard the cqe for a given QP */
static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
{
        unsigned long cq_flags;
        unsigned long flags;
        int discard_cnt = 0;
        u32 cur_getp, stop_getp;
        struct ocrdma_cqe *cqe;
        u32 qpn = 0;

        spin_lock_irqsave(&cq->cq_lock, cq_flags);

        /* traverse through the CQEs in the hw CQ,
         * find the matching CQE for a given qp,
         * mark the matching one discarded by clearing qpn.
         * ring the doorbell in the poll_cq() as
         * we don't complete out of order cqe.
         */

        cur_getp = cq->getp;
        /* find up to where we reap the cq. */
        stop_getp = cur_getp;
        do {
                if (is_hw_sq_empty(qp) && (!qp->srq && is_hw_rq_empty(qp)))
                        break;

                cqe = cq->va + cur_getp;
                /* if (a) done reaping whole hw cq, or
                 *    (b) qp_xq becomes empty.
                 * then exit
                 */
                qpn = cqe->cmn.qpn & OCRDMA_CQE_QPN_MASK;
                /* if previously discarded cqe found, skip that too. */
                /* check for matching qp */
                if (qpn == 0 || qpn != qp->id)
                        goto skip_cqe;

                /* mark cqe discarded so that it is not picked up later
                 * in the poll_cq().
                 */
                discard_cnt += 1;
                cqe->cmn.qpn = 0;
                if (is_cqe_for_sq(cqe)) {
                        ocrdma_hwq_inc_tail(&qp->sq);
                } else {
                        if (qp->srq) {
                                spin_lock_irqsave(&qp->srq->q_lock, flags);
                                ocrdma_hwq_inc_tail(&qp->srq->rq);
                                ocrdma_srq_toggle_bit(qp->srq, cur_getp);
                                spin_unlock_irqrestore(&qp->srq->q_lock, flags);
                        } else {
                                ocrdma_hwq_inc_tail(&qp->rq);
                        }
                }
skip_cqe:
                cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
        } while (cur_getp != stop_getp);
        spin_unlock_irqrestore(&cq->cq_lock, cq_flags);
}

static void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
{
        int found = false;
        unsigned long flags;
        struct ocrdma_dev *dev = qp->dev;

        /* sync with any active CQ poll */
        spin_lock_irqsave(&dev->flush_q_lock, flags);
        found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
        if (found)
                list_del(&qp->sq_entry);
        if (!qp->srq) {
                found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
                if (found)
                        list_del(&qp->rq_entry);
        }
        spin_unlock_irqrestore(&dev->flush_q_lock, flags);
}

1398 int ocrdma_destroy_qp(struct ib_qp *ibqp)
1399 {
1400         int status;
1401         struct ocrdma_pd *pd;
1402         struct ocrdma_qp *qp;
1403         struct ocrdma_dev *dev;
1404         struct ib_qp_attr attrs;
1405         int attr_mask = IB_QP_STATE;
1406         unsigned long flags;
1407
1408         qp = get_ocrdma_qp(ibqp);
1409         dev = qp->dev;
1410
1411         attrs.qp_state = IB_QPS_ERR;
1412         pd = qp->pd;
1413
1414         /* change the QP state to ERROR */
1415         _ocrdma_modify_qp(ibqp, &attrs, attr_mask);
1416
1417         /* ensure that CQEs for newly created QP (whose id may be same with
1418          * one which just getting destroyed are same), dont get
1419          * discarded until the old CQEs are discarded.
1420          */
1421         mutex_lock(&dev->dev_lock);
1422         status = ocrdma_mbx_destroy_qp(dev, qp);
1423
1424         /*
1425          * acquire CQ lock while destroy is in progress, in order to
1426          * protect against processing in-flight CQEs for this QP.
1427          */
1428         spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
1429         if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
1430                 spin_lock(&qp->rq_cq->cq_lock);
1431
1432         ocrdma_del_qpn_map(dev, qp);
1433
1434         if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
1435                 spin_unlock(&qp->rq_cq->cq_lock);
1436         spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);
1437
1438         if (!pd->uctx) {
1439                 ocrdma_discard_cqes(qp, qp->sq_cq);
1440                 ocrdma_discard_cqes(qp, qp->rq_cq);
1441         }
1442         mutex_unlock(&dev->dev_lock);
1443
1444         if (pd->uctx) {
1445                 ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa, qp->sq.len);
1446                 if (!qp->srq)
1447                         ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa, qp->rq.len);
1448         }
1449
1450         ocrdma_del_flush_qp(qp);
1451
1452         kfree(qp->wqe_wr_id_tbl);
1453         kfree(qp->rqe_wr_id_tbl);
1454         kfree(qp);
1455         return status;
1456 }
1457
1458 static int ocrdma_copy_srq_uresp(struct ocrdma_srq *srq, struct ib_udata *udata)
1459 {
1460         int status;
1461         struct ocrdma_create_srq_uresp uresp;
1462
        memset(&uresp, 0, sizeof(uresp)); /* don't leak stack padding to user space */
1463         uresp.rq_dbid = srq->rq.dbid;
1464         uresp.num_rq_pages = 1;
1465         uresp.rq_page_addr[0] = srq->rq.pa;
1466         uresp.rq_page_size = srq->rq.len;
1467         uresp.db_page_addr = srq->dev->nic_info.unmapped_db +
1468             (srq->pd->id * srq->dev->nic_info.db_page_size);
1469         uresp.db_page_size = srq->dev->nic_info.db_page_size;
1470         uresp.num_rqe_allocated = srq->rq.max_cnt;
1471         if (srq->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
1472                 uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ1_OFFSET;
1473                 uresp.db_shift = 24;
1474         } else {
1475                 uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
1476                 uresp.db_shift = 16;
1477         }
1478
1479         status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1480         if (status)
1481                 return status;
1482         status = ocrdma_add_mmap(srq->pd->uctx, uresp.rq_page_addr[0],
1483                                  uresp.rq_page_size);
1484         if (status)
1485                 return status;
1486         return status;
1487 }
1488
1489 struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
1490                                  struct ib_srq_init_attr *init_attr,
1491                                  struct ib_udata *udata)
1492 {
1493         int status = -ENOMEM;
1494         struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
1495         struct ocrdma_dev *dev = pd->dev;
1496         struct ocrdma_srq *srq;
1497
1498         if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
1499                 return ERR_PTR(-EINVAL);
1500         if (init_attr->attr.max_wr > dev->attr.max_rqe)
1501                 return ERR_PTR(-EINVAL);
1502
1503         srq = kzalloc(sizeof(*srq), GFP_KERNEL);
1504         if (!srq)
1505                 return ERR_PTR(status);
1506
1507         spin_lock_init(&srq->q_lock);
1508         srq->dev = dev;
1509         srq->pd = pd;
1510         srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
1511         status = ocrdma_mbx_create_srq(srq, init_attr, pd);
1512         if (status)
1513                 goto err;
1514
1515         if (udata == NULL) {
1516                 srq->rqe_wr_id_tbl = kzalloc(sizeof(u64) * srq->rq.max_cnt,
1517                             GFP_KERNEL);
1518                 if (srq->rqe_wr_id_tbl == NULL)
1519                         goto arm_err;
1520
1521                 srq->bit_fields_len = (srq->rq.max_cnt / 32) +
1522                     (srq->rq.max_cnt % 32 ? 1 : 0);
1523                 srq->idx_bit_fields =
1524                     kmalloc(srq->bit_fields_len * sizeof(u32), GFP_KERNEL);
1525                 if (srq->idx_bit_fields == NULL)
1526                         goto arm_err;
1527                 memset(srq->idx_bit_fields, 0xff,
1528                        srq->bit_fields_len * sizeof(u32));
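                /*
                 * Worked example (illustrative numbers): with rq.max_cnt
                 * == 40, bit_fields_len is 40/32 + 1 == 2 u32 words (64
                 * bits).  The 0xff memset above sets every bit, marking
                 * every shadow-table index as free; ocrdma_srq_get_idx()
                 * clears a bit to allocate an index, and
                 * ocrdma_update_free_srq_cqe() sets it again when the cqe
                 * carrying that tag is polled.
                 */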
1529         }
1530
1531         if (init_attr->attr.srq_limit) {
1532                 status = ocrdma_mbx_modify_srq(srq, &init_attr->attr);
1533                 if (status)
1534                         goto arm_err;
1535         }
1536
1537         if (udata) {
1538                 status = ocrdma_copy_srq_uresp(srq, udata);
1539                 if (status)
1540                         goto arm_err;
1541         }
1542
1543         return &srq->ibsrq;
1544
1545 arm_err:
1546         ocrdma_mbx_destroy_srq(dev, srq);
1547 err:
1548         kfree(srq->rqe_wr_id_tbl);
1549         kfree(srq->idx_bit_fields);
1550         kfree(srq);
1551         return ERR_PTR(status);
1552 }
1553
1554 int ocrdma_modify_srq(struct ib_srq *ibsrq,
1555                       struct ib_srq_attr *srq_attr,
1556                       enum ib_srq_attr_mask srq_attr_mask,
1557                       struct ib_udata *udata)
1558 {
1559         int status = 0;
1560         struct ocrdma_srq *srq;
1561
1562         srq = get_ocrdma_srq(ibsrq);
1563         if (srq_attr_mask & IB_SRQ_MAX_WR)
1564                 status = -EINVAL;
1565         else
1566                 status = ocrdma_mbx_modify_srq(srq, srq_attr);
1567         return status;
1568 }
1569
1570 int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
1571 {
1572         int status;
1573         struct ocrdma_srq *srq;
1574
1575         srq = get_ocrdma_srq(ibsrq);
1576         status = ocrdma_mbx_query_srq(srq, srq_attr);
1577         return status;
1578 }
1579
1580 int ocrdma_destroy_srq(struct ib_srq *ibsrq)
1581 {
1582         int status;
1583         struct ocrdma_srq *srq;
1584         struct ocrdma_dev *dev;
1585
1586         srq = get_ocrdma_srq(ibsrq);
1587         dev = srq->dev;
1588
1589         status = ocrdma_mbx_destroy_srq(dev, srq);
1590
1591         if (srq->pd->uctx)
1592                 ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len);
1593
1594         kfree(srq->idx_bit_fields);
1595         kfree(srq->rqe_wr_id_tbl);
1596         kfree(srq);
1597         return status;
1598 }
1599
1600 /* unprivileged verbs and their support functions. */
1601 static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
1602                                 struct ocrdma_hdr_wqe *hdr,
1603                                 struct ib_send_wr *wr)
1604 {
1605         struct ocrdma_ewqe_ud_hdr *ud_hdr =
1606                 (struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
1607         struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah);
1608
1609         ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn;
1610         if (qp->qp_type == IB_QPT_GSI)
1611                 ud_hdr->qkey = qp->qkey;
1612         else
1613                 ud_hdr->qkey = wr->wr.ud.remote_qkey;
1614         ud_hdr->rsvd_ahid = ah->id;
1615 }
1616
1617 static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr,
1618                               struct ocrdma_sge *sge, int num_sge,
1619                               struct ib_sge *sg_list)
1620 {
1621         int i;
1622
1623         for (i = 0; i < num_sge; i++) {
1624                 sge[i].lrkey = sg_list[i].lkey;
1625                 sge[i].addr_lo = sg_list[i].addr;
1626                 sge[i].addr_hi = upper_32_bits(sg_list[i].addr);
1627                 sge[i].len = sg_list[i].length;
1628                 hdr->total_len += sg_list[i].length;
1629         }
1630         if (num_sge == 0)
1631                 memset(sge, 0, sizeof(*sge));
1632 }
1633
1634 static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
1635                                     struct ocrdma_hdr_wqe *hdr,
1636                                     struct ocrdma_sge *sge,
1637                                     struct ib_send_wr *wr, u32 wqe_size)
1638 {
1639         if (wr->send_flags & IB_SEND_INLINE) {
1640                 if (wr->sg_list[0].length > qp->max_inline_data) {
1641                         pr_err("%s() supported_len=0x%x,\n"
1642                                " unsupported len req=0x%x\n", __func__,
1643                                qp->max_inline_data, wr->sg_list[0].length);
1644                         return -EINVAL;
1645                 }
1646                 memcpy(sge,
1647                        (void *)(unsigned long)wr->sg_list[0].addr,
1648                        wr->sg_list[0].length);
1649                 hdr->total_len = wr->sg_list[0].length;
1650                 wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
1651                 hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
1652         } else {
1653                 ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
1654                 if (wr->num_sge)
1655                         wqe_size += (wr->num_sge * sizeof(struct ocrdma_sge));
1656                 else
1657                         wqe_size += sizeof(struct ocrdma_sge);
1658                 hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
1659         }
1660         hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
1661         return 0;
1662 }
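/*
 * Worked example for the size encoding above (illustrative numbers): a
 * non-inline send with two SGEs has wqe_size = sizeof(*hdr) +
 * 2 * sizeof(struct ocrdma_sge), and the cw size field carries
 * wqe_size / OCRDMA_WQE_STRIDE, i.e. the WQE length in hardware strides
 * rather than bytes.  Inline data is instead rounded up to
 * OCRDMA_WQE_ALIGN_BYTES before being folded into wqe_size.
 */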
1663
1664 static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1665                              struct ib_send_wr *wr)
1666 {
1667         int status;
1668         struct ocrdma_sge *sge;
1669         u32 wqe_size = sizeof(*hdr);
1670
1671         if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
1672                 ocrdma_build_ud_hdr(qp, hdr, wr);
1673                 sge = (struct ocrdma_sge *)(hdr + 2);
1674                 wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr);
1675         } else
1676                 sge = (struct ocrdma_sge *)(hdr + 1);
1677
1678         status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
1679         return status;
1680 }
1681
1682 static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1683                               struct ib_send_wr *wr)
1684 {
1685         int status;
1686         struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
1687         struct ocrdma_sge *sge = ext_rw + 1;
1688         u32 wqe_size = sizeof(*hdr) + sizeof(*ext_rw);
1689
1690         status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
1691         if (status)
1692                 return status;
1693         ext_rw->addr_lo = wr->wr.rdma.remote_addr;
1694         ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
1695         ext_rw->lrkey = wr->wr.rdma.rkey;
1696         ext_rw->len = hdr->total_len;
1697         return 0;
1698 }
1699
1700 static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1701                               struct ib_send_wr *wr)
1702 {
1703         struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
1704         struct ocrdma_sge *sge = ext_rw + 1;
1705         u32 wqe_size = ((wr->num_sge + 1) * sizeof(struct ocrdma_sge)) +
1706             sizeof(struct ocrdma_hdr_wqe);
1707
1708         ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
1709         hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
1710         hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
1711         hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
1712
1713         ext_rw->addr_lo = wr->wr.rdma.remote_addr;
1714         ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
1715         ext_rw->lrkey = wr->wr.rdma.rkey;
1716         ext_rw->len = hdr->total_len;
1717 }
1718
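/*
 * Doorbell value layout, as inferred from this file (not from a hardware
 * spec): the low bits carry the queue's dbid and the field starting at
 * bit 16 the number of newly posted WQEs -- always 1 here, since
 * ocrdma_post_send() rings the doorbell once per WQE.  The RQ variant,
 * ocrdma_ring_rq_db(), obtains the shift via
 * ocrdma_get_num_posted_shift() instead.
 */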
1719 static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
1720 {
1721         u32 val = qp->sq.dbid | (1 << 16);
1722
1723         iowrite32(val, qp->sq_db);
1724 }
1725
1726 int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1727                      struct ib_send_wr **bad_wr)
1728 {
1729         int status = 0;
1730         struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
1731         struct ocrdma_hdr_wqe *hdr;
1732         unsigned long flags;
1733
1734         spin_lock_irqsave(&qp->q_lock, flags);
1735         if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) {
1736                 spin_unlock_irqrestore(&qp->q_lock, flags);
1737                 *bad_wr = wr;
1738                 return -EINVAL;
1739         }
1740
1741         while (wr) {
1742                 if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
1743                     wr->num_sge > qp->sq.max_sges) {
1744                         *bad_wr = wr;
1745                         status = -ENOMEM;
1746                         break;
1747                 }
1748                 hdr = ocrdma_hwq_head(&qp->sq);
1749                 hdr->cw = 0;
1750                 if (wr->send_flags & IB_SEND_SIGNALED)
1751                         hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
1752                 if (wr->send_flags & IB_SEND_FENCE)
1753                         hdr->cw |=
1754                             (OCRDMA_FLAG_FENCE_L << OCRDMA_WQE_FLAGS_SHIFT);
1755                 if (wr->send_flags & IB_SEND_SOLICITED)
1756                         hdr->cw |=
1757                             (OCRDMA_FLAG_SOLICIT << OCRDMA_WQE_FLAGS_SHIFT);
1758                 hdr->total_len = 0;
1759                 switch (wr->opcode) {
1760                 case IB_WR_SEND_WITH_IMM:
1761                         hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
1762                         hdr->immdt = ntohl(wr->ex.imm_data);
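                        /* fall through */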
1763                 case IB_WR_SEND:
1764                         hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
1765                         status = ocrdma_build_send(qp, hdr, wr);
1766                         break;
1767                 case IB_WR_SEND_WITH_INV:
1768                         hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
1769                         hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
1770                         hdr->lkey = wr->ex.invalidate_rkey;
1771                         status = ocrdma_build_send(qp, hdr, wr);
1772                         break;
1773                 case IB_WR_RDMA_WRITE_WITH_IMM:
1774                         hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
1775                         hdr->immdt = ntohl(wr->ex.imm_data);
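                        /* fall through */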
1776                 case IB_WR_RDMA_WRITE:
1777                         hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT);
1778                         status = ocrdma_build_write(qp, hdr, wr);
1779                         break;
1780                 case IB_WR_RDMA_READ_WITH_INV:
1781                         hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
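                        /* fall through */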
1782                 case IB_WR_RDMA_READ:
1783                         ocrdma_build_read(qp, hdr, wr);
1784                         break;
1785                 case IB_WR_LOCAL_INV:
1786                         hdr->cw |=
1787                             (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT);
1788                         hdr->cw |= (sizeof(struct ocrdma_hdr_wqe) /
1789                                 OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
1790                         hdr->lkey = wr->ex.invalidate_rkey;
1791                         break;
1792                 default:
1793                         status = -EINVAL;
1794                         break;
1795                 }
1796                 if (status) {
1797                         *bad_wr = wr;
1798                         break;
1799                 }
1800                 if (wr->send_flags & IB_SEND_SIGNALED)
1801                         qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1;
1802                 else
1803                         qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0;
1804                 qp->wqe_wr_id_tbl[qp->sq.head].wrid = wr->wr_id;
1805                 ocrdma_cpu_to_le32(hdr, ((hdr->cw >> OCRDMA_WQE_SIZE_SHIFT) &
1806                                    OCRDMA_WQE_SIZE_MASK) * OCRDMA_WQE_STRIDE);
1807                 /* make sure wqe is written before adapter can access it */
1808                 wmb();
1809                 /* inform hw to start processing it */
1810                 ocrdma_ring_sq_db(qp);
1811
1812                 /* update pointer, counter for next wr */
1813                 ocrdma_hwq_inc_head(&qp->sq);
1814                 wr = wr->next;
1815         }
1816         spin_unlock_irqrestore(&qp->q_lock, flags);
1817         return status;
1818 }
1819
1820 static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
1821 {
1822         u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp));
1823
1824         iowrite32(val, qp->rq_db);
1825 }
1826
1827 static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
1828                              u16 tag)
1829 {
1830         u32 wqe_size = 0;
1831         struct ocrdma_sge *sge;
1832         if (wr->num_sge)
1833                 wqe_size = (wr->num_sge * sizeof(*sge)) + sizeof(*rqe);
1834         else
1835                 wqe_size = sizeof(*sge) + sizeof(*rqe);
1836
1837         rqe->cw = ((wqe_size / OCRDMA_WQE_STRIDE) <<
1838                                 OCRDMA_WQE_SIZE_SHIFT);
1839         rqe->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
1840         rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
1841         rqe->total_len = 0;
1842         rqe->rsvd_tag = tag;
1843         sge = (struct ocrdma_sge *)(rqe + 1);
1844         ocrdma_build_sges(rqe, sge, wr->num_sge, wr->sg_list);
1845         ocrdma_cpu_to_le32(rqe, wqe_size);
1846 }
1847
1848 int ocrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1849                      struct ib_recv_wr **bad_wr)
1850 {
1851         int status = 0;
1852         unsigned long flags;
1853         struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
1854         struct ocrdma_hdr_wqe *rqe;
1855
1856         spin_lock_irqsave(&qp->q_lock, flags);
1857         if (qp->state == OCRDMA_QPS_RST || qp->state == OCRDMA_QPS_ERR) {
1858                 spin_unlock_irqrestore(&qp->q_lock, flags);
1859                 *bad_wr = wr;
1860                 return -EINVAL;
1861         }
1862         while (wr) {
1863                 if (ocrdma_hwq_free_cnt(&qp->rq) == 0 ||
1864                     wr->num_sge > qp->rq.max_sges) {
1865                         *bad_wr = wr;
1866                         status = -ENOMEM;
1867                         break;
1868                 }
1869                 rqe = ocrdma_hwq_head(&qp->rq);
1870                 ocrdma_build_rqe(rqe, wr, 0);
1871
1872                 qp->rqe_wr_id_tbl[qp->rq.head] = wr->wr_id;
1873                 /* make sure rqe is written before adapter can access it */
1874                 wmb();
1875
1876                 /* inform hw to start processing it */
1877                 ocrdma_ring_rq_db(qp);
1878
1879                 /* update pointer, counter for next wr */
1880                 ocrdma_hwq_inc_head(&qp->rq);
1881                 wr = wr->next;
1882         }
1883         spin_unlock_irqrestore(&qp->q_lock, flags);
1884         return status;
1885 }
1886
1887 /* cqes for an srq's rqes can potentially arrive out of order.
1888  * the index gives the entry in the shadow table in which to
1889  * store the wr_id; the tag/index is returned in the cqe to
1890  * reference back to a given rqe.
1891  */
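/*
 * Illustrative example: if idx_bit_fields[0] == 0xfffffffc (indexes 0 and
 * 1 already allocated), ffs() returns 3 and indx == 2; the toggle below
 * clears bit 2 until ocrdma_update_free_srq_cqe() sets it again when the
 * cqe carrying tag 2 is polled.
 */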
1892 static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
1893 {
1894         int row = 0;
1895         int indx = 0;
1896
1897         for (row = 0; row < srq->bit_fields_len; row++) {
1898                 if (srq->idx_bit_fields[row]) {
1899                         indx = ffs(srq->idx_bit_fields[row]);
1900                         indx = (row * 32) + (indx - 1);
1901                         if (indx >= srq->rq.max_cnt)
1902                                 BUG();
1903                         ocrdma_srq_toggle_bit(srq, indx);
1904                         break;
1905                 }
1906         }
1907
1908         if (row == srq->bit_fields_len)
1909                 BUG();
1910         return indx;
1911 }
1912
1913 static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
1914 {
1915         u32 val = srq->rq.dbid | (1 << 16);
1916
1917         iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
1918 }
1919
1920 int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
1921                          struct ib_recv_wr **bad_wr)
1922 {
1923         int status = 0;
1924         unsigned long flags;
1925         struct ocrdma_srq *srq;
1926         struct ocrdma_hdr_wqe *rqe;
1927         u16 tag;
1928
1929         srq = get_ocrdma_srq(ibsrq);
1930
1931         spin_lock_irqsave(&srq->q_lock, flags);
1932         while (wr) {
1933                 if (ocrdma_hwq_free_cnt(&srq->rq) == 0 ||
1934                     wr->num_sge > srq->rq.max_sges) {
1935                         status = -ENOMEM;
1936                         *bad_wr = wr;
1937                         break;
1938                 }
1939                 tag = ocrdma_srq_get_idx(srq);
1940                 rqe = ocrdma_hwq_head(&srq->rq);
1941                 ocrdma_build_rqe(rqe, wr, tag);
1942
1943                 srq->rqe_wr_id_tbl[tag] = wr->wr_id;
1944                 /* make sure rqe is written before adapter can perform DMA */
1945                 wmb();
1946                 /* inform hw to start processing it */
1947                 ocrdma_ring_srq_db(srq);
1948                 /* update pointer, counter for next wr */
1949                 ocrdma_hwq_inc_head(&srq->rq);
1950                 wr = wr->next;
1951         }
1952         spin_unlock_irqrestore(&srq->q_lock, flags);
1953         return status;
1954 }
1955
1956 static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
1957 {
1958         enum ib_wc_status ibwc_status = IB_WC_GENERAL_ERR;
1959
1960         switch (status) {
1961         case OCRDMA_CQE_GENERAL_ERR:
1962                 ibwc_status = IB_WC_GENERAL_ERR;
1963                 break;
1964         case OCRDMA_CQE_LOC_LEN_ERR:
1965                 ibwc_status = IB_WC_LOC_LEN_ERR;
1966                 break;
1967         case OCRDMA_CQE_LOC_QP_OP_ERR:
1968                 ibwc_status = IB_WC_LOC_QP_OP_ERR;
1969                 break;
1970         case OCRDMA_CQE_LOC_EEC_OP_ERR:
1971                 ibwc_status = IB_WC_LOC_EEC_OP_ERR;
1972                 break;
1973         case OCRDMA_CQE_LOC_PROT_ERR:
1974                 ibwc_status = IB_WC_LOC_PROT_ERR;
1975                 break;
1976         case OCRDMA_CQE_WR_FLUSH_ERR:
1977                 ibwc_status = IB_WC_WR_FLUSH_ERR;
1978                 break;
1979         case OCRDMA_CQE_MW_BIND_ERR:
1980                 ibwc_status = IB_WC_MW_BIND_ERR;
1981                 break;
1982         case OCRDMA_CQE_BAD_RESP_ERR:
1983                 ibwc_status = IB_WC_BAD_RESP_ERR;
1984                 break;
1985         case OCRDMA_CQE_LOC_ACCESS_ERR:
1986                 ibwc_status = IB_WC_LOC_ACCESS_ERR;
1987                 break;
1988         case OCRDMA_CQE_REM_INV_REQ_ERR:
1989                 ibwc_status = IB_WC_REM_INV_REQ_ERR;
1990                 break;
1991         case OCRDMA_CQE_REM_ACCESS_ERR:
1992                 ibwc_status = IB_WC_REM_ACCESS_ERR;
1993                 break;
1994         case OCRDMA_CQE_REM_OP_ERR:
1995                 ibwc_status = IB_WC_REM_OP_ERR;
1996                 break;
1997         case OCRDMA_CQE_RETRY_EXC_ERR:
1998                 ibwc_status = IB_WC_RETRY_EXC_ERR;
1999                 break;
2000         case OCRDMA_CQE_RNR_RETRY_EXC_ERR:
2001                 ibwc_status = IB_WC_RNR_RETRY_EXC_ERR;
2002                 break;
2003         case OCRDMA_CQE_LOC_RDD_VIOL_ERR:
2004                 ibwc_status = IB_WC_LOC_RDD_VIOL_ERR;
2005                 break;
2006         case OCRDMA_CQE_REM_INV_RD_REQ_ERR:
2007                 ibwc_status = IB_WC_REM_INV_RD_REQ_ERR;
2008                 break;
2009         case OCRDMA_CQE_REM_ABORT_ERR:
2010                 ibwc_status = IB_WC_REM_ABORT_ERR;
2011                 break;
2012         case OCRDMA_CQE_INV_EECN_ERR:
2013                 ibwc_status = IB_WC_INV_EECN_ERR;
2014                 break;
2015         case OCRDMA_CQE_INV_EEC_STATE_ERR:
2016                 ibwc_status = IB_WC_INV_EEC_STATE_ERR;
2017                 break;
2018         case OCRDMA_CQE_FATAL_ERR:
2019                 ibwc_status = IB_WC_FATAL_ERR;
2020                 break;
2021         case OCRDMA_CQE_RESP_TIMEOUT_ERR:
2022                 ibwc_status = IB_WC_RESP_TIMEOUT_ERR;
2023                 break;
2024         default:
2025                 ibwc_status = IB_WC_GENERAL_ERR;
2026                 break;
2027         }
2028         return ibwc_status;
2029 }
2030
2031 static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
2032                       u32 wqe_idx)
2033 {
2034         struct ocrdma_hdr_wqe *hdr;
2035         struct ocrdma_sge *rw;
2036         int opcode;
2037
2038         hdr = ocrdma_hwq_head_from_idx(&qp->sq, wqe_idx);
2039
2040         ibwc->wr_id = qp->wqe_wr_id_tbl[wqe_idx].wrid;
2041         /* Undo the hdr->cw swap */
2042         opcode = le32_to_cpu(hdr->cw) & OCRDMA_WQE_OPCODE_MASK;
2043         switch (opcode) {
2044         case OCRDMA_WRITE:
2045                 ibwc->opcode = IB_WC_RDMA_WRITE;
2046                 break;
2047         case OCRDMA_READ:
2048                 rw = (struct ocrdma_sge *)(hdr + 1);
2049                 ibwc->opcode = IB_WC_RDMA_READ;
2050                 ibwc->byte_len = rw->len;
2051                 break;
2052         case OCRDMA_SEND:
2053                 ibwc->opcode = IB_WC_SEND;
2054                 break;
2055         case OCRDMA_LKEY_INV:
2056                 ibwc->opcode = IB_WC_LOCAL_INV;
2057                 break;
2058         default:
2059                 ibwc->status = IB_WC_GENERAL_ERR;
2060                 pr_err("%s() invalid opcode received = 0x%x\n",
2061                        __func__, opcode);
2062                 break;
2063         }
2064 }
2065
2066 static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
2067                                                 struct ocrdma_cqe *cqe)
2068 {
2069         if (is_cqe_for_sq(cqe)) {
2070                 cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2071                                 cqe->flags_status_srcqpn) &
2072                                         ~OCRDMA_CQE_STATUS_MASK);
2073                 cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2074                                 cqe->flags_status_srcqpn) |
2075                                 (OCRDMA_CQE_WR_FLUSH_ERR <<
2076                                         OCRDMA_CQE_STATUS_SHIFT));
2077         } else {
2078                 if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2079                         cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2080                                         cqe->flags_status_srcqpn) &
2081                                                 ~OCRDMA_CQE_UD_STATUS_MASK);
2082                         cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2083                                         cqe->flags_status_srcqpn) |
2084                                         (OCRDMA_CQE_WR_FLUSH_ERR <<
2085                                                 OCRDMA_CQE_UD_STATUS_SHIFT));
2086                 } else {
2087                         cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2088                                         cqe->flags_status_srcqpn) &
2089                                                 ~OCRDMA_CQE_STATUS_MASK);
2090                         cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2091                                         cqe->flags_status_srcqpn) |
2092                                         (OCRDMA_CQE_WR_FLUSH_ERR <<
2093                                                 OCRDMA_CQE_STATUS_SHIFT));
2094                 }
2095         }
2096 }
2097
2098 static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2099                                   struct ocrdma_qp *qp, int status)
2100 {
2101         bool expand = false;
2102
2103         ibwc->byte_len = 0;
2104         ibwc->qp = &qp->ibqp;
2105         ibwc->status = ocrdma_to_ibwc_err(status);
2106
2107         ocrdma_flush_qp(qp);
2108         ocrdma_qp_state_machine(qp, IB_QPS_ERR, NULL);
2109
2110         /* if wqes/rqes are still pending for which cqes must be
2111          * returned, trigger expanding this cqe into flush completions.
2112          */
2113         if (!is_hw_rq_empty(qp) || !is_hw_sq_empty(qp)) {
2114                 expand = true;
2115                 ocrdma_set_cqe_status_flushed(qp, cqe);
2116         }
2117         return expand;
2118 }
2119
2120 static bool ocrdma_update_err_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2121                                   struct ocrdma_qp *qp, int status)
2122 {
2123         ibwc->opcode = IB_WC_RECV;
2124         ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2125         ocrdma_hwq_inc_tail(&qp->rq);
2126
2127         return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2128 }
2129
2130 static bool ocrdma_update_err_scqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2131                                   struct ocrdma_qp *qp, int status)
2132 {
2133         ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2134         ocrdma_hwq_inc_tail(&qp->sq);
2135
2136         return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2137 }
2138
2140 static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
2141                                  struct ocrdma_cqe *cqe, struct ib_wc *ibwc,
2142                                  bool *polled, bool *stop)
2143 {
2144         bool expand;
2145         int status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2146                 OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2147
2148         /* the hw sq is empty but the rq is not, so keep the cqe
2149          * in order to get the cq event again.
2150          */
2151         if (is_hw_sq_empty(qp) && !is_hw_rq_empty(qp)) {
2152                 /* when the rq and sq share the same cq, it is safe to
2153                  * return flush cqes for the RQEs.
2154                  */
2155                 if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
2156                         *polled = true;
2157                         status = OCRDMA_CQE_WR_FLUSH_ERR;
2158                         expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
2159                 } else {
2160                         /* stop processing further cqe as this cqe is used for
2161                          * triggering cq event on buddy cq of RQ.
2162                          * When QP is destroyed, this cqe will be removed
2163                          * from the cq's hardware q.
2164                          */
2165                         *polled = false;
2166                         *stop = true;
2167                         expand = false;
2168                 }
2169         } else {
2170                 *polled = true;
2171                 expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
2172         }
2173         return expand;
2174 }
2175
2176 static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
2177                                      struct ocrdma_cqe *cqe,
2178                                      struct ib_wc *ibwc, bool *polled)
2179 {
2180         bool expand = false;
2181         int tail = qp->sq.tail;
2182         u32 wqe_idx;
2183
2184         if (!qp->wqe_wr_id_tbl[tail].signaled) {
2185                 *polled = false;    /* WC cannot be consumed yet */
2186         } else {
2187                 ibwc->status = IB_WC_SUCCESS;
2188                 ibwc->wc_flags = 0;
2189                 ibwc->qp = &qp->ibqp;
2190                 ocrdma_update_wc(qp, ibwc, tail);
2191                 *polled = true;
2192         }
2193         wqe_idx = le32_to_cpu(cqe->wq.wqeidx) & OCRDMA_CQE_WQEIDX_MASK;
2194         if (tail != wqe_idx)
2195                 expand = true; /* Coalesced CQE can't be consumed yet */
2196
2197         ocrdma_hwq_inc_tail(&qp->sq);
2198         return expand;
2199 }
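/*
 * Coalescing example (illustrative numbers): if sq.tail is 3 and the
 * cqe's wqeidx is 5, the hardware has coalesced the completions for WQEs
 * 3..5 into a single cqe.  "expand" stays true, so ocrdma_poll_hwcq()
 * re-runs this routine on the same cqe, advancing the tail once per pass
 * until it reaches the reported wqeidx; only signaled WQEs produce a WC
 * along the way.
 */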
2200
2201 static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2202                              struct ib_wc *ibwc, bool *polled, bool *stop)
2203 {
2204         int status;
2205         bool expand;
2206
2207         status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2208                 OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2209
2210         if (status == OCRDMA_CQE_SUCCESS)
2211                 expand = ocrdma_poll_success_scqe(qp, cqe, ibwc, polled);
2212         else
2213                 expand = ocrdma_poll_err_scqe(qp, cqe, ibwc, polled, stop);
2214         return expand;
2215 }
2216
2217 static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe)
2218 {
2219         int status;
2220
2221         status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2222                 OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
2223         ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
2224                                                 OCRDMA_CQE_SRCQP_MASK;
2225         ibwc->pkey_index = le32_to_cpu(cqe->ud.rxlen_pkey) &
2226                                                 OCRDMA_CQE_PKEY_MASK;
2227         ibwc->wc_flags = IB_WC_GRH;
2228         ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
2229                                         OCRDMA_CQE_UD_XFER_LEN_SHIFT);
2230         return status;
2231 }
2232
2233 static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
2234                                        struct ocrdma_cqe *cqe,
2235                                        struct ocrdma_qp *qp)
2236 {
2237         unsigned long flags;
2238         struct ocrdma_srq *srq;
2239         u32 wqe_idx;
2240
2241         srq = get_ocrdma_srq(qp->ibqp.srq);
2242         wqe_idx = le32_to_cpu(cqe->rq.buftag_qpn) >> OCRDMA_CQE_BUFTAG_SHIFT;
2243         ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
2244         spin_lock_irqsave(&srq->q_lock, flags);
2245         ocrdma_srq_toggle_bit(srq, wqe_idx);
2246         spin_unlock_irqrestore(&srq->q_lock, flags);
2247         ocrdma_hwq_inc_tail(&srq->rq);
2248 }
2249
2250 static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2251                                 struct ib_wc *ibwc, bool *polled, bool *stop,
2252                                 int status)
2253 {
2254         bool expand;
2255
2256         /* the hw rq is empty but the sq is not, so keep the cqe
2257          * to get the cq event again.
2258          */
2259         if (is_hw_rq_empty(qp) && !is_hw_sq_empty(qp)) {
2260                 if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
2261                         *polled = true;
2262                         status = OCRDMA_CQE_WR_FLUSH_ERR;
2263                         expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
2264                 } else {
2265                         *polled = false;
2266                         *stop = true;
2267                         expand = false;
2268                 }
2269         } else {
2270                 *polled = true;
2271                 expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
2272         }
2273         return expand;
2274 }
2275
2276 static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
2277                                      struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
2278 {
2279         ibwc->opcode = IB_WC_RECV;
2280         ibwc->qp = &qp->ibqp;
2281         ibwc->status = IB_WC_SUCCESS;
2282
2283         if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
2284                 ocrdma_update_ud_rcqe(ibwc, cqe);
2285         else
2286                 ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);
2287
2288         if (is_cqe_imm(cqe)) {
2289                 ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
2290                 ibwc->wc_flags |= IB_WC_WITH_IMM;
2291         } else if (is_cqe_wr_imm(cqe)) {
2292                 ibwc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
2293                 ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
2294                 ibwc->wc_flags |= IB_WC_WITH_IMM;
2295         } else if (is_cqe_invalidated(cqe)) {
2296                 ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt);
2297                 ibwc->wc_flags |= IB_WC_WITH_INVALIDATE;
2298         }
2299         if (qp->ibqp.srq)
2300                 ocrdma_update_free_srq_cqe(ibwc, cqe, qp);
2301         else {
2302                 ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2303                 ocrdma_hwq_inc_tail(&qp->rq);
2304         }
2305 }
2306
2307 static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2308                              struct ib_wc *ibwc, bool *polled, bool *stop)
2309 {
2310         int status;
2311         bool expand = false;
2312
2313         ibwc->wc_flags = 0;
2314         if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
2315                 status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2316                                         OCRDMA_CQE_UD_STATUS_MASK) >>
2317                                         OCRDMA_CQE_UD_STATUS_SHIFT;
2318         else
2319                 status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2320                              OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2321
2322         if (status == OCRDMA_CQE_SUCCESS) {
2323                 *polled = true;
2324                 ocrdma_poll_success_rcqe(qp, cqe, ibwc);
2325         } else {
2326                 expand = ocrdma_poll_err_rcqe(qp, cqe, ibwc, polled, stop,
2327                                               status);
2328         }
2329         return expand;
2330 }
2331
2332 static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe,
2333                                    u16 cur_getp)
2334 {
2335         if (cq->phase_change) {
2336                 if (cur_getp == 0)
2337                         cq->phase = (~cq->phase & OCRDMA_CQE_VALID);
2338         } else {
2339                 /* clear valid bit */
2340                 cqe->flags_status_srcqpn = 0;
        }
2341 }
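/*
 * Phase-tracking example: with phase_change set, the expected valid bit in
 * cq->phase flips every time the get pointer wraps to 0, so cqes written
 * during the previous pass over the ring no longer compare as valid and
 * are not re-consumed.  Without phase_change, the valid bit of each
 * consumed cqe is cleared explicitly instead.
 */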
2342
2343 static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
2344                             struct ib_wc *ibwc)
2345 {
2346         u16 qpn = 0;
2347         int i = 0;
2348         bool expand = false;
2349         int polled_hw_cqes = 0;
2350         struct ocrdma_qp *qp = NULL;
2351         struct ocrdma_dev *dev = cq->dev;
2352         struct ocrdma_cqe *cqe;
2353         u16 cur_getp;
        bool polled = false;
        bool stop = false;
2354
2355         cur_getp = cq->getp;
2356         while (num_entries) {
2357                 cqe = cq->va + cur_getp;
2358                 /* check whether valid cqe or not */
2359                 if (!is_cqe_valid(cq, cqe))
2360                         break;
2361                 qpn = (le32_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK);
2362                 /* ignore discarded cqe */
2363                 if (qpn == 0)
2364                         goto skip_cqe;
2365                 qp = dev->qp_tbl[qpn];
2366                 BUG_ON(qp == NULL);
2367
2368                 if (is_cqe_for_sq(cqe)) {
2369                         expand = ocrdma_poll_scqe(qp, cqe, ibwc, &polled,
2370                                                   &stop);
2371                 } else {
2372                         expand = ocrdma_poll_rcqe(qp, cqe, ibwc, &polled,
2373                                                   &stop);
2374                 }
2375                 if (expand)
2376                         goto expand_cqe;
2377                 if (stop)
2378                         goto stop_cqe;
2379                 /* clear qpn to avoid duplicate processing by discard_cqe() */
2380                 cqe->cmn.qpn = 0;
2381 skip_cqe:
2382                 polled_hw_cqes += 1;
2383                 cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
2384                 ocrdma_change_cq_phase(cq, cqe, cur_getp);
2385 expand_cqe:
2386                 if (polled) {
2387                         num_entries -= 1;
2388                         i += 1;
2389                         ibwc = ibwc + 1;
2390                         polled = false;
2391                 }
2392         }
2393 stop_cqe:
2394         cq->getp = cur_getp;
2395         if (polled_hw_cqes || expand || stop) {
2396                 ocrdma_ring_cq_db(dev, cq->id, cq->armed, cq->solicited,
2397                                   polled_hw_cqes);
2398         }
2399         return i;
2400 }
2401
2402 /* insert error cqes if the CQ of the QP's SQ or RQ matches the CQ under poll. */
2403 static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries,
2404                               struct ocrdma_qp *qp, struct ib_wc *ibwc)
2405 {
2406         int err_cqes = 0;
2407
2408         while (num_entries) {
2409                 if (is_hw_sq_empty(qp) && is_hw_rq_empty(qp))
2410                         break;
2411                 if (!is_hw_sq_empty(qp) && qp->sq_cq == cq) {
2412                         ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2413                         ocrdma_hwq_inc_tail(&qp->sq);
2414                 } else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) {
2415                         ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2416                         ocrdma_hwq_inc_tail(&qp->rq);
2417                 } else
2418                         return err_cqes;
2419                 ibwc->byte_len = 0;
2420                 ibwc->status = IB_WC_WR_FLUSH_ERR;
2421                 ibwc = ibwc + 1;
2422                 err_cqes += 1;
2423                 num_entries -= 1;
2424         }
2425         return err_cqes;
2426 }
2427
2428 int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
2429 {
2430         int cqes_to_poll = num_entries;
2431         struct ocrdma_cq *cq = NULL;
2432         unsigned long flags;
2433         struct ocrdma_dev *dev;
2434         int num_os_cqe = 0, err_cqes = 0;
2435         struct ocrdma_qp *qp;
2436
2437         cq = get_ocrdma_cq(ibcq);
2438         dev = cq->dev;
2439
2440         /* poll cqes from adapter CQ */
2441         spin_lock_irqsave(&cq->cq_lock, flags);
2442         num_os_cqe = ocrdma_poll_hwcq(cq, cqes_to_poll, wc);
2443         spin_unlock_irqrestore(&cq->cq_lock, flags);
2444         cqes_to_poll -= num_os_cqe;
2445
2446         if (cqes_to_poll) {
2447                 wc = wc + num_os_cqe;
2448                 /* the adapter returns a single error cqe when a qp moves
2449                  * to the error state, so insert error cqes with wc_status
2450                  * FLUSHED for the pending WQEs and RQEs of every QP whose
2451                  * SQ or RQ uses this CQ.
2452                  */
2453                 spin_lock_irqsave(&dev->flush_q_lock, flags);
2454                 list_for_each_entry(qp, &cq->sq_head, sq_entry) {
2455                         if (cqes_to_poll == 0)
2456                                 break;
2457                         err_cqes = ocrdma_add_err_cqe(cq, cqes_to_poll, qp, wc);
2458                         cqes_to_poll -= err_cqes;
2459                         num_os_cqe += err_cqes;
2460                         wc = wc + err_cqes;
2461                 }
2462                 spin_unlock_irqrestore(&dev->flush_q_lock, flags);
2463         }
2464         return num_os_cqe;
2465 }
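/*
 * Consumer-side sketch (illustrative only, not part of this driver): a ULP
 * reaches ocrdma_poll_cq()/ocrdma_arm_cq() through the core verbs wrappers
 * wired up in the device's ib_device method table:
 *
 *	struct ib_wc wc[16];
 *	int n;
 *
 *	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);	-- arm for the next cqe
 *	while ((n = ib_poll_cq(cq, 16, wc)) > 0)
 *		;	-- process wc[0..n-1]
 */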
2466
2467 int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
2468 {
2469         struct ocrdma_cq *cq;
2470         unsigned long flags;
2471         struct ocrdma_dev *dev;
2472         u16 cq_id;
2473         u16 cur_getp;
2474         struct ocrdma_cqe *cqe;
2475
2476         cq = get_ocrdma_cq(ibcq);
2477         cq_id = cq->id;
2478         dev = cq->dev;
2479
2480         spin_lock_irqsave(&cq->cq_lock, flags);
2481         if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
2482                 cq->armed = true;
2483         if (cq_flags & IB_CQ_SOLICITED)
2484                 cq->solicited = true;
2485
2486         cur_getp = cq->getp;
2487         cqe = cq->va + cur_getp;
2488
2489         /* check whether any valid cqe exists; if not, it is safe to
2490          * arm. If a cqe is not yet consumed, let it be consumed first
2491          * and arm then, to avoid false interrupts.
2492          */
2493         if (!is_cqe_valid(cq, cqe) || cq->arm_needed) {
2494                 cq->arm_needed = false;
2495                 ocrdma_ring_cq_db(dev, cq_id, cq->armed, cq->solicited, 0);
2496         }
2497         spin_unlock_irqrestore(&cq->cq_lock, flags);
2498         return 0;
2499 }