public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
* Segmentation Fault when accessing QPN of a RC Queue pair
@ 2011-03-23 12:55 Konstantin Boyanov
       [not found] ` <4D89EDC4.4050309-T5F83Mi6MZE@public.gmane.org>
  0 siblings, 1 reply; 5+ messages in thread
From: Konstantin Boyanov @ 2011-03-23 12:55 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

Hello list,

I have a very peculiar problem with a simple program of mine. For the
last week I have been trying to get a simple RC-based program to run and
exchange a single message between two peers, but right now I am stuck
with a segmentation fault when accessing the QPN of one of the QPs. It
is a peculiar problem because I create two identical QPs (one on the
sending peer and one on the receiving peer): when I access the fields of
one queue pair structure I have no problems, but when I do the same on
the other I get a segmentation fault.

I am trying to create a reliable connection between two QPs residing on
the same HCA and then send messages via the loopback mechanism.

I first open the HCA and init the corresponding context with it:


<CODE>
/*
 * Open the first InfiniBand device found on this host.
 *
 * ibthr   - per-thread IB state; ibthr->ibdev receives the selected device
 *           and ibthr->ibctx the context returned by ibv_open_device().
 * verbose - unused here; kept so existing callers do not have to change.
 *
 * Every failure is fatal and ends the process with exit(1). Continuing
 * would dereference a NULL device list or open an unset device pointer.
 */
static void gpeIBopenDev(ib_thread_info *ibthr, int verbose){

    struct ibv_device **infband_dev_list;

    (void)verbose;

    infband_dev_list = ibv_get_device_list(NULL);
    if(!infband_dev_list){
        perror("ibv_get_device_list");
        exit(1);
    }

    /* The list is NULL-terminated, so an empty list has NULL in slot 0. */
    if(infband_dev_list[0] == NULL){
        printf("Error: No IB device found!\n");
        ibv_free_device_list(infband_dev_list);
        exit(1);
    }
    ibthr->ibdev = infband_dev_list[0];

    ibthr->ibctx = ibv_open_device(ibthr->ibdev);
    if(!ibthr->ibctx){
        perror("ibv_open_device");
        ibv_free_device_list(infband_dev_list);
        exit(1);
    }

    /*
     * Release the list only after the device has been opened: per the
     * ibv_free_device_list() man page, opened devices stay usable while
     * pointers to unopened ones become invalid.
     */
    ibv_free_device_list(infband_dev_list);
}
</CODE>


Then I allocate the memory buffers, create a protection domain to be
associated with the QPs, and at last create the memory regions which
the QPs will be using:


<CODE>
/*
 * Allocate and register the message buffer for one IB endpoint.
 *
 * ibthr - per-thread IB state. ibthr->ibctx must already be open. On
 *         return buf, ib_prot_domain, out_data, in_data and mr are set.
 * barg  - benchmark arguments; tx_byte_sz_arg, conn_type_arg and verbose
 *         are read.
 *
 * The buffer is page aligned, zero filled and split into two equal
 * halves: out_data points at the first half and in_data at the second.
 * For UD each half is 40 bytes larger than the payload (presumably room
 * for the 40-byte GRH delivered with UD receives - confirm against the
 * code that posts the receive).
 *
 * Every failure is fatal and ends the process with exit(1).
 */
static void gpeIBinitMemory(ib_thread_info *ibthr, bench_args_t *barg){

      enum { UD_EXTRA_BYTES = 40 };

      long int pg_sz;
      size_t half_sz;   /* bytes available to each direction */
      size_t buf_sz;    /* total bytes allocated and registered */

      pg_sz = sysconf(_SC_PAGESIZE);
      if (pg_sz <= 0) {
        perror("sysconf");
        exit(1);
      }

      half_sz = (size_t)barg->tx_byte_sz_arg;
      if (barg->conn_type_arg==CONN_TYPE_UD)
        half_sz += UD_EXTRA_BYTES;
      buf_sz = half_sz * 2;

      ibthr->buf = memalign((size_t)pg_sz, buf_sz);
      if (!ibthr->buf) {
        printf("Could not allocate buffer.\n");
        perror("memalign");
        exit(1);
      }
      memset(ibthr->buf, 0, buf_sz);

      ibthr->ib_prot_domain =  ibv_alloc_pd(ibthr->ibctx);
      if(!ibthr->ib_prot_domain){
          perror("ibv_alloc_pd");
          exit(1);
      }

      if(barg->verbose == 1)
          printf("Initialize the inbound and outbound context buffers \n");

      /*
       * Each direction gets its own full half of the buffer. The previous
       * offsets, (size-1) and (size-1)*2, left in_data only two bytes
       * short of the end of the RC buffer and pointed before the buffer
       * when the size was 0.
       */
      ibthr->out_data = (char *)ibthr->buf;
      ibthr->in_data  = (char *)ibthr->buf + half_sz;

      if(barg->verbose == 1)
          printf("initialize memory region (MR)\n");

      ibthr->mr = ibv_reg_mr(ibthr->ib_prot_domain, ibthr->buf, buf_sz,
                      IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
      if(!ibthr->mr){
        /* Without a registered MR no work request can reference buf. */
        perror("ibv_reg_mr");
        exit(1);
      }
}
</CODE>


After this has been done successfully, I create and initialize the
completion channel and completion queues for this context's Queue Pair,
and then create the (still uninitialized) Queue Pair itself.


<CODE>
/*
 * Create the completion channel, the send and receive completion queues
 * and the Queue Pair for one IB endpoint. The QP is only created here;
 * no state transition is performed in this function.
 *
 * ibthr - per-thread IB state. ibctx and ib_prot_domain must already be
 *         set. On return ibcompl_ch, send_cq, recv_cq and qp are set.
 * barg  - benchmark arguments; tx_dpth_arg sizes both work queues and
 *         both CQs, conn_type_arg selects RC/UC/UD, verbose enables
 *         progress output.
 *
 * Every failure is fatal and ends the process with exit(1).
 */
static void gpeIBcreateQP(ib_thread_info *ibthr, bench_args_t *barg){

    struct ibv_qp_init_attr qp_init_attr;
    memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));

    /*
     * One scatter/gather element per work request, so a WR can describe
     * a single data buffer. Requesting 0 leaves no room for a WR that
     * carries a buffer.
     */
    qp_init_attr.cap.max_send_sge = 1;
    qp_init_attr.cap.max_recv_sge = 1;
    qp_init_attr.cap.max_send_wr = barg->tx_dpth_arg;
    qp_init_attr.cap.max_recv_wr = barg->tx_dpth_arg;

    /* 0: only WRs flagged IBV_SEND_SIGNALED generate a completion. */
    qp_init_attr.sq_sig_all = 0;

    switch (barg->conn_type_arg) {
      case CONN_TYPE_RC :
          qp_init_attr.qp_type = IBV_QPT_RC;
          break;
      case CONN_TYPE_UC :
          qp_init_attr.qp_type = IBV_QPT_UC;
          break;
      case CONN_TYPE_UD :
          qp_init_attr.qp_type = IBV_QPT_UD;
          break;
      default:
          printf("Unknown connection type %d \n",barg->conn_type_arg);
          exit(1);
    }

    if(barg->verbose == 1)
        printf(" Initialize completion channel \n");

    ibthr->ibcompl_ch = ibv_create_comp_channel(ibthr->ibctx);
    if(!ibthr->ibcompl_ch){
      perror("ibv_create_comp_channel");
      exit(1);
    }

    if(barg->verbose == 1)
      printf(" Create the Completiotion Queues (CQs) \n");

    /* Both CQs report their events through the same channel. */
    ibthr->send_cq = ibv_create_cq(ibthr->ibctx, barg->tx_dpth_arg,
                                   NULL, ibthr->ibcompl_ch, 0);
    if(!ibthr->send_cq){
      perror("ibv_create_cq");
      exit(1);
    }
    qp_init_attr.send_cq = ibthr->send_cq;

    ibthr->recv_cq = ibv_create_cq(ibthr->ibctx, barg->tx_dpth_arg,
                                   NULL, ibthr->ibcompl_ch, 0);
    if(!ibthr->recv_cq){
      perror("ibv_create_cq");
      exit(1);
    }
    qp_init_attr.recv_cq = ibthr->recv_cq;

    /* Create the Queue Pair on the two CQs set up above. */
    ibthr->qp = ibv_create_qp(ibthr->ib_prot_domain, &qp_init_attr);
    if (!ibthr->qp) {
        perror("ibv_create_qp");
        fprintf(stderr, "Couldn't create QP, %p\n", (void *)ibthr->qp);
        exit(1);
    }

    /* handle and qp_num are uint32_t in struct ibv_qp, hence %u. */
    printf("%u\n",ibthr->qp->handle);
    printf("%u\n",ibthr->qp->qp_num);
    printf("%d\n",ibthr->qp->qp_type);

}
</CODE>


Note the last three printf's. Here I can access the fields of the QP
without a problem. However, after altering the QPs to the INIT state
(which finishes OK without errors), I get a segmentation fault on the
following invocation (snippet from main()):


<CODE>
  ib_thread_info ping_ib_thread;
  ib_thread_info pong_ib_thread;

  // Get the list of available devices and open the first one found
  gpeIBopenDev(&pong_ib_thread, optVerbose);
  gpeIBopenDev(&ping_ib_thread, optVerbose);
 
  // Initialize send and receive buffers, Memory regions and protection 
domains
  gpeIBinitMemory(&pong_ib_thread, &barg);
  gpeIBinitMemory(&ping_ib_thread, &barg);

  // Create and initial empty queue pair
  gpeIBcreateQP(&pong_ib_thread, &barg);
  gpeIBcreateQP(&ping_ib_thread, &barg);

  // Then, alter the state of the Queue Pairs to the INIT state
  //  ib_ping_qp_attr.pkey_index      = 1;
  //  ib_pong_qp_attr.pkey_index      = 1;
  gpeIBinitQP(&ib_pong_qp_attr, &pong_ib_thread, &barg);
  gpeIBinitQP(&ib_ping_qp_attr, &ping_ib_thread, &barg);

dest.qpn = ping_ib_thread.qp->qp_num;  <<-- RESULTS IN SEGMENTATION 
FAULT !!!
rem_dest.qpn = pong_ib_thread.qp->qp_num; << THIS DOES NOT RESULT IN 
SEGMENTATION FAULT !!!
</CODE>


I have little to no idea why the one causes a SEGFAULT and the other
does not; after all, I have created and initialized them in the same
way. I am really getting frustrated by this: I have spent the past 3
days looking for the cause, but to no avail. If I omit the line where
the SEGFAULT occurs, I get another one later when trying to get the QP
to the RTS state, so something is definitely wrong, but right now I
don't have the slightest clue what. So please, if someone has some idea
what I might be doing wrong, share it with me. I am really starting to
hate this code, as there is no comprehensible description of the
requirements for creating a QP for the different transport types, and
also no examples (at least I didn't find any).


Hope my mail didn't get too long, boring and confusing.
Any help will be appreciated!



Best Regards,
Konstantin Boyanov

P.S.
The whole code is available here:
http://www.ifh.de/~boyanov/ibpipo/
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: Segmentation Fault when accessing QPN of a RC Queue pair
       [not found] ` <4D89EDC4.4050309-T5F83Mi6MZE@public.gmane.org>
@ 2011-03-23 16:54   ` Hefty, Sean
       [not found]     ` <AANLkTi=ppeQSxHTA18pFM6=y06Q_eNGsY8Fn=gXtXaBM@mail.gmail.com>
  0 siblings, 1 reply; 5+ messages in thread
From: Hefty, Sean @ 2011-03-23 16:54 UTC (permalink / raw)
  To: Konstantin Boyanov,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org

> I have little to no idea why the one causes a SEGFAULT and the other
> does not; after all, I have created and initialized them in the same
> way. I am really getting frustrated by this: I have spent the past 3
> days looking for the cause, but to no avail. If I omit the line where
> the SEGFAULT occurs, I get another one later when trying to get the QP
> to the RTS state, so something is definitely wrong, but right now I
> don't have the slightest clue what. So please, if someone has some idea
> what I might be doing wrong, share it with me. I am really starting to
> hate this code, as there is no comprehensible description of the
> requirements for creating a QP for the different transport types, and
> also no examples (at least I didn't find any).

Both libibverbs and librdmacm provide sample programs, and you could also look at some of the perftest samples.  The simplest of these for RC QPs is probably the rdma_server/rdma_client samples with the librdmacm, but these make use of newer APIs.  For UC QPs, you would need to look at libibverbs.  UD QPs are setup differently, but both libibverbs and librdmacm have samples.

There wasn't enough context provided in the code snippet for me to see the cause of the crash.

- Sean
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: Segmentation Fault when accessing QPN of a RC Queue pair
       [not found]       ` <AANLkTi=ppeQSxHTA18pFM6=y06Q_eNGsY8Fn=gXtXaBM-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2011-03-28 10:57         ` Konstantin Boyanov
       [not found]           ` <4D906993.7030404-T5F83Mi6MZE@public.gmane.org>
  0 siblings, 1 reply; 5+ messages in thread
From: Konstantin Boyanov @ 2011-03-28 10:57 UTC (permalink / raw)
  To: Konstantin Boyanov
  Cc: Hefty, Sean, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org

Sorry for the last incomplete mail, I accidentally pressed Enter while
the wrong window had focus...
Here we go again:

Hello,

First of all thanks for the reply! I was really getting desperate about 
this issue.

I have looked at the libibverbs examples and I cannot see any
differences between my code and the code in the examples (at least
functionally; my program is clearly structured differently).

About the snippets: I didn't want to copy/paste all the code into my 
mail, that's why I provided a link to where the code can be found:


http://www.ifh.de/~boyanov/ibpipo/ <http://www.ifh.de/%7Eboyanov/ibpipo/>


I still cannot understand what I am doing wrong, and why the errors
occur only on one of the Queue Pairs. Is it possible that I am trying to
interact with one of the "reserved" QPs, like QP0 and QP1?

Another thing: I am also getting segfaults when trying to modify the LID
of the QP contexts. I first get the LID from the device, and then when
trying to set it in my custom QP context structure I get a segfault:

      struct ibv_port_attr tmp_port_attr;

      ret = ibv_query_port(ping_ib_thread.ibctx, barg.port, &tmp_port_attr);
      if ( ret != 0){
        printf("error querying IB port %d for context %s\n", port, 
ping_ib_thread.ibctx->device->dev_path);
        perror("ibv_query_port");
      }

    ping_ib_thread.ibport_attr->lid = tmp_port_attr.lid;
    pong_ib_thread.ibport_attr->lid = tmp_port_attr.lid;

Here, barg.port = 1 and port 1 is in the PORT_ACTIVE state.  I know I am 
missing something small but I am so confused and frustrated from this 
code I cannot see anything anymore in it...

Please, if you have some clues to what can be wrong, at least where I 
can dig for more info and possible problems, let me know.

Best Regards,
Konstantin
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: Segmentation Fault when accessing QPN of a RC Queue pair
       [not found]           ` <4D906993.7030404-T5F83Mi6MZE@public.gmane.org>
@ 2011-03-28 11:22             ` Konstantin Boyanov
  2011-03-28 23:13             ` Hefty, Sean
  1 sibling, 0 replies; 5+ messages in thread
From: Konstantin Boyanov @ 2011-03-28 11:22 UTC (permalink / raw)
  To: Konstantin Boyanov
  Cc: Hefty, Sean, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org

Hello again,

Another thing came to my mind right now - can it be that I get the
errors described above just because I am using these static functions
for setting up QPs and the like?  I mean, I pass pointers to the context
data structures to these utility functions, and I am not sure whether or
not these are handled correctly after returning to the main function.
Possible?

Regards,
Konstantin
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: Segmentation Fault when accessing QPN of a RC Queue pair
       [not found]           ` <4D906993.7030404-T5F83Mi6MZE@public.gmane.org>
  2011-03-28 11:22             ` Konstantin Boyanov
@ 2011-03-28 23:13             ` Hefty, Sean
  1 sibling, 0 replies; 5+ messages in thread
From: Hefty, Sean @ 2011-03-28 23:13 UTC (permalink / raw)
  To: Konstantin Boyanov, Konstantin Boyanov
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org

I only looked quickly, but in main() from the links that you provided:

	ubenchIBsetQPstateRTR(&pong_as_dest, &ib_ping_qp_attr, &ping_ib_thread, &barg);
	ubenchIBsetQPstateRTR(&ping_as_dest, &ib_pong_qp_attr, &ping_ib_thread, &barg);

I noticed that you pass ping_ib_thread into the above call twice.  Is that intentional, or should one be pong_ib_thread?

	// At last, modify the ping QP to the Ready-to-Send state
	ubenchIBsetQPstateRTS(&ping_as_dest, &ib_ping_qp_attr, &ping_ib_thread, &barg);

I didn't see where you called *RTS() for pong_ib_thread.  Also, the parameters into *RTS() differ from those passed into *RTR() when ping_ib_thread is the 3rd parameter.

- Sean

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2011-03-28 23:13 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-03-23 12:55 Segmentation Fault when accessing QPN of a RC Queue pair Konstantin Boyanov
     [not found] ` <4D89EDC4.4050309-T5F83Mi6MZE@public.gmane.org>
2011-03-23 16:54   ` Hefty, Sean
     [not found]     ` <AANLkTi=ppeQSxHTA18pFM6=y06Q_eNGsY8Fn=gXtXaBM@mail.gmail.com>
     [not found]       ` <AANLkTi=ppeQSxHTA18pFM6=y06Q_eNGsY8Fn=gXtXaBM-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2011-03-28 10:57         ` Konstantin Boyanov
     [not found]           ` <4D906993.7030404-T5F83Mi6MZE@public.gmane.org>
2011-03-28 11:22             ` Konstantin Boyanov
2011-03-28 23:13             ` Hefty, Sean

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox