netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
       [not found] <1188599815.5176.12.camel@dell>
@ 2007-09-05 18:34 ` Mike Christie
  2007-09-05 21:27   ` Anil Veerabhadrappa
  2007-09-07 22:36 ` Mike Christie
  1 sibling, 1 reply; 27+ messages in thread
From: Mike Christie @ 2007-09-05 18:34 UTC (permalink / raw)
  To: Michael Chan
  Cc: davem, netdev, open-iscsi, anilgv, talm, lusinsky, uri,
	SCSI Mailing List

Michael Chan wrote:
> diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
> index 6f2c71e..adcfbbc 100644
> --- a/drivers/scsi/Kconfig
> +++ b/drivers/scsi/Kconfig
> @@ -1800,6 +1800,8 @@ config ZFCP
>            called zfcp. If you want to compile it as a module, say M here
>            and read <file:Documentation/kbuild/modules.txt>.
>  
> +source "drivers/scsi/bnx2i/Kconfig"
> +
>  config SCSI_SRP
>  	tristate "SCSI RDMA Protocol helper library"
>  	depends on SCSI && PCI
> diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
> index 86a7ba7..65fe633 100644
> --- a/drivers/scsi/Makefile
> +++ b/drivers/scsi/Makefile
> @@ -133,6 +133,7 @@ obj-$(CONFIG_SCSI_IBMVSCSIS)	+= ibmvscsi/
>  obj-$(CONFIG_SCSI_HPTIOP)	+= hptiop.o
>  obj-$(CONFIG_SCSI_STEX)		+= stex.o
>  obj-$(CONFIG_PS3_ROM)		+= ps3rom.o
> +obj-$(CONFIG_SCSI_BNX2_ISCSI)	+= bnx2i/
>  
>  obj-$(CONFIG_ARM)		+= arm/
>  
> diff --git a/drivers/scsi/bnx2i/57xx_iscsi_constants.h b/drivers/scsi/bnx2i/57xx_iscsi_constants.h
> new file mode 100644
> index 0000000..61f07f9
> --- /dev/null
> +++ b/drivers/scsi/bnx2i/57xx_iscsi_constants.h
> @@ -0,0 +1,212 @@
> +#ifndef __57XX_ISCSI_CONSTANTS_H_
> +#define __57XX_ISCSI_CONSTANTS_H_
> +
> +/**
> +* This file defines HSI constants for the iSCSI flows
> +*/
> +
> +/* iSCSI request op codes */
> +#define ISCSI_OPCODE_NOP_OUT 			(0 | 0x40)
> +#define ISCSI_OPCODE_SCSI_CMD 			(1)
> +#define ISCSI_OPCODE_TMF_REQUEST 		(2 | 0x40)
> +#define ISCSI_OPCODE_LOGIN_REQUEST 		(3 | 0x40)
> +#define ISCSI_OPCODE_TEXT_REQUEST 		(4 | 0x40)
> +#define ISCSI_OPCODE_DATA_OUT 			(5)
> +#define ISCSI_OPCODE_LOGOUT_REQUEST 		(6 | 0x00)
> +#define ISCSI_OPCODE_CLEANUP_REQUEST		(7)


What is ISCSI_OPCODE_CLEANUP_REQUEST?


> +
> +/* iSCSI response/messages op codes */
> +#define ISCSI_OPCODE_NOP_IN 			(0x20)
> +#define ISCSI_OPCODE_SCSI_RESPONSE 		(0x21)
> +#define ISCSI_OPCODE_TMF_RESPONSE 		(0x22)
> +#define ISCSI_OPCODE_LOGIN_RESPONSE 		(0x23)
> +#define ISCSI_OPCODE_TEXT_RESPONSE 		(0x24)
> +#define ISCSI_OPCODE_DATA_IN			(0x25)
> +#define ISCSI_OPCODE_LOGOUT_RESPONSE 		(0x26)
> +#define ISCSI_OPCODE_CLEANUP_RESPONSE 		(0x27)
> +#define ISCSI_OPCODE_R2T			(0x31)
> +#define ISCSI_OPCODE_ASYNC_MSG 			(0x32)
> +#define ISCSI_OPCODE_REJECT 			(0x3f)
> +#define ISCSI_OPCODE_NOPOUT_LOCAL_COMPLETION	(0)


What does the LOCAL_COMPLETION on the nopout mean?


> +
> +/* iSCSI stages */
> +#define ISCSI_STAGE_SECURITY_NEGOTIATION (0)
> +#define ISCSI_STAGE_LOGIN_OPERATIONAL_NEGOTIATION (1)
> +#define ISCSI_STAGE_FULL_FEATURE_PHASE (3)
> +
> +/* iSCSI parameter defaults */
> +#define ISCSI_DEFAULT_HEADER_DIGEST 		(0)
> +#define ISCSI_DEFAULT_DATA_DIGEST 		(0)
> +#define ISCSI_DEFAULT_INITIAL_R2T 		(1)
> +#define ISCSI_DEFAULT_IMMEDIATE_DATA 		(1)
> +#define ISCSI_DEFAULT_MAX_PDU_LENGTH 		(0x2000)
> +#define ISCSI_DEFAULT_FIRST_BURST_LENGTH 	(0x10000)
> +#define ISCSI_DEFAULT_MAX_BURST_LENGTH 		(0x40000)
> +#define ISCSI_DEFAULT_MAX_OUTSTANDING_R2T 	(1)
> +
> +/* iSCSI parameter limits */
> +#define ISCSI_MIN_VAL_MAX_PDU_LENGTH (0x200)
> +#define ISCSI_MAX_VAL_MAX_PDU_LENGTH (0xffffff)
> +#define ISCSI_MIN_VAL_BURST_LENGTH (0x200)
> +#define ISCSI_MAX_VAL_BURST_LENGTH (0xffffff)
> +#define ISCSI_MIN_VAL_MAX_OUTSTANDING_R2T (1)
> +#define ISCSI_MAX_VAL_MAX_OUTSTANDING_R2T (0xff) /* 0x10000 according to RFC */
> +
> +/* SCSI command response codes */
> +#define ISCSI_SCSI_CMD_RESPONSE_CMD_COMPLETED	(0x00)
> +#define ISCSI_SCSI_CMD_RESPONSE_TARGET_FAILURE	(0x01)
> +
> +/* SCSI command status codes */
> +#define ISCSI_SCSI_CMD_STATUS_GOOD 		(0x00)
> +#define ISCSI_SCSI_CMD_STATUS_CHECK_CONDITION 	(0x02)
> +#define ISCSI_SCSI_CMD_STATUS_INTERMIDIATE 	(0x10)
> +
> +/* TMF codes */
> +#define ISCSI_TMF_ABORT_TASK 			(1)
> +#define ISCSI_TMF_LOGICAL_UNIT_RESET 		(5)
> +
> +/* TMF response codes */
> +#define ISCSI_TMF_RESPONSE_FUNCTION_COMPLETE			(0x00)
> +#define ISCSI_TMF_RESPONSE_TASK_DOESNT_EXIST			(0x01)
> +#define ISCSI_TMF_RESPONSE_LUN_DOESNT_EXIST			(0x02)
> +#define ISCSI_TMF_RESPONSE_TASK_STILL_ALLEGIANT			(0x03)
> +#define ISCSI_TMF_RESPONSE_FUNCTION_NOT_SUPPORTED		(0x05)
> +#define ISCSI_TMF_RESPONSE_FUNCTION_AUTHORIZATION_FAILED	(0x06)
> +#define ISCSI_TMF_RESPONSE_FUNCTION_REJECTED			(0xff)
> +
> +/* Logout reason codes */
> +/*#define ISCSI_LOGOUT_REASON_CLOSE_CONNECTION (1) */
> +
> +/* Logout response codes */
> +#define ISCSI_LOGOUT_RESPONSE_CONNECTION_CLOSED (0)
> +#define ISCSI_LOGOUT_RESPONSE_CID_NOT_FOUND (1)
> +#define ISCSI_LOGOUT_RESPONSE_CLEANUP_FAILED (3)
> +
> +/* iSCSI task types */
> +#define ISCSI_TASK_TYPE_READ    (0)
> +#define ISCSI_TASK_TYPE_WRITE   (1)
> +#define ISCSI_TASK_TYPE_MPATH   (2)




All of these iscsi code shoulds be in iscsi_proto.h or should be added 
there.


> +
> +#endif
> diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
> new file mode 100644
> index 0000000..b74a93c
> --- /dev/null
> +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
> @@ -0,0 +1,1977 @@



> + * process iSCSI TMF Response CQE & wake up driver EH thread.
> + */
> +static int bnx2i_process_tmf_resp(struct bnx2i_conn *conn, struct cqe *cqe)
> +{
> +	u32 itt;
> +	struct bnx2i_cmd *cmd;
> +	struct bnx2i_cmd *aborted_cmd;
> +	struct iscsi_tmf_response *tmf_cqe;
> +
> +	tmf_cqe = (struct iscsi_tmf_response *) cqe;
> +	itt = (tmf_cqe->itt & ISCSI_TMF_RESPONSE_INDEX);
> +	cmd = get_cmnd(conn->sess, itt);
> +	if (!cmd) {
> +		printk(KERN_ALERT "tmf_resp: ITT 0x%x is not active\n",
> +				  tmf_cqe->itt);
> +		return 0;
> +	}
> +
> +	bnx2i_update_cmd_sequence(conn->sess, tmf_cqe->exp_cmd_sn,
> +				  tmf_cqe->max_cmd_sn);
> +
> +	aborted_cmd = cmd->tmf_ref_cmd;
> +
> +	if (tmf_cqe->response == ISCSI_TMF_RSP_COMPLETE) {
> +		if (aborted_cmd->scsi_cmd) {
> +			aborted_cmd->scsi_cmd->result = DID_ABORT << 16;
> +			aborted_cmd->scsi_cmd->resid =

should use resid wrappers throughout driver.

> +				aborted_cmd->scsi_cmd->request_bufflen;

Should use bufflen wrappers throught driver.


> +}
> diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c
> new file mode 100644
> index 0000000..837bd95
> --- /dev/null
> +++ b/drivers/scsi/bnx2i/bnx2i_init.c
>
> + * handles SCSI command queued by SCSI-ML, allocates a command structure,
> + *	assigning CMDSN, mapping SG buffers and handing over request to CNIC.
> + */
> +int bnx2i_queuecommand(struct scsi_cmnd *sc,
> +		       void (*done) (struct scsi_cmnd *))
> +{
> +	struct bnx2i_sess *sess;
> +	struct bnx2i_conn *conn;
> +	struct bnx2i_cmd *cmd;
> +	static int old_recovery_state = 0;
> +
> +	sc->scsi_done = done;
> +	sc->result = 0;
> +	sess = iscsi_hostdata(sc->device->host->hostdata);
> +
> +#define iscsi_cmd_win_closed(_sess)	\
> +	((int) (_sess->max_cmdsn - _sess->cmdsn) < 0)
> +
> +	if (iscsi_cmd_win_closed(sess) ||
> +	    test_bit(ADAPTER_STATE_LINK_DOWN, &sess->hba->adapter_state))
> +		goto iscsi_win_closed;
> +
> +	if ((sess->state & BNX2I_SESS_IN_SHUTDOWN) ||
> +		(sess->state & BNX2I_SESS_IN_LOGOUT))
> +		goto dev_not_found;
> +
> +	if (sess->recovery_state) {
> +		if (old_recovery_state != sess->recovery_state)
> +			old_recovery_state = sess->recovery_state;
> +
> +		if (sess->recovery_state & ISCSI_SESS_RECOVERY_FAILED)
> +			goto dev_not_found;
> +		else if (!(sess->recovery_state & ISCSI_SESS_RECOVERY_COMPLETE))
> +			goto iscsi_win_closed;
> +		else
> +			sess->recovery_state = 0;
> +	}
> +
> +	cmd = bnx2i_alloc_cmd(sess);
> +	if (cmd == NULL)
> +		/* should never happen as cmd list size == SHT->can_queue */
> +		goto cmd_not_accepted;
> +
> +	cmd->conn = conn = sess->lead_conn;
> +	cmd->scsi_cmd = sc;
> +	cmd->req.total_data_transfer_length = sc->request_bufflen;
> +	cmd->iscsi_opcode = ISCSI_OPCODE_SCSI_CMD;
> +	cmd->req.cmd_sn = sess->cmdsn++;
> +
> +	bnx2i_iscsi_map_sg_list(cmd);
> +	bnx2i_cpy_scsi_cdb(sc, cmd);
> +
> +	if (sc->sc_data_direction == DMA_TO_DEVICE) {
> +		cmd->req.op_attr = ISCSI_CMD_REQUEST_WRITE;
> +		cmd->req.itt |= (ISCSI_TASK_TYPE_WRITE <<
> +				 ISCSI_CMD_REQUEST_TYPE_SHIFT);
> +		bnx2i_setup_write_cmd_bd_info(cmd);
> +	} else {
> +		cmd->req.op_attr = ISCSI_CMD_REQUEST_READ;
> +		cmd->req.itt |= (ISCSI_TASK_TYPE_READ <<
> +				 ISCSI_CMD_REQUEST_TYPE_SHIFT);
> +	}
> +	cmd->req.num_bds = cmd->bd_tbl->bd_valid;
> +	if (!cmd->bd_tbl->bd_valid) {
> +		cmd->req.bd_list_addr_lo = (u32) sess->hba->mp_bd_dma;
> +		cmd->req.bd_list_addr_hi =
> +			(u32) ((u64) sess->hba->mp_bd_dma >> 32);
> +		cmd->req.num_bds = 1;
> +	}
> +
> +	cmd->cmd_state = ISCSI_CMD_STATE_INITIATED;
> +	sc->SCp.ptr = (char *) cmd;
> +
> +	if (cmd->req.itt != ITT_INVALID_SIGNATURE) {
> +		bnx2i_send_iscsi_scsicmd(conn, cmd);
> +		list_add_tail(&cmd->link, &sess->active_cmds);
> +		sess->num_active_cmds++;
> +	}
> +	return 0;
> +
> +iscsi_win_closed:
> +cmd_not_accepted:
> +	return SCSI_MLQUEUE_HOST_BUSY;
> +
> +dev_not_found:
> +	sc->sense_buffer[0] = 0x70;
> +	sc->sense_buffer[2] = NOT_READY;
> +	sc->sense_buffer[7] = 0x6;
> +	sc->sense_buffer[12] = 0x08;
> +	sc->sense_buffer[13] = 0x00;


do not fake sense and do not face sense and set a host byte of 
DID_NO_CONNECT. DID_NO_CONNECT is fine.


> +	sc->result = (DID_NO_CONNECT << 16);
> +	sc->resid = sc->request_bufflen;
> +	sc->scsi_done(sc);
> +	return 0;
> +}
> +
> +
> +
> +/*
> + * TMF request timeout handler
> + */
> +static void bnx2i_iscsi_tmf_timer(unsigned long data)
> +{
> +	struct bnx2i_cmd *cmd = (struct bnx2i_cmd *) data;
> +
> +	printk(KERN_ALERT "TMF timer: abort failed, cmd 0x%p\n", cmd);
> +	cmd->cmd_state = ISCSI_CMD_STATE_TMF_TIMEOUT;
> +	cmd->conn->sess->recovery_state = ISCSI_SESS_RECOVERY_OPEN_ISCSI;
> +	iscsi_conn_error(cmd->conn->cls_conn, ISCSI_ERR_CONN_FAILED);
> +}
> +
> +
> +/*
> + * initiate command abort process by requesting CNIC to send
> + *	an iSCSI TMF request to target
> + */
> +static int bnx2i_initiate_abort_cmd(struct scsi_cmnd *sc)
> +{
> +	struct bnx2i_cmd *cmd = (struct bnx2i_cmd *) sc->SCp.ptr;
> +	struct bnx2i_cmd *tmf_cmd;
> +	struct Scsi_Host *shost = cmd->scsi_cmd->device->host;
> +	struct bnx2i_conn *conn = cmd->conn;
> +	struct bnx2i_sess *sess;
> +	struct bnx2i_hba *hba;
> +
> +	shost = cmd->scsi_cmd->device->host;
> +	sess = iscsi_hostdata(shost->hostdata);
> +	BUG_ON(shost != sess->host);
> +
> +	if (sess && (is_sess_active(sess)))
> +		hba = sess->hba;
> +	else
> +		return FAILED;
> +
> +	if (test_bit(ADAPTER_STATE_LINK_DOWN, &hba->adapter_state))
> +		return FAILED;
> +
> +	bnx2i_setup_ictx_dump(hba, conn);
> +
> +	if (cmd->scsi_cmd != sc)
> +		/* command already completed to scsi mid-layer */
> +		goto cmd_not_active;
> +
> +	tmf_cmd = bnx2i_alloc_cmd(sess);
> +	if (tmf_cmd == NULL)
> +		goto lack_of_resc;
> +
> +	tmf_cmd->conn = conn = sess->lead_conn;
> +	tmf_cmd->scsi_cmd = NULL;
> +	tmf_cmd->iscsi_opcode = ISCSI_OPCODE_TMF_REQUEST;
> +	tmf_cmd->req.cmd_sn = sess->cmdsn;
> +	tmf_cmd->tmf_ref_itt = cmd->req.itt;
> +	tmf_cmd->tmf_ref_cmd = cmd;
> +	tmf_cmd->tmf_ref_sc = cmd->scsi_cmd;
> +	cmd->cmd_state = ISCSI_CMD_STATE_ABORT_PEND;
> +	tmf_cmd->cmd_state = ISCSI_CMD_STATE_INITIATED;
> +
> +#define BNX2I_TMF_TIMEOUT	10 * HZ
> +	sess->abort_timer.expires = BNX2I_TMF_TIMEOUT + jiffies;
> +	sess->abort_timer.function = bnx2i_iscsi_tmf_timer;
> +	sess->abort_timer.data = (unsigned long)tmf_cmd;
> +	add_timer(&sess->abort_timer);
> +
> +	bnx2i_send_iscsi_tmf(conn, tmf_cmd);
> +
> +	/* update iSCSI context for this conn, wait for CNIC to complete */
> +	wait_event_interruptible(sess->er_wait, (!conn->ep ||
> +		 			 (tmf_cmd->cmd_state ==
> +					  ISCSI_CMD_STATE_FAILED) ||
> +					 (tmf_cmd->cmd_state ==
> +					  ISCSI_CMD_STATE_COMPLETED)));
> +
> +	if (signal_pending(current))
> +		flush_signals(current);
> +
> +	del_timer_sync(&sess->abort_timer);
> +
> +	if (tmf_cmd->cmd_state == ISCSI_CMD_STATE_TMF_TIMEOUT) {
> +		printk(KERN_ALERT "abort: abort failed, cmd 0x%p\n", tmf_cmd);
> +		/* TMF timed out, return error status and let SCSI-ML do
> +		 * session recovery.
> +		 */
> +		list_del_init(&tmf_cmd->link);
> +		bnx2i_free_cmd(sess, tmf_cmd);
> +		return SUCCESS;
> +	}
> +
> +	list_del_init(&tmf_cmd->link);
> +	bnx2i_free_cmd(sess, tmf_cmd);
> +
> +	if ((cmd->scsi_cmd->result & 0xFF0000) == (DID_ABORT << 16)) {
> +		cmd->cmd_state = ISCSI_CMD_STATE_CLEANUP_PEND;
> +		bnx2i_send_cmd_cleanup_req(hba, cmd);
> +		wait_event_interruptible_timeout(sess->er_wait,
> +						 (cmd->cmd_state ==
> +						  ISCSI_CMD_STATE_CLEANUP_CMPL),
> +						 msecs_to_jiffies(
> +						  ISCSI_CMD_CLEANUP_TIMEOUT));
> +
> +		if (signal_pending(current))
> +			flush_signals(current);
> +	} else
> +		cmd->scsi_cmd->result = (DID_ABORT << 16);
> +
> +	cmd->conn->sess->num_active_cmds--;
> +	list_del_init(&cmd->link);
> +	bnx2i_return_failed_command(sess, cmd, DID_ABORT);
> +	bnx2i_free_cmd(sess, cmd);
> +
> +cmd_not_active:
> +	return SUCCESS;
> +
> +lack_of_resc:
> +	return FAILED;
> +}
> +
> +
> +/*
> + * SCSI abort request handler.
> + */
> +int bnx2i_abort(struct scsi_cmnd *sc)
> +{
> +	int reason;
> +	struct bnx2i_cmd *cmd = (struct bnx2i_cmd *) sc->SCp.ptr;
> +
> +	if (unlikely(!cmd)) {
> +		/* command already completed to scsi mid-layer */
> +		printk(KERN_INFO "bnx2i_abort: sc 0x%p, not active\n", sc);
> +		return SUCCESS;
> +	}
> +
> +	reason = bnx2i_initiate_abort_cmd(sc);
> +	return reason;
> +}
> +
> +
> +
> +/*
> + * hardware reset
> + */
> +int bnx2i_reset(struct scsi_cmnd *sc)
> +{
> +	return 0;
> +}


So what is up with this one? It seems like if there is a way to reset 
hardware then you would want it as the scsi eh host reset callout 
instead of dropping the session. We could add some transport level 
recovery callouts for the iscsi specifics.


> +
> +
> +void bnx2i_return_failed_command(struct bnx2i_sess *sess,
> +				 struct bnx2i_cmd *cmd, int err_code)
> +{
> +	struct scsi_cmnd *sc = cmd->scsi_cmd;
> +	sc->result = err_code << 16;
> +	sc->resid = cmd->scsi_cmd->request_bufflen;
> +	cmd->scsi_cmd = NULL;
> +	sess->num_active_cmds--;
> +	sc->SCp.ptr = NULL;
> +	sc->scsi_done(sc);
> +}
> +
> +
> +
> +/*
> + * SCSI host reset handler - iSCSI session recovery
> + */
> +int bnx2i_host_reset(struct scsi_cmnd *sc)
> +{
> +	struct Scsi_Host *shost;
> +	struct bnx2i_sess *sess;
> +	int rc = 0;
> +
> +	shost = sc->device->host;
> +	sess = iscsi_hostdata(shost->hostdata);
> +	printk(KERN_INFO "bnx2i: attempting to reset host, #%d\n",
> +			  sess->host->host_no);
> +
> +	BUG_ON(shost != sess->host);
> +	rc = bnx2i_do_iscsi_sess_recovery(sess, DID_RESET);
> +
> +	return rc;
> +}
> +
> +
> +
> +/**********************************************************************
> + *		open-iscsi interface
> + **********************************************************************/
> +
> +
> +#define get_bnx2_device(_hba, _devc) 	do {				\
> +		if ((_hba->pci_did == PCI_DEVICE_ID_NX2_5706) ||	\
> +			(_hba->pci_did == PCI_DEVICE_ID_NX2_5706S)) {	\
> +			_devc = '6';					\
> +		} else if ((_hba->pci_did == PCI_DEVICE_ID_NX2_5708) ||	\
> +			(_hba->pci_did == PCI_DEVICE_ID_NX2_5708S)) {	\
> +			_devc = '8';					\
> +		} else if ((_hba->pci_did == PCI_DEVICE_ID_NX2_5709) ||	\
> +			(_hba->pci_did == PCI_DEVICE_ID_NX2_5709S)) {	\
> +			_devc = '9';					\
> +		}							\
> +	} while (0)
> +
> +/* from open-iscsi project */
> +/*
> + * iSCSI Session's hostdata organization:
> + *
> + *    *------------------* <== hostdata_session(host->hostdata)
> + *    | ptr to class sess|
> + *    |------------------| <== iscsi_hostdata(host->hostdata)
> + *    | iscsi_session    |
> + *    *------------------*
> + */



I think it is going to be nicer to not allocate the host with the 
session for bnx2i like was done for qla4xxx. If you are going to 
allocate the host with the session then it seems like you should be able 
to intergrate into libiscsi like was done with iscsi_tcp and ib_iser. 
And even if you do not allocate a host per session it seems like you 
should be able to better integreate with libiscsi like was done with 
iscsi_tcp and ib_iser. Right now, it is sort of a mix of qla4xxx's model 
and iscsi_tcp/ib_iser's model which is not going to work well in the 
long run.

You can change libiscsi so that you do not run in the host's workq and 
so data structs are better optimized for you and whatever else is 
needed. I mean feel free to change that code so it better suits your 
model. Your code is probably closer to ib_iser since that module does 
not have to handle operations like responding to r2ts and has to make 
its own connections.

There seem to be some bugs from when you might have modeled bnx2i's 
callouts after a older iscsi_tcp or ib_iser. Those bugs got fixed for 
both modules for free when libiscsi got fixed.


> +
> +#define hostdata_privsize(_sz)	(sizeof(unsigned long) + _sz + \
> +				 _sz % sizeof(unsigned long))
> +
> +#define hostdata_session(_hostdata) (iscsi_ptr(*(unsigned long *)_hostdata))
> +
> +#define session_to_cls(_sess) 	hostdata_session(_sess->host->hostdata)
> +
> +
> +
> +
> +/*
> + * Function: bnx2i_register_xport
> + * Description: this routine will allocate memory for SCSI host template,
> + * 		iSCSI template and registers one instance of NX2 device with
> + *		iSCSI Transport Kernel module.
> + */
> +int bnx2i_register_xport(struct bnx2i_hba *hba)
> +{
> +	void *mem_ptr;
> +	char dev_id = '8';
> +
> +	get_bnx2_device(hba, dev_id);
> +
> +	mem_ptr = kmalloc(sizeof(struct scsi_host_template), GFP_KERNEL);
> +	hba->scsi_template = mem_ptr;
> +	if (hba->scsi_template == NULL) {
> +		printk(KERN_ALERT "bnx2i: failed to alloc memory for sht\n");
> +		return -ENOMEM;
> +	}
> +
> +	mem_ptr = kmalloc(sizeof(struct iscsi_transport), GFP_KERNEL);
> +	hba->iscsi_transport = mem_ptr;
> +	if (hba->iscsi_transport == NULL) {
> +		printk(KERN_ALERT "mem error for iscsi_transport template\n");
> +		goto iscsi_xport_err;
> +	}
> +
> +	mem_ptr = kmalloc(BRCM_ISCSI_XPORT_NAME_SIZE_MAX, GFP_KERNEL);
> +	if (mem_ptr == NULL) {
> +		printk(KERN_ALERT "failed to alloc memory for xport name\n");
> +		goto scsi_name_mem_err;
> +	}
> +
> +	memcpy(hba->scsi_template, (const void *) &bnx2i_host_template,
> +	       sizeof(struct scsi_host_template));
> +	hba->scsi_template->name = mem_ptr;
> +	memcpy((void *) hba->scsi_template->name,
> +	       (const void *) bnx2i_host_template.name,
> +	       strlen(bnx2i_host_template.name) + 1);
> +
> +	mem_ptr = kmalloc(BRCM_ISCSI_XPORT_NAME_SIZE_MAX, GFP_KERNEL);
> +	if (mem_ptr == NULL) {
> +		printk(KERN_ALERT "failed to alloc proc name mem\n");
> +		goto scsi_proc_name_mem_err;
> +	}
> +	hba->scsi_template->proc_name = mem_ptr;
> +
> +	memcpy((void *) hba->iscsi_transport,
> +	       (const void *) &bnx2i_iscsi_transport,
> +	       sizeof(struct iscsi_transport));
> +
> +	hba->iscsi_transport->host_template = hba->scsi_template;
> +
> +	mem_ptr = kmalloc(BRCM_ISCSI_XPORT_NAME_SIZE_MAX, GFP_KERNEL);
> +	if (mem_ptr == NULL) {
> +		printk(KERN_ALERT "mem alloc error, iscsi xport name\n");
> +		goto xport_name_mem_err;
> +	}
> +	hba->iscsi_transport->name = mem_ptr;
> +	sprintf(mem_ptr, "%s%c-%.2x%.2x%.2x", BRCM_ISCSI_XPORT_NAME_PREFIX,
> +			 dev_id, (u8)hba->pci_dev->bus->number,
> +			 hba->pci_devno, (u8)hba->pci_func);
> +
> +	memcpy((void *) hba->scsi_template->proc_name, (const void *) mem_ptr,
> +	       strlen(mem_ptr) + 1);
> +
> +	hba->shost_template = iscsi_register_transport(hba->iscsi_transport);
> +	if (!hba->shost_template) {
> +		printk(KERN_ALERT "bnx2i: xport reg failed, hba 0x%p\n", hba);
> +		goto failed_registration;
> +	}


The transport should be allocated like qla4xxx or iscsi_tcp/ib_iser. 
Userspace then sets everything up based on usersettings like with 
qla4xxx and iscsi_tcp and ib_iser.

I started that patch that I sent to you guys to modify 
scsi_transport_iscsi abd libiscsi for you guys, but have not finished it 
yet. I am trying to finish merging Olafs iscsi_tcp patches. Feel free to 
change iscsid, scsi_transport_iscsi and libiscsi for me.


> +}
> +
> +int bnx2i_sysfs_setup(void)
> +{
> +	int ret;
> +	ret = class_register(&bnx2i_class);
> +
> +	bnx2i_register_port_class_dev(&port_class_dev);
> +	return ret;
> +}
> +
> +void bnx2i_sysfs_cleanup(void)
> +{
> +	class_device_unregister(&port_class_dev);
> +	class_unregister(&bnx2i_class);
> +}

The sysfs bits related to the hba should be use one of the scsi sysfs 
facilities or if they are related to iscsi bits and are generic then 
through the iscsi hba

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-05 18:34 ` [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices Mike Christie
@ 2007-09-05 21:27   ` Anil Veerabhadrappa
  2007-09-07 22:23     ` Mike Christie
  2007-09-08 11:59     ` Christoph Hellwig
  0 siblings, 2 replies; 27+ messages in thread
From: Anil Veerabhadrappa @ 2007-09-05 21:27 UTC (permalink / raw)
  To: Mike Christie
  Cc: Michael Chan, davem, netdev, open-iscsi, talm, lusinsky, uri,
	SCSI Mailing List

On Wed, 2007-09-05 at 13:34 -0500, Mike Christie wrote:
> Michael Chan wrote:
> > +* This file defines HSI constants for the iSCSI flows
> > +*/
> > +
> > +/* iSCSI request op codes */
> > +#define ISCSI_OPCODE_NOP_OUT 			(0 | 0x40)
> > +#define ISCSI_OPCODE_SCSI_CMD 			(1)
> > +#define ISCSI_OPCODE_TMF_REQUEST 		(2 | 0x40)
> > +#define ISCSI_OPCODE_LOGIN_REQUEST 		(3 | 0x40)
> > +#define ISCSI_OPCODE_TEXT_REQUEST 		(4 | 0x40)
> > +#define ISCSI_OPCODE_DATA_OUT 			(5)
> > +#define ISCSI_OPCODE_LOGOUT_REQUEST 		(6 | 0x00)
> > +#define ISCSI_OPCODE_CLEANUP_REQUEST		(7)
> 
> 
> What is ISCSI_OPCODE_CLEANUP_REQUEST?

This is a local message between iSCSI driver and the firmware to cleanup
iSCSI task resources held by chip. This operation is required to reclaim
SCSI command related resources after the TMF response is received for a
task and before it is completed to SCSI-ML


> 
> 
> > +
> > +/* iSCSI response/messages op codes */
> > +#define ISCSI_OPCODE_NOP_IN 			(0x20)
> > +#define ISCSI_OPCODE_SCSI_RESPONSE 		(0x21)
> > +#define ISCSI_OPCODE_TMF_RESPONSE 		(0x22)
> > +#define ISCSI_OPCODE_LOGIN_RESPONSE 		(0x23)
> > +#define ISCSI_OPCODE_TEXT_RESPONSE 		(0x24)
> > +#define ISCSI_OPCODE_DATA_IN			(0x25)
> > +#define ISCSI_OPCODE_LOGOUT_RESPONSE 		(0x26)
> > +#define ISCSI_OPCODE_CLEANUP_RESPONSE 		(0x27)
> > +#define ISCSI_OPCODE_R2T			(0x31)
> > +#define ISCSI_OPCODE_ASYNC_MSG 			(0x32)
> > +#define ISCSI_OPCODE_REJECT 			(0x3f)
> > +#define ISCSI_OPCODE_NOPOUT_LOCAL_COMPLETION	(0)
> 
> 
> What does the LOCAL_COMPLETION on the nopout mean?

This is the completion notification for NOPOUT pdus which does not
involve response from the target. Following two NOPOUTs are classified
under this -
1. Initiator's proactive nopout with ITT=0xFFFFFFFF
2. Initiator's response to target nopin with TTT != 0xFFFFFFFF


> 
> 
> > +
> > +/* iSCSI stages */
> > +#define ISCSI_STAGE_SECURITY_NEGOTIATION (0)
> > +#define ISCSI_STAGE_LOGIN_OPERATIONAL_NEGOTIATION (1)
> > +#define ISCSI_STAGE_FULL_FEATURE_PHASE (3)
> > +/* Logout response codes */
> > +#define ISCSI_LOGOUT_RESPONSE_CONNECTION_CLOSED (0)
> > +#define ISCSI_LOGOUT_RESPONSE_CID_NOT_FOUND (1)
> > +#define ISCSI_LOGOUT_RESPONSE_CLEANUP_FAILED (3)
> > +
> > +/* iSCSI task types */
> > +#define ISCSI_TASK_TYPE_READ    (0)
> > +#define ISCSI_TASK_TYPE_WRITE   (1)
> > +#define ISCSI_TASK_TYPE_MPATH   (2)
> 
> 
> 
> 
> All of these iscsi code shoulds be in iscsi_proto.h or should be added 
> there.
This is a very tricky proposal as this header file is automatically
generated by a well defined process and is shared between various driver
supporting multiple platform/OS and the firmware. If it is not of a big
issue I would like to keep it the way it is.


> 
> 
> > +
> > +#endif
> > diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
> > new file mode 100644
> > index 0000000..b74a93c
> > --- /dev/null
> > +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
> > @@ -0,0 +1,1977 @@
> > +
> > +	if (tmf_cqe->response == ISCSI_TMF_RSP_COMPLETE) {
> > +		if (aborted_cmd->scsi_cmd) {
> > +			aborted_cmd->scsi_cmd->result = DID_ABORT << 16;
> > +			aborted_cmd->scsi_cmd->resid =
> 
> should use resid wrappers throughout driver.
> 
> > +				aborted_cmd->scsi_cmd->request_bufflen;
> 
> Should use bufflen wrappers throught driver.

Will look into this.


> > +	cmd->cmd_state = ISCSI_CMD_STATE_INITIATED;
> > +	sc->SCp.ptr = (char *) cmd;
> > +
> > +	if (cmd->req.itt != ITT_INVALID_SIGNATURE) {
> > +		bnx2i_send_iscsi_scsicmd(conn, cmd);
> > +		list_add_tail(&cmd->link, &sess->active_cmds);
> > +		sess->num_active_cmds++;
> > +	}
> > +	return 0;
> > +
> > +iscsi_win_closed:
> > +cmd_not_accepted:
> > +	return SCSI_MLQUEUE_HOST_BUSY;
> > +
> > +dev_not_found:
> > +	sc->sense_buffer[0] = 0x70;
> > +	sc->sense_buffer[2] = NOT_READY;
> > +	sc->sense_buffer[7] = 0x6;
> > +	sc->sense_buffer[12] = 0x08;
> > +	sc->sense_buffer[13] = 0x00;
> 
> 
> do not fake sense and do not face sense and set a host byte of 
> DID_NO_CONNECT. DID_NO_CONNECT is fine.

ok, thanks.

> 
> 
> > +	sc->result = (DID_NO_CONNECT << 16);
> > +	sc->resid = sc->request_bufflen;
> > +	sc->scsi_done(sc);
> > +	return 0;
> > +}
> > +
> > +
> > +
> > +/*
> > + * TMF request timeout handler
> > + */
> > +static void bnx2i_iscsi_tmf_timer(unsigned long data)
> > +{
> > +	struct bnx2i_cmd *cmd = (struct bnx2i_cmd *) data;
> > +
> > +	printk(KERN_ALERT "TMF timer: abort failed, cmd 0x%p\n", cmd);
> > +	cmd->cmd_state = ISCSI_CMD_STATE_TMF_TIMEOUT;
> > +	cmd->conn->sess->recovery_state = ISCSI_SESS_RECOVERY_OPEN_ISCSI;
> > +	iscsi_conn_error(cmd->conn->cls_conn, ISCSI_ERR_CONN_FAILED);
> > +}
> > +
> > +
> > +/*
> > + * initiate command abort process by requesting CNIC to send
> > + *	an iSCSI TMF request to target
> > + */
> > +static int bnx2i_initiate_abort_cmd(struct scsi_cmnd *sc)
> > +{
> > +	struct bnx2i_cmd *cmd = (struct bnx2i_cmd *) sc->SCp.ptr;
> > +	struct bnx2i_cmd *tmf_cmd;
> > +	struct Scsi_Host *shost = cmd->scsi_cmd->device->host;
> > +	struct bnx2i_conn *conn = cmd->conn;
> > +	struct bnx2i_sess *sess;
> > +	struct bnx2i_hba *hba;
> > +
> > +	shost = cmd->scsi_cmd->device->host;
> > +	sess = iscsi_hostdata(shost->hostdata);
> > +	BUG_ON(shost != sess->host);
> > +
> > +	if (sess && (is_sess_active(sess)))
> > +		hba = sess->hba;
> > +	else
> > +		return FAILED;
> > +
> > +	if (test_bit(ADAPTER_STATE_LINK_DOWN, &hba->adapter_state))
> > +		return FAILED;
> > +
> > +	bnx2i_setup_ictx_dump(hba, conn);
> > +
> > +	if (cmd->scsi_cmd != sc)
> > +		/* command already completed to scsi mid-layer */
> > +		goto cmd_not_active;
> > +
> > +	tmf_cmd = bnx2i_alloc_cmd(sess);
> > +	if (tmf_cmd == NULL)
> > +		goto lack_of_resc;
> > +
> > +	tmf_cmd->conn = conn = sess->lead_conn;
> > +	tmf_cmd->scsi_cmd = NULL;
> > +	tmf_cmd->iscsi_opcode = ISCSI_OPCODE_TMF_REQUEST;
> > +	tmf_cmd->req.cmd_sn = sess->cmdsn;
> > +	tmf_cmd->tmf_ref_itt = cmd->req.itt;
> > +	tmf_cmd->tmf_ref_cmd = cmd;
> > +	tmf_cmd->tmf_ref_sc = cmd->scsi_cmd;
> > +	cmd->cmd_state = ISCSI_CMD_STATE_ABORT_PEND;
> > +	tmf_cmd->cmd_state = ISCSI_CMD_STATE_INITIATED;
> > +
> > +#define BNX2I_TMF_TIMEOUT	10 * HZ
> > +	sess->abort_timer.expires = BNX2I_TMF_TIMEOUT + jiffies;
> > +	sess->abort_timer.function = bnx2i_iscsi_tmf_timer;
> > +	sess->abort_timer.data = (unsigned long)tmf_cmd;
> > +	add_timer(&sess->abort_timer);
> > +
> > +	bnx2i_send_iscsi_tmf(conn, tmf_cmd);
> > +
> > +	/* update iSCSI context for this conn, wait for CNIC to complete */
> > +	wait_event_interruptible(sess->er_wait, (!conn->ep ||
> > +		 			 (tmf_cmd->cmd_state ==
> > +					  ISCSI_CMD_STATE_FAILED) ||
> > +					 (tmf_cmd->cmd_state ==
> > +					  ISCSI_CMD_STATE_COMPLETED)));
> > +
> > +	if (signal_pending(current))
> > +		flush_signals(current);
> > +
> > +	del_timer_sync(&sess->abort_timer);
> > +
> > +	if (tmf_cmd->cmd_state == ISCSI_CMD_STATE_TMF_TIMEOUT) {
> > +		printk(KERN_ALERT "abort: abort failed, cmd 0x%p\n", tmf_cmd);
> > +		/* TMF timed out, return error status and let SCSI-ML do
> > +		 * session recovery.
> > +		 */
> > +		list_del_init(&tmf_cmd->link);
> > +		bnx2i_free_cmd(sess, tmf_cmd);
> > +		return SUCCESS;
> > +	}
> > +
> > +	list_del_init(&tmf_cmd->link);
> > +	bnx2i_free_cmd(sess, tmf_cmd);
> > +
> > +	if ((cmd->scsi_cmd->result & 0xFF0000) == (DID_ABORT << 16)) {
> > +		cmd->cmd_state = ISCSI_CMD_STATE_CLEANUP_PEND;
> > +		bnx2i_send_cmd_cleanup_req(hba, cmd);
> > +		wait_event_interruptible_timeout(sess->er_wait,
> > +						 (cmd->cmd_state ==
> > +						  ISCSI_CMD_STATE_CLEANUP_CMPL),
> > +						 msecs_to_jiffies(
> > +						  ISCSI_CMD_CLEANUP_TIMEOUT));
> > +
> > +		if (signal_pending(current))
> > +			flush_signals(current);
> > +	} else
> > +		cmd->scsi_cmd->result = (DID_ABORT << 16);
> > +
> > +	cmd->conn->sess->num_active_cmds--;
> > +	list_del_init(&cmd->link);
> > +	bnx2i_return_failed_command(sess, cmd, DID_ABORT);
> > +	bnx2i_free_cmd(sess, cmd);
> > +
> > +cmd_not_active:
> > +	return SUCCESS;
> > +
> > +lack_of_resc:
> > +	return FAILED;
> > +}
> > +
> > +
> > +/*
> > + * SCSI abort request handler.
> > + */
> > +int bnx2i_abort(struct scsi_cmnd *sc)
> > +{
> > +	int reason;
> > +	struct bnx2i_cmd *cmd = (struct bnx2i_cmd *) sc->SCp.ptr;
> > +
> > +	if (unlikely(!cmd)) {
> > +		/* command already completed to scsi mid-layer */
> > +		printk(KERN_INFO "bnx2i_abort: sc 0x%p, not active\n", sc);
> > +		return SUCCESS;
> > +	}
> > +
> > +	reason = bnx2i_initiate_abort_cmd(sc);
> > +	return reason;
> > +}
> > +
> > +
> > +
> > +/*
> > + * hardware reset
> > + */
> > +int bnx2i_reset(struct scsi_cmnd *sc)
> > +{
> > +	return 0;
> > +}
> 
> 
> So what is up with this one? It seems like if there is a way to reset 
> hardware then you would want it as the scsi eh host reset callout 
> instead of dropping the session. We could add some transport level 
> recovery callouts for the iscsi specifics.

We may not be able to support HBA cold reset as bnx2 driver is the
primary owner of chip reset and initialization. This is the drawback of
sharing network interface with the NIC driver. If there is a need for
administrator to reset the iSCSI port same can be achieved by running
'ifdown eth#' and 'ifup eth#'.
Current driver even allows ethernet interface reset when there are
active iSCSI connection, all active iscsi sessions will be reinstated
when the network link comes back live
 



> 
> 
> > +
> > +
> > +void bnx2i_return_failed_command(struct bnx2i_sess *sess,
> > +				 struct bnx2i_cmd *cmd, int err_code)
> > +{
> > +	struct scsi_cmnd *sc = cmd->scsi_cmd;
> > +	sc->result = err_code << 16;
> > +	sc->resid = cmd->scsi_cmd->request_bufflen;
> > +	cmd->scsi_cmd = NULL;
> > +	sess->num_active_cmds--;
> > +	sc->SCp.ptr = NULL;
> > +	sc->scsi_done(sc);
> > +}
> > +
> > +
> > +
> > +/*
> > + * SCSI host reset handler - iSCSI session recovery
> > + */
> > +int bnx2i_host_reset(struct scsi_cmnd *sc)
> > +{
> > +	struct Scsi_Host *shost;
> > +	struct bnx2i_sess *sess;
> > +	int rc = 0;
> > +
> > +	shost = sc->device->host;
> > +	sess = iscsi_hostdata(shost->hostdata);
> > +	printk(KERN_INFO "bnx2i: attempting to reset host, #%d\n",
> > +			  sess->host->host_no);
> > +
> > +	BUG_ON(shost != sess->host);
> > +	rc = bnx2i_do_iscsi_sess_recovery(sess, DID_RESET);
> > +
> > +	return rc;
> > +}
> > +
> > +
> > +
> > +/**********************************************************************
> > + *		open-iscsi interface
> > + **********************************************************************/
> > +
> > +
> > +#define get_bnx2_device(_hba, _devc) 	do {				\
> > +		if ((_hba->pci_did == PCI_DEVICE_ID_NX2_5706) ||	\
> > +			(_hba->pci_did == PCI_DEVICE_ID_NX2_5706S)) {	\
> > +			_devc = '6';					\
> > +		} else if ((_hba->pci_did == PCI_DEVICE_ID_NX2_5708) ||	\
> > +			(_hba->pci_did == PCI_DEVICE_ID_NX2_5708S)) {	\
> > +			_devc = '8';					\
> > +		} else if ((_hba->pci_did == PCI_DEVICE_ID_NX2_5709) ||	\
> > +			(_hba->pci_did == PCI_DEVICE_ID_NX2_5709S)) {	\
> > +			_devc = '9';					\
> > +		}							\
> > +	} while (0)
> > +
> > +/* from open-iscsi project */
> > +/*
> > + * iSCSI Session's hostdata organization:
> > + *
> > + *    *------------------* <== hostdata_session(host->hostdata)
> > + *    | ptr to class sess|
> > + *    |------------------| <== iscsi_hostdata(host->hostdata)
> > + *    | iscsi_session    |
> > + *    *------------------*
> > + */
> 
> 
> 
> I think it is going to be nicer to not allocate the host with the 
> session for bnx2i like was done for qla4xxx. If you are going to 
> allocate the host with the session then it seems like you should be able 
> to intergrate into libiscsi like was done with iscsi_tcp and ib_iser. 
> And even if you do not allocate a host per session it seems like you 
> should be able to better integreate with libiscsi like was done with 
> iscsi_tcp and ib_iser. Right now, it is sort of a mix of qla4xxx's model 
> and iscsi_tcp/ib_iser's model which is not going to work well in the 
> long run.
Either way will work for us, will investigate more on the code changes
required in bnx2i  


> You can change libiscsi so that you do not run in the host's workq and 
> so data structs are better optimized for you and whatever else is 
> needed. I mean feel free to change that code so it better suits your 
> model. Your code is probably closer to ib_iser since that module does 
> not have to handle operations like responding to r2ts and has to make 
> its own connections.

Right.


> 
> There seem to be some bugs from when you might have modeled bnx2i's 
> callouts after a older iscsi_tcp or ib_iser. Those bugs got fixed for 
> both modules for free when libiscsi got fixed.
> 
> 
> > +
> > +#define hostdata_privsize(_sz)	(sizeof(unsigned long) + _sz + \
> > +				 _sz % sizeof(unsigned long))
> > +
> > +#define hostdata_session(_hostdata) (iscsi_ptr(*(unsigned long *)_hostdata))
> > +
> > +#define session_to_cls(_sess) 	hostdata_session(_sess->host->hostdata)
> > +
> > +
> > +
> > +
> > +/*
> > + * Function: bnx2i_register_xport
> > + * Description: this routine will allocate memory for SCSI host template,
> > + * 		iSCSI template and registers one instance of NX2 device with
> > + *		iSCSI Transport Kernel module.
> > + */
> > +int bnx2i_register_xport(struct bnx2i_hba *hba)
> > +{
> > +	void *mem_ptr;
> > +	char dev_id = '8';
> > +
> > +	get_bnx2_device(hba, dev_id);
> > +
> > +	mem_ptr = kmalloc(sizeof(struct scsi_host_template), GFP_KERNEL);
> > +	hba->scsi_template = mem_ptr;
> > +	if (hba->scsi_template == NULL) {
> > +		printk(KERN_ALERT "bnx2i: failed to alloc memory for sht\n");
> > +		return -ENOMEM;
> > +	}
> > +
> > +	mem_ptr = kmalloc(sizeof(struct iscsi_transport), GFP_KERNEL);
> > +	hba->iscsi_transport = mem_ptr;
> > +	if (hba->iscsi_transport == NULL) {
> > +		printk(KERN_ALERT "mem error for iscsi_transport template\n");
> > +		goto iscsi_xport_err;
> > +	}
> > +
> > +	mem_ptr = kmalloc(BRCM_ISCSI_XPORT_NAME_SIZE_MAX, GFP_KERNEL);
> > +	if (mem_ptr == NULL) {
> > +		printk(KERN_ALERT "failed to alloc memory for xport name\n");
> > +		goto scsi_name_mem_err;
> > +	}
> > +
> > +	memcpy(hba->scsi_template, (const void *) &bnx2i_host_template,
> > +	       sizeof(struct scsi_host_template));
> > +	hba->scsi_template->name = mem_ptr;
> > +	memcpy((void *) hba->scsi_template->name,
> > +	       (const void *) bnx2i_host_template.name,
> > +	       strlen(bnx2i_host_template.name) + 1);
> > +
> > +	mem_ptr = kmalloc(BRCM_ISCSI_XPORT_NAME_SIZE_MAX, GFP_KERNEL);
> > +	if (mem_ptr == NULL) {
> > +		printk(KERN_ALERT "failed to alloc proc name mem\n");
> > +		goto scsi_proc_name_mem_err;
> > +	}
> > +	hba->scsi_template->proc_name = mem_ptr;
> > +
> > +	memcpy((void *) hba->iscsi_transport,
> > +	       (const void *) &bnx2i_iscsi_transport,
> > +	       sizeof(struct iscsi_transport));
> > +
> > +	hba->iscsi_transport->host_template = hba->scsi_template;
> > +
> > +	mem_ptr = kmalloc(BRCM_ISCSI_XPORT_NAME_SIZE_MAX, GFP_KERNEL);
> > +	if (mem_ptr == NULL) {
> > +		printk(KERN_ALERT "mem alloc error, iscsi xport name\n");
> > +		goto xport_name_mem_err;
> > +	}
> > +	hba->iscsi_transport->name = mem_ptr;
> > +	sprintf(mem_ptr, "%s%c-%.2x%.2x%.2x", BRCM_ISCSI_XPORT_NAME_PREFIX,
> > +			 dev_id, (u8)hba->pci_dev->bus->number,
> > +			 hba->pci_devno, (u8)hba->pci_func);
> > +
> > +	memcpy((void *) hba->scsi_template->proc_name, (const void *) mem_ptr,
> > +	       strlen(mem_ptr) + 1);
> > +
> > +	hba->shost_template = iscsi_register_transport(hba->iscsi_transport);
> > +	if (!hba->shost_template) {
> > +		printk(KERN_ALERT "bnx2i: xport reg failed, hba 0x%p\n", hba);
> > +		goto failed_registration;
> > +	}
> 
> 
> The transport should be allocated like qla4xxx or iscsi_tcp/ib_iser. 
> Userspace then sets everything up based on usersettings like with 
> qla4xxx and iscsi_tcp and ib_iser.
> 
> I started that patch that I sent to you guys to modify 
> scsi_transport_iscsi abd libiscsi for you guys, but have not finished it 
> yet. I am trying to finish merging Olafs iscsi_tcp patches. Feel free to 
> change iscsid, scsi_transport_iscsi and libiscsi for me.
> 
> 
> > +}
> > +
> > +int bnx2i_sysfs_setup(void)
> > +{
> > +	int ret;
> > +	ret = class_register(&bnx2i_class);
> > +
> > +	bnx2i_register_port_class_dev(&port_class_dev);
> > +	return ret;
> > +}
> > +
> > +void bnx2i_sysfs_cleanup(void)
> > +{
> > +	class_device_unregister(&port_class_dev);
> > +	class_unregister(&bnx2i_class);
> > +}
> 
> The sysfs bits related to the hba should be use one of the scsi sysfs 
> facilities or if they are related to iscsi bits and are generic then 
> through the iscsi hba

bnx2i needs 2 sysfs entries -
1. QP size info - this is used to size per connection shared data
structures to issue work requests to chip (login, scsi cmd, tmf, nopin)
and get completions from the chip (scsi completions, async messages,
etc'). This is a iSCSI HBA attribute
2. port mapper - we can be more flexible on classifying this as either
iSCSI HBA attribute or bnx2i driver global attribute
Can hooks be added to iSCSI transport class to include these?


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-05 21:27   ` Anil Veerabhadrappa
@ 2007-09-07 22:23     ` Mike Christie
  2007-11-21 18:38       ` Anil Veerabhadrappa
  2007-09-08 11:59     ` Christoph Hellwig
  1 sibling, 1 reply; 27+ messages in thread
From: Mike Christie @ 2007-09-07 22:23 UTC (permalink / raw)
  To: Anil Veerabhadrappa
  Cc: Mike Christie, Michael Chan, davem, netdev, open-iscsi, talm,
	lusinsky, uri, SCSI Mailing List

Anil Veerabhadrappa wrote:
>>
>>> +
>>> +/* iSCSI stages */
>>> +#define ISCSI_STAGE_SECURITY_NEGOTIATION (0)
>>> +#define ISCSI_STAGE_LOGIN_OPERATIONAL_NEGOTIATION (1)
>>> +#define ISCSI_STAGE_FULL_FEATURE_PHASE (3)
>>> +/* Logout response codes */
>>> +#define ISCSI_LOGOUT_RESPONSE_CONNECTION_CLOSED (0)
>>> +#define ISCSI_LOGOUT_RESPONSE_CID_NOT_FOUND (1)
>>> +#define ISCSI_LOGOUT_RESPONSE_CLEANUP_FAILED (3)
>>> +
>>> +/* iSCSI task types */
>>> +#define ISCSI_TASK_TYPE_READ    (0)
>>> +#define ISCSI_TASK_TYPE_WRITE   (1)
>>> +#define ISCSI_TASK_TYPE_MPATH   (2)
>>
>>
>>
>> All of these iscsi code shoulds be in iscsi_proto.h or should be added 
>> there.
> This is a very tricky proposal as this header file is automatically
> generated by a well defined process and is shared between various driver
> supporting multiple platform/OS and the firmware. If it is not of a big
> issue I would like to keep it the way it is.

The values that are iscsi RFC values should come from the iscsi_proto.h 
file and not be duplicated for each driver.


>>> +/*
>>> + * hardware reset
>>> + */
>>> +int bnx2i_reset(struct scsi_cmnd *sc)
>>> +{
>>> +	return 0;
>>> +}
>>
>> So what is up with this one? It seems like if there is a way to reset 
>> hardware then you would want it as the scsi eh host reset callout 
>> instead of dropping the session. We could add some transport level 
>> recovery callouts for the iscsi specifics.
> 
> We may not be able to support HBA cold reset as bnx2 driver is the
> primary owner of chip reset and initialization. This is the drawback of
> sharing network interface with the NIC driver. If there is a need for
> administrator to reset the iSCSI port same can be achieved by running
> 'ifdown eth#' and 'ifup eth#'.
> Current driver even allows ethernet interface reset when there are
> active iSCSI connection, all active iscsi sessions will be reinstated
> when the network link comes back live
>  
> 

If you cannot support it or it does not make sense just remove the stub 
then. I say it is not a big deal now, but hopefully we do not hit fun 
like with qla3xxx and qla4xxx :)

>>> +
>>> +void bnx2i_sysfs_cleanup(void)
>>> +{
>>> +	class_device_unregister(&port_class_dev);
>>> +	class_unregister(&bnx2i_class);
>>> +}
>> The sysfs bits related to the hba should be use one of the scsi sysfs 
>> facilities or if they are related to iscsi bits and are generic then 
>> through the iscsi hba
> 
> bnx2i needs 2 sysfs entries -
> 1. QP size info - this is used to size per connection shared data
> structures to issue work requests to chip (login, scsi cmd, tmf, nopin)
> and get completions from the chip (scsi completions, async messages,
> etc'). This is a iSCSI HBA attribute
> 2. port mapper - we can be more flexible on classifying this as either
> iSCSI HBA attribute or bnx2i driver global attribute
> Can hooks be added to iSCSI transport class to include these?
> 

Which ones were they exactly? I think JamesB wanted only common 
transport values in the transport class. If it is driver specific then 
it should go on the host or target or device with the scsi_host_template 
attrs.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
       [not found] <1188599815.5176.12.camel@dell>
  2007-09-05 18:34 ` [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices Mike Christie
@ 2007-09-07 22:36 ` Mike Christie
  2007-09-08  7:41   ` FUJITA Tomonori
  1 sibling, 1 reply; 27+ messages in thread
From: Mike Christie @ 2007-09-07 22:36 UTC (permalink / raw)
  To: open-iscsi; +Cc: davem, mchristi, netdev, anilgv, talm, lusinsky, uri

Some quick comments that got cut out of the original mail.


> +
> +/*
> + * map single buffer
> + */
> +static int bnx2i_map_single_buf(struct bnx2i_hba *hba,
> +				       struct bnx2i_cmd *cmd)
> +{
> +	struct scsi_cmnd *sc = cmd->scsi_cmd;
> +	struct iscsi_bd *bd = cmd->bd_tbl->bd_tbl;
> +	int byte_count;
> +	int bd_count;
> +	u64 addr;
> +
> +	byte_count = sc->request_bufflen;
> +	sc->SCp.dma_handle =
> +		pci_map_single(hba->pci_dev, sc->request_buffer,
> +			       sc->request_bufflen, sc->sc_data_direction);
> +	addr = sc->SCp.dma_handle;
> +
> +	if (byte_count > MAX_BD_LENGTH) {
> +		bd_count = bnx2i_split_bd(cmd, addr, byte_count, 0);
> +	} else {
> +		bd_count = 1;
> +		bd[0].buffer_addr_lo = addr & 0xffffffff;
> +		bd[0].buffer_addr_hi = addr >> 32;
> +		bd[0].buffer_length = sc->request_bufflen;
> +		bd[0].flags = ISCSI_BD_FIRST_IN_BD_CHAIN |
> +			      ISCSI_BD_LAST_IN_BD_CHAIN;
> +	}
> +	bd[bd_count - 1].flags |= ISCSI_BD_LAST_IN_BD_CHAIN;
> +
> +	return bd_count;
> +}

I think you should always be getting use_sg greater than zero now, so 
the map single path is not needed.


> +
> +
> +/*
> + * map SG list
> + */
> +static int bnx2i_map_sg(struct bnx2i_hba *hba, struct bnx2i_cmd *cmd)
> +{
> +	struct scsi_cmnd *sc = cmd->scsi_cmd;
> +	struct iscsi_bd *bd = cmd->bd_tbl->bd_tbl;
> +	struct scatterlist *sg;
> +	int byte_count = 0;
> +	int sg_frags;
> +	int bd_count = 0;
> +	int sg_count;
> +	int sg_len;
> +	u64 addr;
> +	int i;
> +
> +	sg = sc->request_buffer;
> +	sg_count = pci_map_sg(hba->pci_dev, sg, sc->use_sg,
> +			      sc->sc_data_direction);
> +
> +	for (i = 0; i < sg_count; i++) {
> +		sg_len = sg_dma_len(sg);
> +		addr = sg_dma_address(sg);
> +		if (sg_len > MAX_BD_LENGTH)
> +			sg_frags = bnx2i_split_bd(cmd, addr, sg_len,
> +						  bd_count);


If you call blk_queue_max_segment_size() in the slave_configure callout 
you can limit the size of the segments that the block layer builds so 
they are smaller than MAX_BD_LENGTH. However, I am not sure how useful 
that is. I think DMA-API.txt states that the mapping code is ok to 
merged mutliple sglists entries into one so I think that means that we 
can still end up with an entry that is larger than MAX_BD_LENGTH. Not 
sure if there is way to tell the pci/dma map_sg code to limit this too.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-07 22:36 ` Mike Christie
@ 2007-09-08  7:41   ` FUJITA Tomonori
  2007-09-08 11:32     ` Jeff Garzik
  0 siblings, 1 reply; 27+ messages in thread
From: FUJITA Tomonori @ 2007-09-08  7:41 UTC (permalink / raw)
  To: open-iscsi
  Cc: davem, mchristi, netdev, anilgv, talm, lusinsky, uri,
	fujita.tomonori

On Fri, 07 Sep 2007 17:36:47 -0500
Mike Christie <michaelc@cs.wisc.edu> wrote:

> > +/*
> > + * map SG list
> > + */
> > +static int bnx2i_map_sg(struct bnx2i_hba *hba, struct bnx2i_cmd *cmd)
> > +{
> > +	struct scsi_cmnd *sc = cmd->scsi_cmd;
> > +	struct iscsi_bd *bd = cmd->bd_tbl->bd_tbl;
> > +	struct scatterlist *sg;
> > +	int byte_count = 0;
> > +	int sg_frags;
> > +	int bd_count = 0;
> > +	int sg_count;
> > +	int sg_len;
> > +	u64 addr;
> > +	int i;
> > +
> > +	sg = sc->request_buffer;
> > +	sg_count = pci_map_sg(hba->pci_dev, sg, sc->use_sg,
> > +			      sc->sc_data_direction);

Can you use scsi_dma_map() here?


> > +	for (i = 0; i < sg_count; i++) {
> > +		sg_len = sg_dma_len(sg);
> > +		addr = sg_dma_address(sg);
> > +		if (sg_len > MAX_BD_LENGTH)
> > +			sg_frags = bnx2i_split_bd(cmd, addr, sg_len,
> > +						  bd_count);

Please use scsi_for_each_sg().

You can't directly access to use_sg, request_buffer, request_bufflen,
and resid in scsi_cmnd structure. Please use the scsi data accessors
in scsi_cmnd.h: scsi_sg_count, scsi_sglist, scsi_bufflen,
scsi_set_resid, and scsi_get_resid.


> If you call blk_queue_max_segment_size() in the slave_configure callout 
> you can limit the size of the segments that the block layer builds so 
> they are smaller than MAX_BD_LENGTH. However, I am not sure how useful 
> that is. I think DMA-API.txt states that the mapping code is ok to 
> merged mutliple sglists entries into one so I think that means that we 
> can still end up with an entry that is larger than MAX_BD_LENGTH. Not 
> sure if there is way to tell the pci/dma map_sg code to limit this too.

Yeah, iommu code ignores the lld limitations (the problem is that the
lld limitations are in request_queue and iommu code can't access to
request_queue). There is no way to tell iommu code about the lld
limitations.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-08  7:41   ` FUJITA Tomonori
@ 2007-09-08 11:32     ` Jeff Garzik
  2007-09-08 12:00       ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Jeff Garzik @ 2007-09-08 11:32 UTC (permalink / raw)
  To: FUJITA Tomonori
  Cc: open-iscsi, davem, mchristi, netdev, anilgv, talm, lusinsky, uri,
	fujita.tomonori

FUJITA Tomonori wrote:
> Yeah, iommu code ignores the lld limitations (the problem is that the
> lld limitations are in request_queue and iommu code can't access to
> request_queue). There is no way to tell iommu code about the lld
> limitations.


This fact very much wants fixing.

	Jeff



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-05 21:27   ` Anil Veerabhadrappa
  2007-09-07 22:23     ` Mike Christie
@ 2007-09-08 11:59     ` Christoph Hellwig
  2007-09-08 14:49       ` Michael Chan
  1 sibling, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2007-09-08 11:59 UTC (permalink / raw)
  To: Anil Veerabhadrappa
  Cc: Mike Christie, Michael Chan, davem, netdev, open-iscsi, talm,
	lusinsky, uri, SCSI Mailing List

On Wed, Sep 05, 2007 at 02:27:02PM -0700, Anil Veerabhadrappa wrote:
> This is a very tricky proposal as this header file is automatically
> generated by a well defined process and is shared between various driver
> supporting multiple platform/OS and the firmware. If it is not of a big
> issue I would like to keep it the way it is.

Most of it should just go away, and the other bits shouldn't change over
the lifetime of the driver except for additions.  So there really isn't
any point in auto-generating it.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-08 11:32     ` Jeff Garzik
@ 2007-09-08 12:00       ` Christoph Hellwig
  2007-09-09 15:05         ` FUJITA Tomonori
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2007-09-08 12:00 UTC (permalink / raw)
  To: Jeff Garzik
  Cc: FUJITA Tomonori, open-iscsi, davem, mchristi, netdev, anilgv,
	talm, lusinsky, uri, fujita.tomonori

On Sat, Sep 08, 2007 at 07:32:27AM -0400, Jeff Garzik wrote:
> FUJITA Tomonori wrote:
> >Yeah, iommu code ignores the lld limitations (the problem is that the
> >lld limitations are in request_queue and iommu code can't access to
> >request_queue). There is no way to tell iommu code about the lld
> >limitations.
> 
> 
> This fact very much wants fixing.


Absolutely.  Unfortunately everyone wastes their time on creating workarounds
instead of fixing the underlying problem.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-08 11:59     ` Christoph Hellwig
@ 2007-09-08 14:49       ` Michael Chan
  2007-09-08 17:57         ` Anil Veerabhadrappa
  0 siblings, 1 reply; 27+ messages in thread
From: Michael Chan @ 2007-09-08 14:49 UTC (permalink / raw)
  To: Christoph Hellwig, Anil Veerabhadrappa
  Cc: Mike Christie, davem, netdev, open-iscsi, Tal Moyal,
	Robert Lusinsky, Uri Elzur, SCSI Mailing List

Christoph Hellwig wrote:

> Most of it should just go away, and the other bits shouldn't 
> change over
> the lifetime of the driver except for additions.  So there 
> really isn't
> any point in auto-generating it.
> 

Yes, I agree with Mike Christie on this.  These values in
question are defined in iSCSI RFC and therefore should be defined
in a common file.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-08 14:49       ` Michael Chan
@ 2007-09-08 17:57         ` Anil Veerabhadrappa
  0 siblings, 0 replies; 27+ messages in thread
From: Anil Veerabhadrappa @ 2007-09-08 17:57 UTC (permalink / raw)
  To: Michael Chan
  Cc: Christoph Hellwig, Mike Christie, davem, netdev, open-iscsi,
	Tal Moyal, Robert Lusinsky, Uri Elzur, SCSI Mailing List

On Sat, 2007-09-08 at 07:49 -0700, Michael Chan wrote:
> Christoph Hellwig wrote:
> 
> > Most of it should just go away, and the other bits shouldn't 
> > change over
> > the lifetime of the driver except for additions.  So there 
> > really isn't
> > any point in auto-generating it.
> > 
> 
> Yes, I agree with Mike Christie on this.  These values in
> question are defined in iSCSI RFC and therefore should be defined
> in a common file. 

Sure, we will remove these duplicate defines from bnx2i header file and
work of iscsi_proto.h macro definitions


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-08 12:00       ` Christoph Hellwig
@ 2007-09-09 15:05         ` FUJITA Tomonori
  2007-09-25  8:39           ` Hannes Reinecke
  0 siblings, 1 reply; 27+ messages in thread
From: FUJITA Tomonori @ 2007-09-09 15:05 UTC (permalink / raw)
  To: hch
  Cc: jeff, tomof, open-iscsi, davem, mchristi, netdev, anilgv, talm,
	lusinsky, uri, fujita.tomonori

On Sat, 8 Sep 2007 13:00:36 +0100
Christoph Hellwig <hch@infradead.org> wrote:

> On Sat, Sep 08, 2007 at 07:32:27AM -0400, Jeff Garzik wrote:
> > FUJITA Tomonori wrote:
> > >Yeah, iommu code ignores the lld limitations (the problem is that the
> > >lld limitations are in request_queue and iommu code can't access to
> > >request_queue). There is no way to tell iommu code about the lld
> > >limitations.
> > 
> > 
> > This fact very much wants fixing.
> 
> 
> Absolutely.  Unfortunately everyone wastes their time on creating workarounds
> instead of fixing the underlying problem.

Any ideas on how to fix this?

I chatted to Jens and James on this last week.

- we could just copies the lld limitations to device structure. it's
hacky but device structure already has hacky stuff.

- we could just link device structure to request_queue structure so
that iommu code can see request_queue structure.

- we could remove the lld limitations in request_queue strucutre and
have a new strucutre (something like struct io_restrictions). then
somehow we could link the new structure with request_queue and device
strucutres.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-09 15:05         ` FUJITA Tomonori
@ 2007-09-25  8:39           ` Hannes Reinecke
  2007-09-26  8:57             ` FUJITA Tomonori
  0 siblings, 1 reply; 27+ messages in thread
From: Hannes Reinecke @ 2007-09-25  8:39 UTC (permalink / raw)
  To: tomof
  Cc: open-iscsi, hch, jeff, davem, mchristi, netdev, anilgv, talm,
	lusinsky, uri, fujita.tomonori

Hi Tomo,

FUJITA Tomonori wrote:
> On Sat, 8 Sep 2007 13:00:36 +0100
> Christoph Hellwig <hch@infradead.org> wrote:
> 
>> On Sat, Sep 08, 2007 at 07:32:27AM -0400, Jeff Garzik wrote:
>>> FUJITA Tomonori wrote:
>>>> Yeah, iommu code ignores the lld limitations (the problem is that the
>>>> lld limitations are in request_queue and iommu code can't access to
>>>> request_queue). There is no way to tell iommu code about the lld
>>>> limitations.
>>>
>>> This fact very much wants fixing.
>>
>> Absolutely.  Unfortunately everyone wastes their time on creating workarounds
>> instead of fixing the underlying problem.
> 
> Any ideas on how to fix this?
> 
> I chatted to Jens and James on this last week.
> 
> - we could just copies the lld limitations to device structure. it's
> hacky but device structure already has hacky stuff.
> 
> - we could just link device structure to request_queue structure so
> that iommu code can see request_queue structure.
> 
> - we could remove the lld limitations in request_queue strucutre and
> have a new strucutre (something like struct io_restrictions). then
> somehow we could link the new structure with request_queue and device
> strucutres.
> 
I'd prefer the latter. These struct io_restrictions could then be used
by dm (which has it's own version right now) to merge queue capabilities.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Markus Rex, HRB 16746 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-25  8:39           ` Hannes Reinecke
@ 2007-09-26  8:57             ` FUJITA Tomonori
  2007-09-27  7:31               ` Jeff Garzik
  0 siblings, 1 reply; 27+ messages in thread
From: FUJITA Tomonori @ 2007-09-26  8:57 UTC (permalink / raw)
  To: hare
  Cc: tomof, open-iscsi, hch, jeff, davem, mchristi, netdev, anilgv,
	talm, lusinsky, uri, fujita.tomonori

On Tue, 25 Sep 2007 10:39:17 +0200
Hannes Reinecke <hare@suse.de> wrote:

> Hi Tomo,
> 
> FUJITA Tomonori wrote:
> > On Sat, 8 Sep 2007 13:00:36 +0100
> > Christoph Hellwig <hch@infradead.org> wrote:
> > 
> >> On Sat, Sep 08, 2007 at 07:32:27AM -0400, Jeff Garzik wrote:
> >>> FUJITA Tomonori wrote:
> >>>> Yeah, iommu code ignores the lld limitations (the problem is that the
> >>>> lld limitations are in request_queue and iommu code can't access to
> >>>> request_queue). There is no way to tell iommu code about the lld
> >>>> limitations.
> >>>
> >>> This fact very much wants fixing.
> >>
> >> Absolutely.  Unfortunately everyone wastes their time on creating workarounds
> >> instead of fixing the underlying problem.
> > 
> > Any ideas on how to fix this?
> > 
> > I chatted to Jens and James on this last week.
> > 
> > - we could just copies the lld limitations to device structure. it's
> > hacky but device structure already has hacky stuff.
> > 
> > - we could just link device structure to request_queue structure so
> > that iommu code can see request_queue structure.
> > 
> > - we could remove the lld limitations in request_queue strucutre and
> > have a new strucutre (something like struct io_restrictions). then
> > somehow we could link the new structure with request_queue and device
> > strucutres.
> > 
> I'd prefer the latter. These struct io_restrictions could then be used
> by dm (which has it's own version right now) to merge queue capabilities.

Yeah, we could nicely handle lld's restrictions (especially with
stacking devices). But iommu code needs only max_segment_size and
seg_boundary_mask, right? If so, the first simple approach to add two
values to device structure is not so bad, I think.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-26  8:57             ` FUJITA Tomonori
@ 2007-09-27  7:31               ` Jeff Garzik
  2007-09-27  7:38                 ` Benjamin Herrenschmidt
  2007-09-27  8:06                 ` FUJITA Tomonori
  0 siblings, 2 replies; 27+ messages in thread
From: Jeff Garzik @ 2007-09-27  7:31 UTC (permalink / raw)
  To: FUJITA Tomonori
  Cc: hare, open-iscsi, hch, davem, mchristi, netdev, anilgv, talm,
	lusinsky, uri, fujita.tomonori, Benjamin Herrenschmidt

FUJITA Tomonori wrote:
> Yeah, we could nicely handle lld's restrictions (especially with
> stacking devices). But iommu code needs only max_segment_size and
> seg_boundary_mask, right? If so, the first simple approach to add two
> values to device structure is not so bad, I think.

(replying to slightly older email in the thread)
(added benh, since we've discussed this issue in the past)

dumb question, what happened to seg_boundary_mask?

If you look at drivers/ata/libata-core.c:ata_fill_sg(), you will note 
that we split s/g segments after DMA-mapping.  Looking at libata LLDD's, 
you will also note judicious use of ATA_DMA_BOUNDARY (0xffff).

It was drilled into my head by James and benh that I cannot rely on the 
DMA boundary + block/scsi + dma_map_sg() to ensure that my S/G segments 
never cross a 64K boundary, a legacy IDE requirement.  Thus the 
additional code in ata_fill_sg() to split S/G segments straddling 64K, 
in addition to setting dma boundary to 0xffff.

A key problem I was hoping would be solved with your work here was the 
elimination of that post dma_map_sg() split.

If I understood James and Ben correctly, one of the key problems was 
always in communicating libata's segment boundary needs to the IOMMU layers?

	Jeff



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-27  7:31               ` Jeff Garzik
@ 2007-09-27  7:38                 ` Benjamin Herrenschmidt
  2007-09-27  7:49                   ` Jeff Garzik
  2007-09-27  8:06                 ` FUJITA Tomonori
  1 sibling, 1 reply; 27+ messages in thread
From: Benjamin Herrenschmidt @ 2007-09-27  7:38 UTC (permalink / raw)
  To: Jeff Garzik
  Cc: FUJITA Tomonori, hare, open-iscsi, hch, davem, mchristi, netdev,
	anilgv, talm, lusinsky, uri, fujita.tomonori


On Thu, 2007-09-27 at 03:31 -0400, Jeff Garzik wrote:
> A key problem I was hoping would be solved with your work here was
> the 
> elimination of that post dma_map_sg() split.
> 
> If I understood James and Ben correctly, one of the key problems was 
> always in communicating libata's segment boundary needs to the IOMMU
> layers?

Yup. If we can put some constraint in struct device that the dma mapping
code can then look at ... we also need to ensure that what's passed in
for DMA'ing already matches those constraints as well since no-iommu
platforms will basically just keep the dma table as-is.

Ben.



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-27  7:38                 ` Benjamin Herrenschmidt
@ 2007-09-27  7:49                   ` Jeff Garzik
  2007-09-27  8:12                     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 27+ messages in thread
From: Jeff Garzik @ 2007-09-27  7:49 UTC (permalink / raw)
  To: benh
  Cc: FUJITA Tomonori, hare, open-iscsi, hch, davem, mchristi, netdev,
	anilgv, talm, lusinsky, uri, fujita.tomonori

Benjamin Herrenschmidt wrote:
> On Thu, 2007-09-27 at 03:31 -0400, Jeff Garzik wrote:
>> A key problem I was hoping would be solved with your work here was
>> the 
>> elimination of that post dma_map_sg() split.
>>
>> If I understood James and Ben correctly, one of the key problems was 
>> always in communicating libata's segment boundary needs to the IOMMU
>> layers?
> 
> Yup. If we can put some constraint in struct device that the dma mapping
> code can then look at ... we also need to ensure that what's passed in
> for DMA'ing already matches those constraints as well since no-iommu
> platforms will basically just keep the dma table as-is.

That's a good point...  no-iommu platforms would need to be updated to 
do the split for me.  I suppose we can steal that code from swiotlb or 
somewhere.

	Jeff




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-27  7:31               ` Jeff Garzik
  2007-09-27  7:38                 ` Benjamin Herrenschmidt
@ 2007-09-27  8:06                 ` FUJITA Tomonori
  2007-09-27  8:23                   ` Jeff Garzik
  1 sibling, 1 reply; 27+ messages in thread
From: FUJITA Tomonori @ 2007-09-27  8:06 UTC (permalink / raw)
  To: jeff
  Cc: tomof, hare, open-iscsi, hch, davem, mchristi, netdev, anilgv,
	talm, lusinsky, uri, fujita.tomonori, benh, jens.axboe,
	James.Bottomley, linux-scsi

CC'ed Jens, James, and linux-scsi.

On Thu, 27 Sep 2007 03:31:55 -0400
Jeff Garzik <jeff@garzik.org> wrote:

> FUJITA Tomonori wrote:
> > Yeah, we could nicely handle lld's restrictions (especially with
> > stacking devices). But iommu code needs only max_segment_size and
> > seg_boundary_mask, right? If so, the first simple approach to add two
> > values to device structure is not so bad, I think.
> 
> (replying to slightly older email in the thread)
> (added benh, since we've discussed this issue in the past)
> 
> dumb question, what happened to seg_boundary_mask?

I'll work on it too after finishing max_seg_size.


> If you look at drivers/ata/libata-core.c:ata_fill_sg(), you will note 
> that we split s/g segments after DMA-mapping.  Looking at libata LLDD's, 
> you will also note judicious use of ATA_DMA_BOUNDARY (0xffff).

I know the workaround since I fixed libata's sg chaining patch.


> It was drilled into my head by James and benh that I cannot rely on the 
> DMA boundary + block/scsi + dma_map_sg() to ensure that my S/G segments 
> never cross a 64K boundary, a legacy IDE requirement.  Thus the 
> additional code in ata_fill_sg() to split S/G segments straddling 64K, 
> in addition to setting dma boundary to 0xffff.

I think that the block layer can handle both max_segment_size and
seg_boundary_mask properly (and SCSI-ml just uses the block layer). So
if we fix iommu, then we can remove a workaround to fix sg lists in
llds.


> A key problem I was hoping would be solved with your work here was the 
> elimination of that post dma_map_sg() split.

Yeah, that's my goal too.


> If I understood James and Ben correctly, one of the key problems was 
> always in communicating libata's segment boundary needs to the IOMMU layers?
> 
> 	Jeff
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-27  7:49                   ` Jeff Garzik
@ 2007-09-27  8:12                     ` Benjamin Herrenschmidt
  2007-09-27  8:22                       ` Jeff Garzik
  0 siblings, 1 reply; 27+ messages in thread
From: Benjamin Herrenschmidt @ 2007-09-27  8:12 UTC (permalink / raw)
  To: Jeff Garzik
  Cc: FUJITA Tomonori, hare, open-iscsi, hch, davem, mchristi, netdev,
	anilgv, talm, lusinsky, uri, fujita.tomonori


On Thu, 2007-09-27 at 03:49 -0400, Jeff Garzik wrote:
> Benjamin Herrenschmidt wrote:
> > On Thu, 2007-09-27 at 03:31 -0400, Jeff Garzik wrote:
> >> A key problem I was hoping would be solved with your work here was
> >> the 
> >> elimination of that post dma_map_sg() split.
> >>
> >> If I understood James and Ben correctly, one of the key problems was 
> >> always in communicating libata's segment boundary needs to the IOMMU
> >> layers?
> > 
> > Yup. If we can put some constraint in struct device that the dma mapping
> > code can then look at ... we also need to ensure that what's passed in
> > for DMA'ing already matches those constraints as well since no-iommu
> > platforms will basically just keep the dma table as-is.
> 
> That's a good point...  no-iommu platforms would need to be updated to 
> do the split for me.  I suppose we can steal that code from swiotlb or 
> somewhere.

Doing the split means being able to grow the sglist... which the dma_*
calls can't do at least not in their current form.

Ben.



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-27  8:12                     ` Benjamin Herrenschmidt
@ 2007-09-27  8:22                       ` Jeff Garzik
  2007-09-27  8:46                         ` FUJITA Tomonori
  0 siblings, 1 reply; 27+ messages in thread
From: Jeff Garzik @ 2007-09-27  8:22 UTC (permalink / raw)
  To: benh
  Cc: FUJITA Tomonori, hare, open-iscsi, hch, davem, mchristi, netdev,
	anilgv, talm, lusinsky, uri, fujita.tomonori

Benjamin Herrenschmidt wrote:
> On Thu, 2007-09-27 at 03:49 -0400, Jeff Garzik wrote:
>> Benjamin Herrenschmidt wrote:
>>> On Thu, 2007-09-27 at 03:31 -0400, Jeff Garzik wrote:
>>>> A key problem I was hoping would be solved with your work here was
>>>> the 
>>>> elimination of that post dma_map_sg() split.
>>>>
>>>> If I understood James and Ben correctly, one of the key problems was 
>>>> always in communicating libata's segment boundary needs to the IOMMU
>>>> layers?
>>> Yup. If we can put some constraint in struct device that the dma mapping
>>> code can then look at ... we also need to ensure that what's passed in
>>> for DMA'ing already matches those constraints as well since no-iommu
>>> platforms will basically just keep the dma table as-is.
>> That's a good point...  no-iommu platforms would need to be updated to 
>> do the split for me.  I suppose we can steal that code from swiotlb or 
>> somewhere.
> 
> Doing the split means being able to grow the sglist... which the dma_*
> calls can't do at least not in their current form.

IMO one straightforward approach is for the struct scatterlist owner to 
provide a table large enough to accomodate the possible splits (perhaps 
along with communicate that table's max size to the IOMMU/dma layers).

	Jeff



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-27  8:06                 ` FUJITA Tomonori
@ 2007-09-27  8:23                   ` Jeff Garzik
  0 siblings, 0 replies; 27+ messages in thread
From: Jeff Garzik @ 2007-09-27  8:23 UTC (permalink / raw)
  To: FUJITA Tomonori
  Cc: hare, open-iscsi, hch, davem, mchristi, netdev, anilgv, talm,
	lusinsky, uri, fujita.tomonori, benh, jens.axboe, James.Bottomley,
	linux-scsi

FUJITA Tomonori wrote:
> CC'ed Jens, James, and linux-scsi.
> 
> On Thu, 27 Sep 2007 03:31:55 -0400
> Jeff Garzik <jeff@garzik.org> wrote:
> 
>> FUJITA Tomonori wrote:
>>> Yeah, we could nicely handle lld's restrictions (especially with
>>> stacking devices). But iommu code needs only max_segment_size and
>>> seg_boundary_mask, right? If so, the first simple approach to add two
>>> values to device structure is not so bad, I think.
>> (replying to slightly older email in the thread)
>> (added benh, since we've discussed this issue in the past)
>>
>> dumb question, what happened to seg_boundary_mask?
> 
> I'll work on it too after finishing max_seg_size.
> 
> 
>> If you look at drivers/ata/libata-core.c:ata_fill_sg(), you will note 
>> that we split s/g segments after DMA-mapping.  Looking at libata LLDD's, 
>> you will also note judicious use of ATA_DMA_BOUNDARY (0xffff).
> 
> I know the workaround since I fixed libata's sg chaining patch.
> 
> 
>> It was drilled into my head by James and benh that I cannot rely on the 
>> DMA boundary + block/scsi + dma_map_sg() to ensure that my S/G segments 
>> never cross a 64K boundary, a legacy IDE requirement.  Thus the 
>> additional code in ata_fill_sg() to split S/G segments straddling 64K, 
>> in addition to setting dma boundary to 0xffff.
> 
> I think that the block layer can handle both max_segment_size and
> seg_boundary_mask properly (and SCSI-ml just uses the block layer). So
> if we fix iommu, then we can remove a workaround to fix sg lists in
> llds.
> 
> 
>> A key problem I was hoping would be solved with your work here was the 
>> elimination of that post dma_map_sg() split.
> 
> Yeah, that's my goal too.

Great :)  Well, I'm generally happy with your max-seg-size stuff (sans 
the minor nits I pointed out in another email).

Thanks for pursuing this,

	Jeff




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-27  8:22                       ` Jeff Garzik
@ 2007-09-27  8:46                         ` FUJITA Tomonori
  0 siblings, 0 replies; 27+ messages in thread
From: FUJITA Tomonori @ 2007-09-27  8:46 UTC (permalink / raw)
  To: jeff
  Cc: benh, tomof, hare, open-iscsi, hch, davem, mchristi, netdev,
	anilgv, talm, lusinsky, uri, fujita.tomonori, jens.axboe,
	James.Bottomley, linux-scsi

CC'ed Jens, James, and linux-scsi again.

On Thu, 27 Sep 2007 04:22:15 -0400
Jeff Garzik <jeff@garzik.org> wrote:

> Benjamin Herrenschmidt wrote:
> > On Thu, 2007-09-27 at 03:49 -0400, Jeff Garzik wrote:
> >> Benjamin Herrenschmidt wrote:
> >>> On Thu, 2007-09-27 at 03:31 -0400, Jeff Garzik wrote:
> >>>> A key problem I was hoping would be solved with your work here was
> >>>> the 
> >>>> elimination of that post dma_map_sg() split.
> >>>>
> >>>> If I understood James and Ben correctly, one of the key problems was 
> >>>> always in communicating libata's segment boundary needs to the IOMMU
> >>>> layers?
> >>> Yup. If we can put some constraint in struct device that the dma mapping
> >>> code can then look at ... we also need to ensure that what's passed in
> >>> for DMA'ing already matches those constraints as well since no-iommu
> >>> platforms will basically just keep the dma table as-is.
> >> That's a good point...  no-iommu platforms would need to be updated to 
> >> do the split for me.  I suppose we can steal that code from swiotlb or 
> >> somewhere.
> > 
> > Doing the split means being able to grow the sglist... which the dma_*
> > calls can't do at least not in their current form.
> 
> IMO one straightforward approach is for the struct scatterlist owner to 
> provide a table large enough to accomodate the possible splits (perhaps 
> along with communicate that table's max size to the IOMMU/dma layers).

As I said in another mail, the block layer and scsi-ml work properly,
I think. So there is no need to split sg lists for no-iommu platforms.

We need to fix only iommu code merge sglists (already done) for the
segment size restriction but we need to fix all iommu code and swiotlb
for the segment boundary restriction. Splitting sg list might be
useful for the case that iommu can't find a proper boundary memory
area. But I think that it rarely happens (and there are few llds has
the boundary restriction).

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-09-07 22:23     ` Mike Christie
@ 2007-11-21 18:38       ` Anil Veerabhadrappa
  2007-11-21 19:17         ` James Smart
       [not found]         ` <1195670296.8767.9.camel-opBMJL+S1+mb6IhXEaeG+wpgy58w7zIFpWgKQ6/u3Fg@public.gmane.org>
  0 siblings, 2 replies; 27+ messages in thread
From: Anil Veerabhadrappa @ 2007-11-21 18:38 UTC (permalink / raw)
  To: Mike Christie
  Cc: Mike Christie, Michael Chan, davem, netdev, open-iscsi, talm,
	lusinsky, uri, SCSI Mailing List


> >> The sysfs bits related to the hba should be use one of the scsi sysfs 
> >> facilities or if they are related to iscsi bits and are generic then 
> >> through the iscsi hba
> > 
> > bnx2i needs 2 sysfs entries -
> > 1. QP size info - this is used to size per connection shared data
> > structures to issue work requests to chip (login, scsi cmd, tmf, nopin)
> > and get completions from the chip (scsi completions, async messages,
> > etc'). This is a iSCSI HBA attribute
> > 2. port mapper - we can be more flexible on classifying this as either
> > iSCSI HBA attribute or bnx2i driver global attribute
> > Can hooks be added to iSCSI transport class to include these?
> > 
> 
> Which ones were they exactly? I think JamesB wanted only common 
> transport values in the transport class. If it is driver specific then 
> it should go on the host or target or device with the scsi_host_template 
> attrs.
> 

It's a chicken & egg issue to put "port mapper" sysfs entry in scsi host
attributes. Application won't see sysfs unless initiator creates an
iSCSI session and driver can't create an iSCSI session without a tcp
port. I was wondering if there is a better way than using IOCTL in this
situation?


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-11-21 18:38       ` Anil Veerabhadrappa
@ 2007-11-21 19:17         ` James Smart
       [not found]         ` <1195670296.8767.9.camel-opBMJL+S1+mb6IhXEaeG+wpgy58w7zIFpWgKQ6/u3Fg@public.gmane.org>
  1 sibling, 0 replies; 27+ messages in thread
From: James Smart @ 2007-11-21 19:17 UTC (permalink / raw)
  To: Anil Veerabhadrappa
  Cc: Mike Christie, Mike Christie, Michael Chan, davem, netdev,
	open-iscsi, talm, lusinsky, uri, SCSI Mailing List


Anil Veerabhadrappa wrote:
> It's a chicken & egg issue to put "port mapper" sysfs entry in scsi host
> attributes. Application won't see sysfs unless initiator creates an
> iSCSI session and driver can't create an iSCSI session without a tcp
> port. I was wondering if there is a better way than using IOCTL in this
> situation?

Agree, and IMHO, is why the scsi_host should have been bound to the ISID
or something similar (e.g. the initiator "port" that can have 1 or more
sessions), and the session bound to the scsi_target under the scsi_host.

-- james s

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
       [not found]         ` <1195670296.8767.9.camel-opBMJL+S1+mb6IhXEaeG+wpgy58w7zIFpWgKQ6/u3Fg@public.gmane.org>
@ 2007-11-27  4:15           ` Mike Christie
  2007-11-28  0:44             ` Anil Veerabhadrappa
  0 siblings, 1 reply; 27+ messages in thread
From: Mike Christie @ 2007-11-27  4:15 UTC (permalink / raw)
  To: Anil Veerabhadrappa
  Cc: Mike Christie, Michael Chan, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	netdev-u79uwXL29TY76Z2rM5mHXA, open-iscsi-/JYPxA39Uh5TLH3MbocFFw,
	talm-dY08KVG/lbpWk0Htik3J/w, lusinsky-dY08KVG/lbpWk0Htik3J/w,
	uri-dY08KVG/lbpWk0Htik3J/w, SCSI Mailing List


Anil Veerabhadrappa wrote:
>>>> The sysfs bits related to the hba should be use one of the scsi sysfs 
>>>> facilities or if they are related to iscsi bits and are generic then 
>>>> through the iscsi hba
>>> bnx2i needs 2 sysfs entries -
>>> 1. QP size info - this is used to size per connection shared data
>>> structures to issue work requests to chip (login, scsi cmd, tmf, nopin)
>>> and get completions from the chip (scsi completions, async messages,
>>> etc'). This is a iSCSI HBA attribute
>>> 2. port mapper - we can be more flexible on classifying this as either
>>> iSCSI HBA attribute or bnx2i driver global attribute
>>> Can hooks be added to iSCSI transport class to include these?
>>>
>> Which ones were they exactly? I think JamesB wanted only common 
>> transport values in the transport class. If it is driver specific then 
>> it should go on the host or target or device with the scsi_host_template 
>> attrs.
>>
> 
> It's a chicken & egg issue to put "port mapper" sysfs entry in scsi host
> attributes. Application won't see sysfs unless initiator creates an

Sorry for the late response. I was on vacation.

That is only with how you coded it today. I asked you to do something 
like qla4xxx where the session and host are not so closely bound.

> iSCSI session and driver can't create an iSCSI session without a tcp

That is not right with how things are today even. The iscsi_session 
struct can be created before the tcp connection. This was done because 
we thought we were going to have to use only sysfs for all setup and 
management (we ended up netlink and sysfs though).

> port. I was wondering if there is a better way than using IOCTL in this
> situation?
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups "open-iscsi" group.
To post to this group, send email to open-iscsi-/JYPxA39Uh5TLH3MbocFFw@public.gmane.org
To unsubscribe from this group, send email to open-iscsi-unsubscribe-/JYPxA39Uh5TLH3MbocFFw@public.gmane.org
For more options, visit this group at http://groups.google.com/group/open-iscsi
-~----------~----~----~----~------~----~------~--~---

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-11-27  4:15           ` Mike Christie
@ 2007-11-28  0:44             ` Anil Veerabhadrappa
       [not found]               ` <1196210691.5980.20.camel-opBMJL+S1+mb6IhXEaeG+20Cxg0+/0ngpWgKQ6/u3Fg@public.gmane.org>
  0 siblings, 1 reply; 27+ messages in thread
From: Anil Veerabhadrappa @ 2007-11-28  0:44 UTC (permalink / raw)
  To: Mike Christie
  Cc: Mike Christie, Michael Chan, davem, netdev, open-iscsi, talm,
	lusinsky, uri, SCSI Mailing List


> >> Which ones were they exactly? I think JamesB wanted only common 
> >> transport values in the transport class. If it is driver specific then 
> >> it should go on the host or target or device with the scsi_host_template 
> >> attrs.
> >>
> > 
> > It's a chicken & egg issue to put "port mapper" sysfs entry in scsi host
> > attributes. Application won't see sysfs unless initiator creates an
> 
> Sorry for the late response. I was on vacation.
> 
> That is only with how you coded it today. I asked you to do something 
> like qla4xxx where the session and host are not so closely bound.

bnx2i is still using scsi host per iscsi session model, we plan to
migrate to scsi host per device model pretty soon. Previously you
indicated that you are working on this port, can you please share the
code? We will build on your work and bring this to closure.


> 
> > iSCSI session and driver can't create an iSCSI session without a tcp
> 
> That is not right with how things are today even. The iscsi_session 
> struct can be created before the tcp connection. This was done because 
> we thought we were going to have to use only sysfs for all setup and 
> management (we ended up netlink and sysfs though).

You are right, software is flexible enough to allow creation of
session/connection and endpoint independently but so far user daemon
creates TCP endpoint before iSCSI session and bind them all together.
Any changes to this scheme will not be compatible with existing
distributions




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
       [not found]               ` <1196210691.5980.20.camel-opBMJL+S1+mb6IhXEaeG+20Cxg0+/0ngpWgKQ6/u3Fg@public.gmane.org>
@ 2007-11-28 20:06                 ` Mike Christie
  2007-11-29  0:36                   ` Anil Veerabhadrappa
  0 siblings, 1 reply; 27+ messages in thread
From: Mike Christie @ 2007-11-28 20:06 UTC (permalink / raw)
  To: open-iscsi-/JYPxA39Uh5TLH3MbocFFw
  Cc: Mike Christie, Michael Chan, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	netdev-u79uwXL29TY76Z2rM5mHXA, talm-dY08KVG/lbpWk0Htik3J/w,
	lusinsky-dY08KVG/lbpWk0Htik3J/w, uri-dY08KVG/lbpWk0Htik3J/w,
	SCSI Mailing List


Anil Veerabhadrappa wrote:
> 
>>>> Which ones were they exactly? I think JamesB wanted only common 
>>>> transport values in the transport class. If it is driver specific then 
>>>> it should go on the host or target or device with the scsi_host_template 
>>>> attrs.
>>>>
>>> It's a chicken & egg issue to put "port mapper" sysfs entry in scsi host
>>> attributes. Application won't see sysfs unless initiator creates an
>> Sorry for the late response. I was on vacation.
>>
>> That is only with how you coded it today. I asked you to do something 
>> like qla4xxx where the session and host are not so closely bound.
> 
> bnx2i is still using scsi host per iscsi session model, we plan to
> migrate to scsi host per device model pretty soon. Previously you
> indicated that you are working on this port, can you please share the
> code? We will build on your work and bring this to closure.

Send me a link to your code so I can rebuild my stuff against it.

> 
> 
>>> iSCSI session and driver can't create an iSCSI session without a tcp
>> That is not right with how things are today even. The iscsi_session 
>> struct can be created before the tcp connection. This was done because 
>> we thought we were going to have to use only sysfs for all setup and 
>> management (we ended up netlink and sysfs though).
> 
> You are right, software is flexible enough to allow creation of
> session/connection and endpoint independently but so far user daemon
> creates TCP endpoint before iSCSI session and bind them all together.
> Any changes to this scheme will not be compatible with existing
> distributions
> 

So what do you need to do exactly? You want userspace to be able to set 
some session or connection value, then have it used for that session, 
right? We have been using the netlink interface for that. You would 
basically pass iscsiadm some value (on cmdline or in some file/db), then 
when the session and connection is being created we will pass those 
values in. Values that are used after we are in FFP, like abort 
timeouts, are passed in the set_param command. If a value is needed for 
the session/connection setup then we were passing that in with the 
command (like create_session has the queue depth sent with it). To add 
new values (driver specific and common ones) and have the interface 
compatible with older versions should not be too difficult.

--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups "open-iscsi" group.
To post to this group, send email to open-iscsi-/JYPxA39Uh5TLH3MbocFFw@public.gmane.org
To unsubscribe from this group, send email to open-iscsi-unsubscribe-/JYPxA39Uh5TLH3MbocFFw@public.gmane.org
For more options, visit this group at http://groups.google.com/group/open-iscsi
-~----------~----~----~----~------~----~------~--~---

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
  2007-11-28 20:06                 ` Mike Christie
@ 2007-11-29  0:36                   ` Anil Veerabhadrappa
  0 siblings, 0 replies; 27+ messages in thread
From: Anil Veerabhadrappa @ 2007-11-29  0:36 UTC (permalink / raw)
  To: open-iscsi
  Cc: Mike Christie, Michael Chan, davem, netdev, talm, lusinsky, uri,
	SCSI Mailing List

On Wed, 2007-11-28 at 14:06 -0600, Mike Christie wrote:
> Anil Veerabhadrappa wrote:
> > 
> >>>> Which ones were they exactly? I think JamesB wanted only common 
> >>>> transport values in the transport class. If it is driver specific then 
> >>>> it should go on the host or target or device with the scsi_host_template 
> >>>> attrs.
> >>>>
> >>> It's a chicken & egg issue to put "port mapper" sysfs entry in scsi host
> >>> attributes. Application won't see sysfs unless initiator creates an
> >> Sorry for the late response. I was on vacation.
> >>
> >> That is only with how you coded it today. I asked you to do something 
> >> like qla4xxx where the session and host are not so closely bound.
> > 
> > bnx2i is still using scsi host per iscsi session model, we plan to
> > migrate to scsi host per device model pretty soon. Previously you
> > indicated that you are working on this port, can you please share the
> > code? We will build on your work and bring this to closure.
> 
> Send me a link to your code so I can rebuild my stuff against it.


ftp://Net_sys_anon@ftp1.broadcom.com/bnx2iscsi-2.6.25.diff



> > 
> > You are right, software is flexible enough to allow creation of
> > session/connection and endpoint independently but so far user daemon
> > creates TCP endpoint before iSCSI session and bind them all together.
> > Any changes to this scheme will not be compatible with existing
> > distributions
> > 
> 
> So what do you need to do exactly? You want userspace to be able to set 
> some session or connection value, then have it used for that session, 
> right? We have been using the netlink interface for that. You would 
> basically pass iscsiadm some value (on cmdline or in some file/db), then 
> when the session and connection is being created we will pass those 
> values in. Values that are used after we are in FFP, like abort 
> timeouts, are passed in the set_param command. If a value is needed for 
> the session/connection setup then we were passing that in with the 
> command (like create_session has the queue depth sent with it). To add 
> new values (driver specific and common ones) and have the interface 
> compatible with older versions should not be too difficult.

wondering how netlink interface can be expanded to assist hardware
vendor's in configuring and debugging offload devices. In addition to
the current issue we might use this interface for other device
configuration (e.g. administrator will have to shrink per connection
shared queue size if a large number of iSCSI connections is required)
and debugging purpose(e.g. get iSCSI connection context dump when an
error is detected on a connection). Former is a synchronous call while
later in most cases will complete asynchronously. This is how I image
the flow - 
vendor_config_apps --> iscsid --> netlink --> iscsi_transport --> driver

Also passing configuration info via cmd line or file/db will only work
for parameter types which are more like user preferences. I don't think
we will be able to specify a dynamic value using this mechanism, e.g.
port number could be different for 2 consecutive iscsi login to same
target. This will also break "-L all" interface, right?



> 
> --~--~---------~--~----~------------~-------~--~----~
> You received this message because you are subscribed to the Google Groups "open-iscsi" group.
> To post to this group, send email to open-iscsi@googlegroups.com
> To unsubscribe from this group, send email to open-iscsi-unsubscribe@googlegroups.com
> For more options, visit this group at http://groups.google.com/group/open-iscsi
> -~----------~----~----~----~------~----~------~--~---
> 
> 


^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2007-11-29  0:36 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <1188599815.5176.12.camel@dell>
2007-09-05 18:34 ` [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices Mike Christie
2007-09-05 21:27   ` Anil Veerabhadrappa
2007-09-07 22:23     ` Mike Christie
2007-11-21 18:38       ` Anil Veerabhadrappa
2007-11-21 19:17         ` James Smart
     [not found]         ` <1195670296.8767.9.camel-opBMJL+S1+mb6IhXEaeG+wpgy58w7zIFpWgKQ6/u3Fg@public.gmane.org>
2007-11-27  4:15           ` Mike Christie
2007-11-28  0:44             ` Anil Veerabhadrappa
     [not found]               ` <1196210691.5980.20.camel-opBMJL+S1+mb6IhXEaeG+20Cxg0+/0ngpWgKQ6/u3Fg@public.gmane.org>
2007-11-28 20:06                 ` Mike Christie
2007-11-29  0:36                   ` Anil Veerabhadrappa
2007-09-08 11:59     ` Christoph Hellwig
2007-09-08 14:49       ` Michael Chan
2007-09-08 17:57         ` Anil Veerabhadrappa
2007-09-07 22:36 ` Mike Christie
2007-09-08  7:41   ` FUJITA Tomonori
2007-09-08 11:32     ` Jeff Garzik
2007-09-08 12:00       ` Christoph Hellwig
2007-09-09 15:05         ` FUJITA Tomonori
2007-09-25  8:39           ` Hannes Reinecke
2007-09-26  8:57             ` FUJITA Tomonori
2007-09-27  7:31               ` Jeff Garzik
2007-09-27  7:38                 ` Benjamin Herrenschmidt
2007-09-27  7:49                   ` Jeff Garzik
2007-09-27  8:12                     ` Benjamin Herrenschmidt
2007-09-27  8:22                       ` Jeff Garzik
2007-09-27  8:46                         ` FUJITA Tomonori
2007-09-27  8:06                 ` FUJITA Tomonori
2007-09-27  8:23                   ` Jeff Garzik

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).