Re: [PATCH v5] scsi:spraid: initial commit of Ramaxel spraid driver

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Yanling Song <songyl@ramaxel.com>
To: John Garry <john.garry@huawei.com>
Cc: <martin.petersen@oracle.com>, <linux-scsi@vger.kernel.org>,
	<bvanassche@acm.org>
Subject: Re: [PATCH v5] scsi:spraid: initial commit of Ramaxel spraid driver
Date: Wed, 30 Mar 2022 10:54:09 +0800	[thread overview]
Message-ID: <20220330105409.0000636e@ramaxel.com> (raw)
In-Reply-To: <9932f421-638f-091b-f311-68d46966dcfe@huawei.com>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="GB18030", Size: 102693 bytes --]

On Thu, 24 Mar 2022 16:34:06 +0000
John Garry <john.garry@huawei.com> wrote:

> On 14/03/2022 02:53, Yanling Song wrote:
> 
> just some more comments...
> 
> > +static int spraid_setup_prps(struct spraid_dev *hdev, struct
> > spraid_iod *iod) +{
> > +	struct scatterlist *sg = iod->sg;
> > +	u64 dma_addr = sg_dma_address(sg);
> > +	int dma_len = sg_dma_len(sg);
> > +	__le64 *prp_list, *old_prp_list;
> > +	u32 page_size = hdev->page_size;
> > +	int offset = dma_addr & (page_size - 1);
> > +	void **list = spraid_iod_list(iod);
> > +	int length = iod->length;
> > +	struct dma_pool *pool;
> > +	dma_addr_t prp_dma;
> > +	int nprps, i;
> > +
> > +	length -= (page_size - offset);
> > +	if (length <= 0) {
> > +		iod->first_dma = 0;
> > +		return 0;
> > +	}
> > +
> > +	dma_len -= (page_size - offset);
> > +	if (dma_len) {
> > +		dma_addr += (page_size - offset);
> > +	} else {
> > +		sg = sg_next(sg);
> > +		dma_addr = sg_dma_address(sg);
> > +		dma_len = sg_dma_len(sg);
> > +	}
> > +
> > +	if (length <= page_size) {
> > +		iod->first_dma = dma_addr;
> > +		return 0;
> > +	}
> > +
> > +	nprps = DIV_ROUND_UP(length, page_size);
> > +	if (nprps <= (SMALL_POOL_SIZE / PRP_ENTRY_SIZE)) {
> > +		pool = iod->spraidq->prp_small_pool;
> > +		iod->npages = 0;
> > +	} else {
> > +		pool = hdev->prp_page_pool;
> > +		iod->npages = 1;
> > +	}
> > +
> > +	prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);  
> 
> why atomic?
> 
It is in IO path and if using GFP_KERNEL, it may sleep and impact on
the performance.

> > +	if (!prp_list) {
> > +		dev_err_ratelimited(hdev->dev, "Allocate first
> > prp_list memory failed\n");  
> 
> ratelimited - do you really expect this to fail so often?
> 
Will use dev_err to replace all dev_err_ratelimited.

> > +		iod->first_dma = dma_addr;
> > +		iod->npages = -1;
> > +		return -ENOMEM;
> > +	}
> > +	list[0] = prp_list;
> > +	iod->first_dma = prp_dma;
> > +	i = 0;
> > +	for (;;) {
> > +		if (i == page_size / PRP_ENTRY_SIZE) {
> > +			old_prp_list = prp_list;
> > +
> > +			prp_list = dma_pool_alloc(pool,
> > GFP_ATOMIC, &prp_dma);
> > +			if (!prp_list) {
> > +				dev_err_ratelimited(hdev->dev,
> > "Allocate %dth prp_list memory failed\n",
> > +						    iod->npages +
> > 1);
> > +				return -ENOMEM;
> > +			}
> > +			list[iod->npages++] = prp_list;
> > +			prp_list[0] = old_prp_list[i - 1];
> > +			old_prp_list[i - 1] = cpu_to_le64(prp_dma);
> > +			i = 1;
> > +		}
> > +		prp_list[i++] = cpu_to_le64(dma_addr);
> > +		dma_len -= page_size;
> > +		dma_addr += page_size;
> > +		length -= page_size;
> > +		if (length <= 0)
> > +			break;
> > +		if (dma_len > 0)
> > +			continue;
> > +		if (unlikely(dma_len < 0))
> > +			goto bad_sgl;
> > +		sg = sg_next(sg);
> > +		dma_addr = sg_dma_address(sg);
> > +		dma_len = sg_dma_len(sg);
> > +	}
> > +  
> 
> this all looks so difficult to follow...
> 
The code is to fill PRP during IO. We uses the way of NVME to transfer
commands and do DMA.

> > +	return 0;
> > +
> > +bad_sgl:
> > +	dev_err(hdev->dev, "Setup prps, invalid SGL for payload:
> > %d nents: %d\n",
> > +		iod->length, iod->nsge);
> > +	return -EIO;
> > +}
> > +
> > +#define SGES_PER_PAGE    (PAGE_SIZE / sizeof(struct
> > spraid_sgl_desc)) +
> > +static void spraid_submit_cmd(struct spraid_queue *spraidq, const
> > void *cmd)  
> 
> can you avoid passing void pointers around like this? cmd is a struct 
> spraid_ioq_command, right?
> 
It also can be a struct spraid_admin_command.

> > +{
> > +	u32 sqes = SQE_SIZE(spraidq->qid);
> > +	unsigned long flags;
> > +	struct spraid_admin_common_command *acd = (struct
> > spraid_admin_common_command *)cmd; +
> > +	spin_lock_irqsave(&spraidq->sq_lock, flags);
> > +	memcpy((spraidq->sq_cmds + sqes * spraidq->sq_tail), cmd,
> > sqes);
> > +	if (++spraidq->sq_tail == spraidq->q_depth)
> > +		spraidq->sq_tail = 0;
> > +
> > +	writel(spraidq->sq_tail, spraidq->q_db);  
> 
> why not __relaxed variant?
> 
Because we need barrier here: Action of memcpy has to be executed
completely before doorbell is ringed.

> > +	spin_unlock_irqrestore(&spraidq->sq_lock, flags);
> > +
> > +	pr_debug("cid[%d], qid[%d], opcode[0x%x], flags[0x%x],
> > hdid[%u]\n",
> > +		    acd->command_id, spraidq->qid, acd->opcode,
> > acd->flags, le32_to_cpu(acd->hdid));  
> 
> do you really think that this debug statement is useful? I guess that
> if it was enabled then it would swamp the console
> 
Yes, it is useful when tracking which commands are sent from host. By
default, it is disabled.

> > +}
> > +
> > +static u32 spraid_mod64(u64 dividend, u32 divisor)
> > +{
> > +	u64 d;
> > +	u32 remainder;
> > +
> > +	if (!divisor)
> > +		pr_err("DIVISOR is zero, in div fn\n");
> > +
> > +	d = dividend;
> > +	remainder = do_div(d, divisor);
> > +	return remainder;
> > +}  
> 
> you should not have your own functions just to do a divide
> 
Will use another way in the next version.
> > +
> > +static inline bool spraid_is_rw_scmd(struct scsi_cmnd *scmd)
> > +{
> > +	switch (scmd->cmnd[0]) {
> > +	case READ_6:
> > +	case READ_10:
> > +	case READ_12:
> > +	case READ_16:
> > +	case READ_32:
> > +	case WRITE_6:
> > +	case WRITE_10:
> > +	case WRITE_12:
> > +	case WRITE_16:
> > +	case WRITE_32:
> > +		return true;
> > +	default:
> > +		return false;
> > +	}
> > +}  
> 
> this looks exact same as 
> https://elixir.bootlin.com/linux/latest/source/drivers/scsi/pmcraid.h#L993 
> and other places - I think that there should be a common version of
> this or other standard SCSI core way to decide.
> 
Your link does not include read_32/write_32 and there is no such
standard core function which has the same function as ours.

> > +
> > +static bool spraid_is_prp(struct spraid_dev *hdev, struct
> > scsi_cmnd *scmd, u32 nsge) +{
> > +	struct scatterlist *sg = scsi_sglist(scmd);
> > +	u32 page_size = hdev->page_size;
> > +	bool is_prp = true;
> > +	int i = 0;
> > +
> > +	scsi_for_each_sg(scmd, sg, nsge, i) {
> > +		if (i != 0 && i != nsge - 1) {
> > +			if (spraid_mod64(sg_dma_len(sg),
> > page_size) ||
> > +			    spraid_mod64(sg_dma_address(sg),
> > page_size)) {
> > +				is_prp = false;
> > +				break;
> > +			}
> > +		}
> > +
> > +		if (nsge > 1 && i == 0) {
> > +			if ((spraid_mod64((sg_dma_address(sg) +
> > sg_dma_len(sg)), page_size))) {
> > +				is_prp = false;
> > +				break;
> > +			}
> > +		}
> > +
> > +		if (nsge > 1 && i == (nsge - 1)) {
> > +			if (spraid_mod64(sg_dma_address(sg),
> > page_size)) {
> > +				is_prp = false;
> > +				break;
> > +			}
> > +		}
> > +	}
> > +
> > +	return is_prp;
> > +}
> > +
> > +enum {
> > +	SPRAID_SGL_FMT_DATA_DESC     = 0x00,
> > +	SPRAID_SGL_FMT_SEG_DESC      = 0x02,
> > +	SPRAID_SGL_FMT_LAST_SEG_DESC    = 0x03,
> > +	SPRAID_KEY_SGL_FMT_DATA_DESC    = 0x04,
> > +	SPRAID_TRANSPORT_SGL_DATA_DESC  = 0x05
> > +};
> > +
> > +static void spraid_sgl_set_data(struct spraid_sgl_desc *sge,
> > struct scatterlist *sg) +{
> > +	sge->addr = cpu_to_le64(sg_dma_address(sg));
> > +	sge->length = cpu_to_le32(sg_dma_len(sg));
> > +	sge->type = SPRAID_SGL_FMT_DATA_DESC << 4;
> > +}
> > +
> > +static void spraid_sgl_set_seg(struct spraid_sgl_desc *sge,
> > dma_addr_t dma_addr, int entries) +{
> > +	sge->addr = cpu_to_le64(dma_addr);
> > +	if (entries <= SGES_PER_PAGE) {
> > +		sge->length = cpu_to_le32(entries * sizeof(*sge));
> > +		sge->type = SPRAID_SGL_FMT_LAST_SEG_DESC << 4;
> > +	} else {
> > +		sge->length = cpu_to_le32(PAGE_SIZE);
> > +		sge->type = SPRAID_SGL_FMT_SEG_DESC << 4;
> > +	}
> > +}
> > +
> > +static int spraid_setup_ioq_cmd_sgl(struct spraid_dev *hdev,
> > +				    struct scsi_cmnd *scmd, struct
> > spraid_ioq_command *ioq_cmd,
> > +				    struct spraid_iod *iod)
> > +{
> > +	struct spraid_sgl_desc *sg_list, *link, *old_sg_list;
> > +	struct scatterlist *sg = scsi_sglist(scmd);
> > +	void **list = spraid_iod_list(iod);
> > +	struct dma_pool *pool;
> > +	int nsge = iod->nsge;
> > +	dma_addr_t sgl_dma;
> > +	int i = 0;
> > +
> > +	ioq_cmd->common.flags |= SPRAID_CMD_FLAG_SGL_METABUF;
> > +
> > +	if (nsge == 1) {
> > +		spraid_sgl_set_data(&ioq_cmd->common.dptr.sgl, sg);
> > +		return 0;
> > +	}
> > +
> > +	if (nsge <= (SMALL_POOL_SIZE / sizeof(struct
> > spraid_sgl_desc))) {
> > +		pool = iod->spraidq->prp_small_pool;
> > +		iod->npages = 0;
> > +	} else {
> > +		pool = hdev->prp_page_pool;
> > +		iod->npages = 1;
> > +	}
> > +
> > +	sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
> > +	if (!sg_list) {
> > +		dev_err_ratelimited(hdev->dev, "Allocate first
> > sgl_list failed\n");
> > +		iod->npages = -1;
> > +		return -ENOMEM;
> > +	}
> > +
> > +	list[0] = sg_list;
> > +	iod->first_dma = sgl_dma;
> > +	spraid_sgl_set_seg(&ioq_cmd->common.dptr.sgl, sgl_dma,
> > nsge);
> > +	do {
> > +		if (i == SGES_PER_PAGE) {
> > +			old_sg_list = sg_list;
> > +			link = &old_sg_list[SGES_PER_PAGE - 1];
> > +
> > +			sg_list = dma_pool_alloc(pool, GFP_ATOMIC,
> > &sgl_dma);
> > +			if (!sg_list) {
> > +				dev_err_ratelimited(hdev->dev,
> > "Allocate %dth sgl_list failed\n",
> > +						    iod->npages +
> > 1);
> > +				return -ENOMEM;
> > +			}
> > +			list[iod->npages++] = sg_list;
> > +
> > +			i = 0;
> > +			memcpy(&sg_list[i++], link, sizeof(*link));
> > +			spraid_sgl_set_seg(link, sgl_dma, nsge);
> > +		}
> > +
> > +		spraid_sgl_set_data(&sg_list[i++], sg);
> > +		sg = sg_next(sg);
> > +	} while (--nsge > 0);
> > +
> > +	return 0;
> > +}
> > +
> > +#define SPRAID_RW_FUA	BIT(14)
> > +
> > +static void spraid_setup_rw_cmd(struct spraid_dev *hdev,
> > +				struct spraid_rw_command *rw,
> > +				struct scsi_cmnd *scmd)
> > +{
> > +	u32 start_lba_lo, start_lba_hi;
> > +	u32 datalength = 0;
> > +	u16 control = 0;
> > +
> > +	start_lba_lo = 0;
> > +	start_lba_hi = 0;
> > +
> > +	if (scmd->sc_data_direction == DMA_TO_DEVICE) {
> > +		rw->opcode = SPRAID_CMD_WRITE;
> > +	} else if (scmd->sc_data_direction == DMA_FROM_DEVICE) {
> > +		rw->opcode = SPRAID_CMD_READ;  
> 
> what's the point of this? I mean, why not just check 
> scmd->sc_data_direction where you check rw->opcode?
> 
Do you mean checking scmd->sc_data_direction  in spraid_is_rw_scmd()?
They are created for different purposes. 
spraid_is_rw_scmd() is used to tell IO/managment commands from host,
spraid_setup_rw_cmd() is to handle IO(rw) commands,
scmd->sc_data_direction is used to tell the data direction of IO.

> > +	} else {
> > +		dev_err(hdev->dev, "Invalid IO for unsupported
> > data direction: %d\n",
> > +			scmd->sc_data_direction);
> > +		WARN_ON(1);
> > +	}
> > +
> > +	/* 6-byte READ(0x08) or WRITE(0x0A) cdb */
> > +	if (scmd->cmd_len == 6) {
> > +		datalength = (u32)(scmd->cmnd[4] == 0 ?
> > +				IO_6_DEFAULT_TX_LEN :
> > scmd->cmnd[4]);
> > +		start_lba_lo =
> > (u32)get_unaligned_be24(&scmd->cmnd[1]); +
> > +		start_lba_lo &= 0x1FFFFF;
> > +	}
> > +
> > +	/* 10-byte READ(0x28) or WRITE(0x2A) cdb */
> > +	else if (scmd->cmd_len == 10) {
> > +		datalength =
> > (u32)get_unaligned_be16(&scmd->cmnd[7]);
> > +		start_lba_lo = get_unaligned_be32(&scmd->cmnd[2]);
> > +
> > +		if (scmd->cmnd[1] & FUA_MASK)
> > +			control |= SPRAID_RW_FUA;
> > +	}
> > +
> > +	/* 12-byte READ(0xA8) or WRITE(0xAA) cdb */
> > +	else if (scmd->cmd_len == 12) {
> > +		datalength = get_unaligned_be32(&scmd->cmnd[6]);
> > +		start_lba_lo = get_unaligned_be32(&scmd->cmnd[2]);
> > +
> > +		if (scmd->cmnd[1] & FUA_MASK)
> > +			control |= SPRAID_RW_FUA;
> > +	}
> > +	/* 16-byte READ(0x88) or WRITE(0x8A) cdb */
> > +	else if (scmd->cmd_len == 16) {
> > +		datalength = get_unaligned_be32(&scmd->cmnd[10]);
> > +		start_lba_lo = get_unaligned_be32(&scmd->cmnd[6]);
> > +		start_lba_hi = get_unaligned_be32(&scmd->cmnd[2]);
> > +
> > +		if (scmd->cmnd[1] & FUA_MASK)
> > +			control |= SPRAID_RW_FUA;
> > +	}
> > +	/* 32-byte READ(0x88) or WRITE(0x8A) cdb */
> > +	else if (scmd->cmd_len == 32) {
> > +		datalength = get_unaligned_be32(&scmd->cmnd[28]);
> > +		start_lba_lo = get_unaligned_be32(&scmd->cmnd[16]);
> > +		start_lba_hi = get_unaligned_be32(&scmd->cmnd[12]);
> > +
> > +		if (scmd->cmnd[10] & FUA_MASK)
> > +			control |= SPRAID_RW_FUA;
> > +	}
> > +
> > +	if (unlikely(datalength > U16_MAX || datalength == 0)) {
> > +		dev_err(hdev->dev, "Invalid IO for illegal
> > transfer data length: %u\n",
> > +			datalength);
> > +		WARN_ON(1);
> > +	}
> > +
> > +	rw->slba = cpu_to_le64(((u64)start_lba_hi << 32) |
> > start_lba_lo);
> > +	/* 0base for nlb */
> > +	rw->nlb = cpu_to_le16((u16)(datalength - 1));
> > +	rw->control = cpu_to_le16(control);
> > +}
> > +
> > +static void spraid_setup_nonio_cmd(struct spraid_dev *hdev,
> > +				   struct spraid_scsi_nonio
> > *scsi_nonio, struct scsi_cmnd *scmd) +{
> > +	scsi_nonio->buffer_len = cpu_to_le32(scsi_bufflen(scmd));
> > +
> > +	switch (scmd->sc_data_direction) {
> > +	case DMA_NONE:
> > +		scsi_nonio->opcode = SPRAID_CMD_NONIO_NONE;
> > +		break;
> > +	case DMA_TO_DEVICE:
> > +		scsi_nonio->opcode = SPRAID_CMD_NONIO_TODEV;
> > +		break;
> > +	case DMA_FROM_DEVICE:
> > +		scsi_nonio->opcode = SPRAID_CMD_NONIO_FROMDEV;
> > +		break;
> > +	default:
> > +		dev_err(hdev->dev, "Invalid IO for unsupported
> > data direction: %d\n",
> > +			scmd->sc_data_direction);
> > +		WARN_ON(1);
> > +	}
> > +}
> > +
> > +static void spraid_setup_ioq_cmd(struct spraid_dev *hdev,
> > +				 struct spraid_ioq_command
> > *ioq_cmd, struct scsi_cmnd *scmd) +{
> > +	memcpy(ioq_cmd->common.cdb, scmd->cmnd, scmd->cmd_len);
> > +	ioq_cmd->common.cdb_len = scmd->cmd_len;
> > +
> > +	if (spraid_is_rw_scmd(scmd))
> > +		spraid_setup_rw_cmd(hdev, &ioq_cmd->rw, scmd);
> > +	else
> > +		spraid_setup_nonio_cmd(hdev, &ioq_cmd->scsi_nonio,
> > scmd); +}
> > +
> > +static int spraid_init_iod(struct spraid_dev *hdev,
> > +			   struct spraid_iod *iod, struct
> > spraid_ioq_command *ioq_cmd,
> > +			   struct scsi_cmnd *scmd)
> > +{
> > +	if (unlikely(!iod->sense)) {  
> 
> you seem to set iod->sense before this function is called - is this
> just paranoia?
> 
Will delete this sentence in the next version.

> > +		dev_err(hdev->dev, "Allocate sense data buffer
> > failed\n");  
> 
> hmmm... you did not try to allocate anything here, which the print
> would imply
> 
Will delete this sentence in the next version.

> > +		return -ENOMEM;
> > +	}
> > +	ioq_cmd->common.sense_addr = cpu_to_le64(iod->sense_dma);
> > +	ioq_cmd->common.sense_len =
> > cpu_to_le16(SCSI_SENSE_BUFFERSIZE); +
> > +	iod->nsge = 0;
> > +	iod->npages = -1;
> > +	iod->use_sgl = 0;
> > +	iod->sg_drv_mgmt = false;
> > +	WRITE_ONCE(iod->state, SPRAID_CMD_IDLE);
> > +
> > +	return 0;
> > +}
> > +
> > +static void spraid_free_iod_res(struct spraid_dev *hdev, struct
> > spraid_iod *iod) +{
> > +	const int last_prp = hdev->page_size / sizeof(__le64) - 1;
> > +	dma_addr_t dma_addr, next_dma_addr;
> > +	struct spraid_sgl_desc *sg_list;
> > +	__le64 *prp_list;
> > +	void *addr;
> > +	int i;
> > +
> > +	dma_addr = iod->first_dma;
> > +	if (iod->npages == 0)
> > +		dma_pool_free(iod->spraidq->prp_small_pool,
> > spraid_iod_list(iod)[0], dma_addr); +
> > +	for (i = 0; i < iod->npages; i++) {
> > +		addr = spraid_iod_list(iod)[i];
> > +
> > +		if (iod->use_sgl) {
> > +			sg_list = addr;
> > +			next_dma_addr =
> > +				le64_to_cpu((sg_list[SGES_PER_PAGE
> > - 1]).addr);
> > +		} else {
> > +			prp_list = addr;
> > +			next_dma_addr =
> > le64_to_cpu(prp_list[last_prp]);
> > +		}
> > +
> > +		dma_pool_free(hdev->prp_page_pool, addr, dma_addr);
> > +		dma_addr = next_dma_addr;
> > +	}
> > +
> > +	if (iod->sg_drv_mgmt && iod->sg != iod->inline_sg) {
> > +		iod->sg_drv_mgmt = false;
> > +		mempool_free(iod->sg, hdev->iod_mempool);
> > +	}
> > +
> > +	iod->sense = NULL;
> > +	iod->npages = -1;
> > +}
> > +
> > +static int spraid_io_map_data(struct spraid_dev *hdev, struct
> > spraid_iod *iod,
> > +			      struct scsi_cmnd *scmd, struct
> > spraid_ioq_command *ioq_cmd) +{
> > +	int ret;
> > +
> > +	iod->nsge = scsi_dma_map(scmd);
> > +
> > +	/* No data to DMA, it may be scsi no-rw command */  
> 
> if it may be a no-rw then would it be better to avoid the dma map 
> altogether?
> 
When nsge is 0, scsi_dma_map() does not do the real action of dma map
and just return 0. So there is no difference between our implementation
and your suggestion.

> > +	if (unlikely(iod->nsge == 0))
> > +		return 0;
> > +
> > +	iod->length = scsi_bufflen(scmd);
> > +	iod->sg = scsi_sglist(scmd);
> > +	iod->use_sgl = !spraid_is_prp(hdev, scmd, iod->nsge);
> > +
> > +	if (iod->use_sgl) {
> > +		ret = spraid_setup_ioq_cmd_sgl(hdev, scmd,
> > ioq_cmd, iod);
> > +	} else {
> > +		ret = spraid_setup_prps(hdev, iod);
> > +		ioq_cmd->common.dptr.prp1 =
> > +
> > cpu_to_le64(sg_dma_address(iod->sg));
> > +		ioq_cmd->common.dptr.prp2 =
> > cpu_to_le64(iod->first_dma);
> > +	}
> > +
> > +	if (ret)
> > +		scsi_dma_unmap(scmd);
> > +
> > +	return ret;
> > +}
> > +
> > +static void spraid_map_status(struct spraid_iod *iod, struct
> > scsi_cmnd *scmd,
> > +			      struct spraid_completion *cqe)
> > +{
> > +	scsi_set_resid(scmd, 0);
> > +
> > +	switch ((le16_to_cpu(cqe->status) >> 1) & 0x7f) {
> > +	case FW_STAT_OK:
> > +		set_host_byte(scmd, DID_OK);
> > +		break;
> > +	case FW_STAT_NEED_CHECK:
> > +		set_host_byte(scmd, DID_OK);
> > +		scmd->result |= le16_to_cpu(cqe->status) >> 8;
> > +		if (scmd->result & SAM_STAT_CHECK_CONDITION) {
> > +			memset(scmd->sense_buffer, 0,
> > SCSI_SENSE_BUFFERSIZE);
> > +			memcpy(scmd->sense_buffer, iod->sense,
> > SCSI_SENSE_BUFFERSIZE);
> > +			scmd->result = (scmd->result & 0x00ffffff)
> > | (DRIVER_SENSE << 24);
> > +		}
> > +		break;
> > +	case FW_STAT_ABORTED:
> > +		set_host_byte(scmd, DID_ABORT);
> > +		break;
> > +	case FW_STAT_NEED_RETRY:
> > +		set_host_byte(scmd, DID_REQUEUE);
> > +		break;
> > +	default:
> > +		set_host_byte(scmd, DID_BAD_TARGET);
> > +		break;
> > +	}
> > +}
> > +
> > +static inline void spraid_get_tag_from_scmd(struct scsi_cmnd
> > *scmd, u16 *qid, u16 *cid) +{
> > +	u32 tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
> > +
> > +	*qid = blk_mq_unique_tag_to_hwq(tag) + 1;  
> 
> why + 1? is it related to some admin queue?
> 
0 is the ID of admin queue. IO queue starts from 1.

> > +	*cid = blk_mq_unique_tag_to_tag(tag);
> > +}
> > +
> > +static int spraid_queue_command(struct Scsi_Host *shost, struct
> > scsi_cmnd *scmd) +{
> > +	struct spraid_iod *iod = scsi_cmd_priv(scmd);
> > +	struct spraid_dev *hdev = shost_priv(shost);
> > +	struct scsi_device *sdev = scmd->device;
> > +	struct spraid_sdev_hostdata *hostdata;
> > +	struct spraid_ioq_command ioq_cmd;
> > +	struct spraid_queue *ioq;
> > +	unsigned long elapsed;
> > +	u16 hwq, cid;
> > +	int ret;
> > +
> > +	if (unlikely(!scmd)) {  
> 
> why would this ever be NULL?
> 
will be deleted in the next version.

> > +		dev_err(hdev->dev, "err, scmd is null, return
> > 0\n");
> > +		return 0;
> > +	}
> > +
> > +	if (unlikely(hdev->state != SPRAID_LIVE)) {
> > +		set_host_byte(scmd, DID_NO_CONNECT);
> > +		scsi_done(scmd);
> > +		dev_err(hdev->dev, "[%s] err, hdev state is not
> > live\n", __func__);
> > +		return 0;
> > +	}
> > +
> > +	spraid_get_tag_from_scmd(scmd, &hwq, &cid);
> > +	hostdata = sdev->hostdata;
> > +	ioq = &hdev->queues[hwq];
> > +	memset(&ioq_cmd, 0, sizeof(ioq_cmd));
> > +	ioq_cmd.rw.hdid = cpu_to_le32(hostdata->hdid);
> > +	ioq_cmd.rw.command_id = cid;
> > +
> > +	spraid_setup_ioq_cmd(hdev, &ioq_cmd, scmd);
> > +
> > +	ret = cid * SCSI_SENSE_BUFFERSIZE;
> > +	iod->sense = ioq->sense + ret;
> > +	iod->sense_dma = ioq->sense_dma_addr + ret;
> > +
> > +	ret = spraid_init_iod(hdev, iod, &ioq_cmd, scmd);
> > +	if (unlikely(ret))
> > +		return SCSI_MLQUEUE_HOST_BUSY;
> > +
> > +	iod->spraidq = ioq;
> > +	ret = spraid_io_map_data(hdev, iod, scmd, &ioq_cmd);
> > +	if (unlikely(ret)) {
> > +		dev_err(hdev->dev, "spraid_io_map_data Err.\n");
> > +		set_host_byte(scmd, DID_ERROR);
> > +		scsi_done(scmd);
> > +		ret = 0;
> > +		goto deinit_iod;
> > +	}
> > +
> > +	WRITE_ONCE(iod->state, SPRAID_CMD_IN_FLIGHT);
> > +	spraid_submit_cmd(ioq, &ioq_cmd);
> > +	elapsed = jiffies - scmd->jiffies_at_alloc;
> > +	pr_debug("cid[%d], qid[%d] submit IO cost %3ld.%3ld
> > seconds\n",
> > +		    cid, hwq, elapsed / HZ, elapsed % HZ);
> > +	return 0;
> > +
> > +deinit_iod:
> > +	spraid_free_iod_res(hdev, iod);
> > +	return ret;
> > +}
> > +
> > +static int spraid_match_dev(struct spraid_dev *hdev, u16 idx,
> > struct scsi_device *sdev) +{
> > +	if (SPRAID_DEV_INFO_FLAG_VALID(hdev->devices[idx].flag)) {
> > +		if (sdev->channel == hdev->devices[idx].channel &&
> > +		    sdev->id ==
> > le16_to_cpu(hdev->devices[idx].target) &&
> > +		    sdev->lun < hdev->devices[idx].lun) {
> > +			dev_info(hdev->dev, "Match device success,
> > channel:target:lun[%d:%d:%d]\n",
> > +				 hdev->devices[idx].channel,
> > +				 hdev->devices[idx].target,
> > +				 hdev->devices[idx].lun);
> > +			return 1;
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int spraid_slave_alloc(struct scsi_device *sdev)
> > +{
> > +	struct spraid_sdev_hostdata *hostdata;
> > +	struct spraid_dev *hdev;
> > +	u16 idx;
> > +
> > +	hdev = shost_priv(sdev->host);
> > +	hostdata = kzalloc(sizeof(*hostdata), GFP_KERNEL);
> > +	if (!hostdata) {
> > +		dev_err(hdev->dev, "Alloc scsi host data memory
> > failed\n");  
> 
> you don't need to print message when kmalloc fails
> 
I did not get your point. Why does error message is not needed?

> > +		return -ENOMEM;
> > +	}
> > +
> > +	down_read(&hdev->devices_rwsem);
> > +	for (idx = 0; idx < le32_to_cpu(hdev->ctrl_info->nd);
> > idx++) {
> > +		if (spraid_match_dev(hdev, idx, sdev))
> > +			goto scan_host;
> > +	}
> > +	up_read(&hdev->devices_rwsem);
> > +
> > +	kfree(hostdata);
> > +	return -ENXIO;
> > +
> > +scan_host:
> > +	hostdata->hdid = le32_to_cpu(hdev->devices[idx].hdid);
> > +	sdev->hostdata = hostdata;
> > +	up_read(&hdev->devices_rwsem);
> > +	return 0;  
> 
> we normally put the error exit at the bottom
> 
Ok. Change will be included in the next version.

> > +}
> > +
> > +static void spraid_slave_destroy(struct scsi_device *sdev)
> > +{
> > +	kfree(sdev->hostdata);
> > +	sdev->hostdata = NULL;
> > +}
> > +
> > +static int spraid_slave_configure(struct scsi_device *sdev)
> > +{
> > +	u16 idx;
> > +	unsigned int timeout = SCMD_TMOUT_RAWDISK * HZ;
> > +	struct spraid_dev *hdev = shost_priv(sdev->host);
> > +	struct spraid_sdev_hostdata *hostdata = sdev->hostdata;
> > +	u32 max_sec = sdev->host->max_sectors;
> > +
> > +	if (hostdata) {
> > +		idx = hostdata->hdid - 1;
> > +		if (sdev->channel == hdev->devices[idx].channel &&
> > +		    sdev->id ==
> > le16_to_cpu(hdev->devices[idx].target) &&
> > +		    sdev->lun < hdev->devices[idx].lun) {
> > +			if
> > (SPRAID_DEV_INFO_ATTR_VD(hdev->devices[idx].attr))
> > +				timeout = SCMD_TMOUT_VD * HZ;
> > +			else if
> > (SPRAID_DEV_INFO_ATTR_RAWDISK(hdev->devices[idx].attr))
> > +				timeout = SCMD_TMOUT_RAWDISK * HZ;
> > +			max_sec =
> > le16_to_cpu(hdev->devices[idx].max_io_kb) << 1;
> > +		} else {
> > +			dev_err(hdev->dev, "[%s] err,
> > sdev->channel:id:lun[%d:%d:%lld];"
> > +				"devices[%d],
> > channel:target:lun[%d:%d:%d]\n",
> > +				__func__, sdev->channel, sdev->id,
> > sdev->lun,
> > +				idx, hdev->devices[idx].channel,
> > +				hdev->devices[idx].target,
> > +				hdev->devices[idx].lun);
> > +		}
> > +	} else {
> > +		dev_err(hdev->dev, "[%s] err, sdev->hostdata is
> > null\n", __func__);  
> 
> can this really happen?
> 
The print will be deleted in the next version.

> > +	}
> > +
> > +	blk_queue_rq_timeout(sdev->request_queue, timeout);
> > +	sdev->eh_timeout = timeout;
> > +
> > +	if ((max_sec == 0) || (max_sec > sdev->host->max_sectors))
> > +		max_sec = sdev->host->max_sectors;
> > +	blk_queue_max_hw_sectors(sdev->request_queue, max_sec);
> > +
> > +	dev_info(hdev->dev, "[%s]
> > sdev->channel:id:lun[%d:%d:%lld], scmd_timeout[%d]s, maxsec[%d]\n",
> > +		 __func__, sdev->channel, sdev->id, sdev->lun,
> > timeout / HZ, max_sec); +
> > +	return 0;
> > +}
> > +
> > +static void spraid_shost_init(struct spraid_dev *hdev)
> > +{
> > +	struct pci_dev *pdev = hdev->pdev;
> > +	u8 domain, bus;
> > +	u32 dev_func;
> > +
> > +	domain = pci_domain_nr(pdev->bus);
> > +	bus = pdev->bus->number;
> > +	dev_func = pdev->devfn;
> > +
> > +	hdev->shost->nr_hw_queues = hdev->online_queues - 1;
> > +	hdev->shost->can_queue = (hdev->ioq_depth -
> > SPRAID_PTCMDS_PERQ); +
> > +	hdev->shost->sg_tablesize =
> > le16_to_cpu(hdev->ctrl_info->max_num_sge);
> > +	/* 512B per sector */
> > +	hdev->shost->max_sectors = (1U << ((hdev->ctrl_info->mdts)
> > * 1U) << 12) / 512;
> > +	hdev->shost->cmd_per_lun = MAX_CMD_PER_DEV;
> > +	hdev->shost->max_channel =
> > le16_to_cpu(hdev->ctrl_info->max_channel) - 1;
> > +	hdev->shost->max_id =
> > le32_to_cpu(hdev->ctrl_info->max_tgt_id);
> > +	hdev->shost->max_lun =
> > le16_to_cpu(hdev->ctrl_info->max_lun); +
> > +	hdev->shost->this_id = -1;
> > +	hdev->shost->unique_id = (domain << 16) | (bus << 8) |
> > dev_func;
> > +	hdev->shost->max_cmd_len = MAX_CDB_LEN;
> > +	hdev->shost->hostt->cmd_size = max(spraid_cmd_size(hdev,
> > false, true),
> > +					   spraid_cmd_size(hdev,
> > false, false)); +}
> > +
> > +static inline void spraid_host_deinit(struct spraid_dev *hdev)
> > +{
> > +	ida_free(&spraid_instance_ida, hdev->instance);
> > +}
> > +
> > +static int spraid_alloc_queue(struct spraid_dev *hdev, u16 qid,
> > u16 depth) +{
> > +	struct spraid_queue *spraidq = &hdev->queues[qid];
> > +	int ret = 0;
> > +
> > +	if (hdev->queue_count > qid) {
> > +		dev_info(hdev->dev, "[%s] warn: queue[%d] is
> > exist\n", __func__, qid);
> > +		return 0;
> > +	}
> > +
> > +	spraidq->cqes = dma_alloc_coherent(hdev->dev,
> > CQ_SIZE(depth),
> > +					   &spraidq->cq_dma_addr,
> > GFP_KERNEL | __GFP_ZERO);
> > +	if (!spraidq->cqes)
> > +		return -ENOMEM;
> > +
> > +	spraidq->sq_cmds = dma_alloc_coherent(hdev->dev,
> > SQ_SIZE(qid, depth),
> > +
> > &spraidq->sq_dma_addr, GFP_KERNEL);
> > +	if (!spraidq->sq_cmds) {
> > +		ret = -ENOMEM;
> > +		goto  free_cqes;
> > +	}
> > +
> > +	spin_lock_init(&spraidq->sq_lock);
> > +	spin_lock_init(&spraidq->cq_lock);
> > +	spraidq->hdev = hdev;
> > +	spraidq->q_depth = depth;
> > +	spraidq->qid = qid;
> > +	spraidq->cq_vector = -1;
> > +	hdev->queue_count++;
> > +
> > +	/* alloc sense buffer */
> > +	spraidq->sense = dma_alloc_coherent(hdev->dev,
> > SENSE_SIZE(depth),
> > +
> > &spraidq->sense_dma_addr, GFP_KERNEL | __GFP_ZERO);
> > +	if (!spraidq->sense) {
> > +		ret = -ENOMEM;
> > +		goto free_sq_cmds;
> > +	}
> > +
> > +	return 0;
> > +
> > +free_sq_cmds:
> > +	dma_free_coherent(hdev->dev, SQ_SIZE(qid, depth), (void
> > *)spraidq->sq_cmds,
> > +			  spraidq->sq_dma_addr);
> > +free_cqes:
> > +	dma_free_coherent(hdev->dev, CQ_SIZE(depth), (void
> > *)spraidq->cqes,
> > +			  spraidq->cq_dma_addr);
> > +	return ret;
> > +}
> > +
> > +static int spraid_wait_ready(struct spraid_dev *hdev, u64 cap,
> > bool enabled) +{
> > +	unsigned long timeout =
> > +	((SPRAID_CAP_TIMEOUT(cap) + 1) *
> > SPRAID_CAP_TIMEOUT_UNIT_MS) + jiffies;
> > +	u32 bit = enabled ? SPRAID_CSTS_RDY : 0;
> > +
> > +	while ((readl(hdev->bar + SPRAID_REG_CSTS) &
> > SPRAID_CSTS_RDY) != bit) {
> > +		usleep_range(1000, 2000);
> > +		if (fatal_signal_pending(current))
> > +			return -EINTR;
> > +
> > +		if (time_after(jiffies, timeout)) {
> > +			dev_err(hdev->dev, "Device not ready;
> > aborting %s\n",
> > +				enabled ? "initialisation" :
> > "reset");
> > +			return -ENODEV;
> > +		}
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int spraid_shutdown_ctrl(struct spraid_dev *hdev)
> > +{
> > +	unsigned long timeout = hdev->ctrl_info->rtd3e + jiffies;
> > +
> > +	hdev->ctrl_config &= ~SPRAID_CC_SHN_MASK;
> > +	hdev->ctrl_config |= SPRAID_CC_SHN_NORMAL;
> > +	writel(hdev->ctrl_config, hdev->bar + SPRAID_REG_CC);
> > +
> > +	while ((readl(hdev->bar + SPRAID_REG_CSTS) &
> > SPRAID_CSTS_SHST_MASK) !=
> > +		SPRAID_CSTS_SHST_CMPLT) {
> > +		msleep(100);
> > +		if (fatal_signal_pending(current))
> > +			return -EINTR;
> > +		if (time_after(jiffies, timeout)) {
> > +			dev_err(hdev->dev, "Device shutdown
> > incomplete; abort shutdown\n");
> > +			return -ENODEV;
> > +		}
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int spraid_disable_ctrl(struct spraid_dev *hdev)
> > +{
> > +	hdev->ctrl_config &= ~SPRAID_CC_SHN_MASK;
> > +	hdev->ctrl_config &= ~SPRAID_CC_ENABLE;
> > +	writel(hdev->ctrl_config, hdev->bar + SPRAID_REG_CC);
> > +
> > +	return spraid_wait_ready(hdev, hdev->cap, false);
> > +}
> > +
> > +static int spraid_enable_ctrl(struct spraid_dev *hdev)
> > +{
> > +	u64 cap = hdev->cap;
> > +	u32 dev_page_min = SPRAID_CAP_MPSMIN(cap) + 12;
> > +	u32 page_shift = PAGE_SHIFT;
> > +
> > +	if (page_shift < dev_page_min) {
> > +		dev_err(hdev->dev, "Minimum device page size[%u],
> > too large for host[%u]\n",
> > +			1U << dev_page_min, 1U << page_shift);
> > +		return -ENODEV;
> > +	}
> > +
> > +	page_shift = min_t(unsigned int, SPRAID_CAP_MPSMAX(cap) +
> > 12, PAGE_SHIFT);
> > +	hdev->page_size = 1U << page_shift;
> > +
> > +	hdev->ctrl_config = SPRAID_CC_CSS_NVM;
> > +	hdev->ctrl_config |= (page_shift - 12) <<
> > SPRAID_CC_MPS_SHIFT;
> > +	hdev->ctrl_config |= SPRAID_CC_AMS_RR | SPRAID_CC_SHN_NONE;
> > +	hdev->ctrl_config |= SPRAID_CC_IOSQES | SPRAID_CC_IOCQES;
> > +	hdev->ctrl_config |= SPRAID_CC_ENABLE;
> > +	writel(hdev->ctrl_config, hdev->bar + SPRAID_REG_CC);
> > +
> > +	return spraid_wait_ready(hdev, cap, true);
> > +}
> > +
> > +static void spraid_init_queue(struct spraid_queue *spraidq, u16
> > qid) +{
> > +	struct spraid_dev *hdev = spraidq->hdev;
> > +
> > +	memset((void *)spraidq->cqes, 0,
> > CQ_SIZE(spraidq->q_depth)); +
> > +	spraidq->sq_tail = 0;
> > +	spraidq->cq_head = 0;
> > +	spraidq->cq_phase = 1;
> > +	spraidq->q_db = &hdev->dbs[qid * 2 * hdev->db_stride];
> > +	spraidq->prp_small_pool = hdev->prp_small_pool[qid %
> > small_pool_num];
> > +	hdev->online_queues++;
> > +}
> > +
> > +static inline bool spraid_cqe_pending(struct spraid_queue *spraidq)
> > +{
> > +	return
> > (le16_to_cpu(spraidq->cqes[spraidq->cq_head].status) & 1) ==
> > +		spraidq->cq_phase;
> > +}
> > +
> > +static void spraid_complete_ioq_cmnd(struct spraid_queue *ioq,
> > struct spraid_completion *cqe) +{
> > +	struct spraid_dev *hdev = ioq->hdev;
> > +	struct blk_mq_tags *tags;
> > +	struct scsi_cmnd *scmd;
> > +	struct spraid_iod *iod;
> > +	struct request *req;
> > +	unsigned long elapsed;
> > +
> > +	tags = hdev->shost->tag_set.tags[ioq->qid - 1];
> > +	req = blk_mq_tag_to_rq(tags, cqe->cmd_id);
> > +	if (unlikely(!req || !blk_mq_request_started(req))) {
> > +		dev_warn(hdev->dev, "Invalid id %d completed on
> > queue %d\n",
> > +			 cqe->cmd_id, ioq->qid);
> > +		return;
> > +	}
> > +
> > +	scmd = blk_mq_rq_to_pdu(req);
> > +	iod = scsi_cmd_priv(scmd);
> > +
> > +	elapsed = jiffies - scmd->jiffies_at_alloc;  
> 
> why print stuff like this? what use is it?
> 
> And it is just inefficient...as we will always store 
> scmd->jiffies_at_alloc even if the pr_debug statements are disabled
>
The print is to tune performance.
 
> > +	pr_debug("cid[%d], qid[%d] finish IO cost %3ld.%3ld
> > seconds\n",
> > +		    cqe->cmd_id, ioq->qid, elapsed / HZ, elapsed %
> > HZ); +
> > +	if (cmpxchg(&iod->state, SPRAID_CMD_IN_FLIGHT,
> > SPRAID_CMD_COMPLETE) !=
> > +		SPRAID_CMD_IN_FLIGHT) {
> > +		dev_warn(hdev->dev, "cid[%d], qid[%d] enters
> > abnormal handler, cost %3ld.%3ld seconds\n",
> > +			 cqe->cmd_id, ioq->qid, elapsed / HZ,
> > elapsed % HZ);
> > +		WRITE_ONCE(iod->state, SPRAID_CMD_TMO_COMPLETE);
> > +
> > +		if (iod->nsge) {  
> 
> if we possibly complete twice then is this also just racy?
> 
FW/chip will guarantee this case will not happen. 

> > +			iod->nsge = 0;
> > +			scsi_dma_unmap(scmd);
> > +		}
> > +		spraid_free_iod_res(hdev, iod);
> > +
> > +		return;
> > +	}
> > +
> > +	spraid_map_status(iod, scmd, cqe);
> > +	if (iod->nsge) {
> > +		iod->nsge = 0;
> > +		scsi_dma_unmap(scmd);
> > +	}
> > +	spraid_free_iod_res(hdev, iod);
> > +	scsi_done(scmd);
> > +}
> > +
> > +
> > +static void spraid_complete_adminq_cmnd(struct spraid_queue
> > *adminq, struct spraid_completion *cqe) +{
> > +	struct spraid_dev *hdev = adminq->hdev;
> > +	struct spraid_cmd *adm_cmd;
> > +
> > +	adm_cmd = hdev->adm_cmds + cqe->cmd_id;
> > +	if (unlikely(adm_cmd->state == SPRAID_CMD_IDLE)) {
> > +		dev_warn(adminq->hdev->dev, "Invalid id %d
> > completed on queue %d\n",
> > +			 cqe->cmd_id, le16_to_cpu(cqe->sq_id));
> > +		return;
> > +	}
> > +
> > +	adm_cmd->status = le16_to_cpu(cqe->status) >> 1;
> > +	adm_cmd->result0 = le32_to_cpu(cqe->result);
> > +	adm_cmd->result1 = le32_to_cpu(cqe->result1);
> > +
> > +	complete(&adm_cmd->cmd_done);
> > +}
> > +
> > +static void spraid_send_aen(struct spraid_dev *hdev, u16 cid);
> > +
> > +static void spraid_complete_aen(struct spraid_queue *spraidq,
> > struct spraid_completion *cqe) +{
> > +	struct spraid_dev *hdev = spraidq->hdev;
> > +	u32 result = le32_to_cpu(cqe->result);
> > +
> > +	dev_info(hdev->dev, "rcv aen, status[%x], result[%x]\n",
> > +		 le16_to_cpu(cqe->status) >> 1, result);
> > +
> > +	spraid_send_aen(hdev, cqe->cmd_id);
> > +
> > +	if ((le16_to_cpu(cqe->status) >> 1) != SPRAID_SC_SUCCESS)
> > +		return;
> > +	switch (result & 0x7) {
> > +	case SPRAID_AEN_NOTICE:
> > +		spraid_handle_aen_notice(hdev, result);
> > +		break;
> > +	case SPRAID_AEN_VS:
> > +		spraid_handle_aen_vs(hdev, result,
> > le32_to_cpu(cqe->result1));
> > +		break;
> > +	default:
> > +		dev_warn(hdev->dev, "Unsupported async event type:
> > %u\n",
> > +			 result & 0x7);
> > +		break;
> > +	}
> > +}
> > +
> > +static void spraid_complete_ioq_sync_cmnd(struct spraid_queue
> > *ioq, struct spraid_completion *cqe) +{
> > +	struct spraid_dev *hdev = ioq->hdev;
> > +	struct spraid_cmd *ptcmd;
> > +
> > +	ptcmd = hdev->ioq_ptcmds + (ioq->qid - 1) *
> > SPRAID_PTCMDS_PERQ +
> > +		cqe->cmd_id - SPRAID_IO_BLK_MQ_DEPTH;
> > +
> > +	ptcmd->status = le16_to_cpu(cqe->status) >> 1;
> > +	ptcmd->result0 = le32_to_cpu(cqe->result);
> > +	ptcmd->result1 = le32_to_cpu(cqe->result1);
> > +
> > +	complete(&ptcmd->cmd_done);
> > +}
> > +
> > +static inline void spraid_handle_cqe(struct spraid_queue *spraidq,
> > u16 idx) +{
> > +	struct spraid_completion *cqe = &spraidq->cqes[idx];
> > +	struct spraid_dev *hdev = spraidq->hdev;
> > +
> > +	if (unlikely(cqe->cmd_id >= spraidq->q_depth)) {
> > +		dev_err(hdev->dev, "Invalid command id[%d]
> > completed on queue %d\n",
> > +			cqe->cmd_id, cqe->sq_id);
> > +		return;
> > +	}
> > +
> > +	pr_debug("cid[%d], qid[%d], result[0x%x], sq_id[%d],
> > status[0x%x]\n",
> > +		    cqe->cmd_id, spraidq->qid,
> > le32_to_cpu(cqe->result),
> > +		    le16_to_cpu(cqe->sq_id),
> > le16_to_cpu(cqe->status)); +
> > +	if (unlikely(spraidq->qid == 0 && cqe->cmd_id >=
> > SPRAID_AQ_BLK_MQ_DEPTH)) {
> > +		spraid_complete_aen(spraidq, cqe);
> > +		return;
> > +	}
> > +
> > +	if (unlikely(spraidq->qid && cqe->cmd_id >=
> > SPRAID_IO_BLK_MQ_DEPTH)) {
> > +		spraid_complete_ioq_sync_cmnd(spraidq, cqe);
> > +		return;
> > +	}
> > +
> > +	if (spraidq->qid)
> > +		spraid_complete_ioq_cmnd(spraidq, cqe);
> > +	else
> > +		spraid_complete_adminq_cmnd(spraidq, cqe);
> > +}
> > +
> > +static void spraid_complete_cqes(struct spraid_queue *spraidq, u16
> > start, u16 end) +{
> > +	while (start != end) {
> > +		spraid_handle_cqe(spraidq, start);
> > +		if (++start == spraidq->q_depth)
> > +			start = 0;
> > +	}
> > +}
> > +
> > +static inline void spraid_update_cq_head(struct spraid_queue
> > *spraidq) +{
> > +	if (++spraidq->cq_head == spraidq->q_depth) {
> > +		spraidq->cq_head = 0;
> > +		spraidq->cq_phase = !spraidq->cq_phase;
> > +	}
> > +}
> > +
> > +static inline bool spraid_process_cq(struct spraid_queue *spraidq,
> > u16 *start, u16 *end, int tag) +{
> > +	bool found = false;
> > +
> > +	*start = spraidq->cq_head;
> > +	while (!found && spraid_cqe_pending(spraidq)) {
> > +		if (spraidq->cqes[spraidq->cq_head].cmd_id == tag)
> > +			found = true;
> > +		spraid_update_cq_head(spraidq);
> > +	}
> > +	*end = spraidq->cq_head;
> > +
> > +	if (*start != *end)
> > +		writel(spraidq->cq_head, spraidq->q_db +
> > spraidq->hdev->db_stride); +
> > +	return found;
> > +}
> > +
> > +static bool spraid_poll_cq(struct spraid_queue *spraidq, int cid)
> > +{
> > +	u16 start, end;
> > +	bool found;
> > +
> > +	if (!spraid_cqe_pending(spraidq))
> > +		return 0;
> > +
> > +	spin_lock_irq(&spraidq->cq_lock);
> > +	found = spraid_process_cq(spraidq, &start, &end, cid);
> > +	spin_unlock_irq(&spraidq->cq_lock);
> > +
> > +	spraid_complete_cqes(spraidq, start, end);
> > +	return found;
> > +}
> > +
> > +static irqreturn_t spraid_irq(int irq, void *data)
> > +{
> > +	struct spraid_queue *spraidq = data;
> > +	irqreturn_t ret = IRQ_NONE;  
> 
> note: IRQ_NONE means that there was no interrupt for us/not handled,
> and not specifically that we took no action for the interrupt
> 
IRQ_NONE here meets our expectation: When IRQ_NONE is returned, no
action is needed.

> > +	u16 start, end;
> > +
> > +	spin_lock(&spraidq->cq_lock);
> > +	if (spraidq->cq_head != spraidq->last_cq_head)
> > +		ret = IRQ_HANDLED;
> > +
> > +	spraid_process_cq(spraidq, &start, &end, -1);
> > +	spraidq->last_cq_head = spraidq->cq_head;
> > +	spin_unlock(&spraidq->cq_lock);
> > +
> > +	if (start != end) {
> > +		spraid_complete_cqes(spraidq, start, end);
> > +		ret = IRQ_HANDLED;
> > +	}
> > +	return ret;
> > +}
> > +
> > +static int spraid_setup_admin_queue(struct spraid_dev *hdev)
> > +{
> > +	struct spraid_queue *adminq = &hdev->queues[0];
> > +	u32 aqa;
> > +	int ret;
> > +
> > +	dev_info(hdev->dev, "[%s] start disable ctrl\n", __func__);
> > +
> > +	ret = spraid_disable_ctrl(hdev);
> > +	if (ret)
> > +		return ret;
> > +
> > +	ret = spraid_alloc_queue(hdev, 0, SPRAID_AQ_DEPTH);
> > +	if (ret)
> > +		return ret;
> > +
> > +	aqa = adminq->q_depth - 1;
> > +	aqa |= aqa << 16;
> > +	writel(aqa, hdev->bar + SPRAID_REG_AQA);
> > +	lo_hi_writeq(adminq->sq_dma_addr, hdev->bar +
> > SPRAID_REG_ASQ);
> > +	lo_hi_writeq(adminq->cq_dma_addr, hdev->bar +
> > SPRAID_REG_ACQ); +
> > +	dev_info(hdev->dev, "[%s] start enable ctrl\n", __func__);
> > +
> > +	ret = spraid_enable_ctrl(hdev);
> > +	if (ret) {
> > +		ret = -ENODEV;
> > +		goto free_queue;
> > +	}
> > +
> > +	adminq->cq_vector = 0;
> > +	spraid_init_queue(adminq, 0);
> > +	ret = pci_request_irq(hdev->pdev, adminq->cq_vector,
> > spraid_irq, NULL,
> > +			      adminq, "spraid%d_q%d",
> > hdev->instance, adminq->qid); +
> > +	if (ret) {
> > +		adminq->cq_vector = -1;
> > +		hdev->online_queues--;
> > +		goto free_queue;
> > +	}
> > +
> > +	dev_info(hdev->dev, "[%s] success, queuecount:[%d],
> > onlinequeue:[%d]\n",
> > +		 __func__, hdev->queue_count, hdev->online_queues);
> > +
> > +	return 0;
> > +
> > +free_queue:
> > +	spraid_free_queue(adminq);
> > +	return ret;
> > +}
> > +
> > +static u32 spraid_bar_size(struct spraid_dev *hdev, u32 nr_ioqs)
> > +{
> > +	return (SPRAID_REG_DBS + ((nr_ioqs + 1) * 8 *
> > hdev->db_stride)); +}
> > +
> > +static int spraid_alloc_admin_cmds(struct spraid_dev *hdev)
> > +{
> > +	int i;
> > +
> > +	INIT_LIST_HEAD(&hdev->adm_cmd_list);
> > +	spin_lock_init(&hdev->adm_cmd_lock);
> > +
> > +	hdev->adm_cmds = kcalloc_node(SPRAID_AQ_BLK_MQ_DEPTH,
> > sizeof(struct spraid_cmd),
> > +				      GFP_KERNEL, hdev->numa_node);
> > +
> > +	if (!hdev->adm_cmds) {
> > +		dev_err(hdev->dev, "Alloc admin cmds failed\n");
> > +		return -ENOMEM;
> > +	}
> > +
> > +	for (i = 0; i < SPRAID_AQ_BLK_MQ_DEPTH; i++) {
> > +		hdev->adm_cmds[i].qid = 0;
> > +		hdev->adm_cmds[i].cid = i;
> > +		list_add_tail(&(hdev->adm_cmds[i].list),
> > &hdev->adm_cmd_list);
> > +	}
> > +
> > +	dev_info(hdev->dev, "Alloc admin cmds success, num[%d]\n",
> > SPRAID_AQ_BLK_MQ_DEPTH); +
> > +	return 0;
> > +}
> > +
> > +static void spraid_free_admin_cmds(struct spraid_dev *hdev)
> > +{
> > +	kfree(hdev->adm_cmds);
> > +	INIT_LIST_HEAD(&hdev->adm_cmd_list);
> > +}
> > +
> > +static struct spraid_cmd *spraid_get_cmd(struct spraid_dev *hdev,
> > enum spraid_cmd_type type) +{
> > +	struct spraid_cmd *cmd = NULL;
> > +	unsigned long flags;
> > +	struct list_head *head = &hdev->adm_cmd_list;
> > +	spinlock_t *slock = &hdev->adm_cmd_lock;
> > +
> > +	if (type == SPRAID_CMD_IOPT) {
> > +		head = &hdev->ioq_pt_list;
> > +		slock = &hdev->ioq_pt_lock;
> > +	}
> > +
> > +	spin_lock_irqsave(slock, flags);
> > +	if (list_empty(head)) {
> > +		spin_unlock_irqrestore(slock, flags);
> > +		dev_err(hdev->dev, "err, cmd[%d] list empty\n",
> > type);
> > +		return NULL;
> > +	}
> > +	cmd = list_entry(head->next, struct spraid_cmd, list);
> > +	list_del_init(&cmd->list);
> > +	spin_unlock_irqrestore(slock, flags);
> > +
> > +	WRITE_ONCE(cmd->state, SPRAID_CMD_IN_FLIGHT);
> > +
> > +	return cmd;
> > +}
> > +
> > +static void spraid_put_cmd(struct spraid_dev *hdev, struct
> > spraid_cmd *cmd,
> > +			   enum spraid_cmd_type type)
> > +{
> > +	unsigned long flags;
> > +	struct list_head *head = &hdev->adm_cmd_list;
> > +	spinlock_t *slock = &hdev->adm_cmd_lock;
> > +
> > +	if (type == SPRAID_CMD_IOPT) {
> > +		head = &hdev->ioq_pt_list;
> > +		slock = &hdev->ioq_pt_lock;  
> 
> yuck - why change the lock pointer?
> 
There are different locks for SPRAID_CMD_ADM and SPRAID_CMD_IOPT.

> > +	}
> > +
> > +	spin_lock_irqsave(slock, flags);
> > +	WRITE_ONCE(cmd->state, SPRAID_CMD_IDLE);
> > +	list_add_tail(&cmd->list, head);
> > +	spin_unlock_irqrestore(slock, flags);
> > +}
> > +
> > +
> > +static int spraid_submit_admin_sync_cmd(struct spraid_dev *hdev,
> > struct spraid_admin_command *cmd,
> > +					u32 *result0, u32
> > *result1, u32 timeout) +{
> > +	struct spraid_cmd *adm_cmd = spraid_get_cmd(hdev,
> > SPRAID_CMD_ADM); +
> > +	if (!adm_cmd) {
> > +		dev_err(hdev->dev, "err, get admin cmd failed\n");
> >  
> 
> when spraid_get_cmd() fails then we get a print there already. Too
> chatty.
> 
The print will be deleted in the next version.

> > +		return -EFAULT;
> > +	}
> > +
> > +	timeout = timeout ? timeout : ADMIN_TIMEOUT;
> > +
> > +	init_completion(&adm_cmd->cmd_done);
> > +
> > +	cmd->common.command_id = adm_cmd->cid;
> > +	spraid_submit_cmd(&hdev->queues[0], cmd);
> > +
> > +	if (!wait_for_completion_timeout(&adm_cmd->cmd_done,
> > timeout)) {
> > +		dev_err(hdev->dev, "[%s] cid[%d] qid[%d] timeout,
> > opcode[0x%x] subopcode[0x%x]\n",
> > +			__func__, adm_cmd->cid, adm_cmd->qid,
> > cmd->usr_cmd.opcode,
> > +			cmd->usr_cmd.info_0.subopcode);
> > +		WRITE_ONCE(adm_cmd->state, SPRAID_CMD_TIMEOUT);
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (result0)
> > +		*result0 = adm_cmd->result0;
> > +	if (result1)
> > +		*result1 = adm_cmd->result1;
> > +
> > +	spraid_put_cmd(hdev, adm_cmd, SPRAID_CMD_ADM);
> > +
> > +	return adm_cmd->status;
> > +}
> > +
> > +static int spraid_create_cq(struct spraid_dev *hdev, u16 qid,
> > +			    struct spraid_queue *spraidq, u16
> > cq_vector) +{
> > +	struct spraid_admin_command admin_cmd;
> > +	int flags = SPRAID_QUEUE_PHYS_CONTIG |
> > SPRAID_CQ_IRQ_ENABLED; +
> > +	memset(&admin_cmd, 0, sizeof(admin_cmd));
> > +	admin_cmd.create_cq.opcode = SPRAID_ADMIN_CREATE_CQ;
> > +	admin_cmd.create_cq.prp1 =
> > cpu_to_le64(spraidq->cq_dma_addr);
> > +	admin_cmd.create_cq.cqid = cpu_to_le16(qid);
> > +	admin_cmd.create_cq.qsize = cpu_to_le16(spraidq->q_depth -
> > 1);
> > +	admin_cmd.create_cq.cq_flags = cpu_to_le16(flags);
> > +	admin_cmd.create_cq.irq_vector = cpu_to_le16(cq_vector);
> > +
> > +	return spraid_submit_admin_sync_cmd(hdev, &admin_cmd,
> > NULL, NULL, 0); +}
> > +
> > +static int spraid_create_sq(struct spraid_dev *hdev, u16 qid,
> > +			    struct spraid_queue *spraidq)
> > +{
> > +	struct spraid_admin_command admin_cmd;
> > +	int flags = SPRAID_QUEUE_PHYS_CONTIG;
> > +
> > +	memset(&admin_cmd, 0, sizeof(admin_cmd));
> > +	admin_cmd.create_sq.opcode = SPRAID_ADMIN_CREATE_SQ;
> > +	admin_cmd.create_sq.prp1 =
> > cpu_to_le64(spraidq->sq_dma_addr);
> > +	admin_cmd.create_sq.sqid = cpu_to_le16(qid);
> > +	admin_cmd.create_sq.qsize = cpu_to_le16(spraidq->q_depth -
> > 1);
> > +	admin_cmd.create_sq.sq_flags = cpu_to_le16(flags);
> > +	admin_cmd.create_sq.cqid = cpu_to_le16(qid);
> > +
> > +	return spraid_submit_admin_sync_cmd(hdev, &admin_cmd,
> > NULL, NULL, 0); +}
> > +
> > +static void spraid_free_queue(struct spraid_queue *spraidq)
> > +{
> > +	struct spraid_dev *hdev = spraidq->hdev;
> > +
> > +	hdev->queue_count--;
> > +	dma_free_coherent(hdev->dev, CQ_SIZE(spraidq->q_depth),
> > +			  (void *)spraidq->cqes,
> > spraidq->cq_dma_addr);
> > +	dma_free_coherent(hdev->dev, SQ_SIZE(spraidq->qid,
> > spraidq->q_depth),
> > +			  spraidq->sq_cmds, spraidq->sq_dma_addr);
> > +	dma_free_coherent(hdev->dev, SENSE_SIZE(spraidq->q_depth),
> > +			  spraidq->sense, spraidq->sense_dma_addr);
> > +}
> > +
> > +static void spraid_free_admin_queue(struct spraid_dev *hdev)
> > +{
> > +	spraid_free_queue(&hdev->queues[0]);
> > +}
> > +
> > +static void spraid_free_io_queues(struct spraid_dev *hdev)
> > +{
> > +	int i;
> > +
> > +	for (i = hdev->queue_count - 1; i >= 1; i--)
> > +		spraid_free_queue(&hdev->queues[i]);
> > +}
> > +
> > +static int spraid_delete_queue(struct spraid_dev *hdev, u8 op, u16
> > id) +{
> > +	struct spraid_admin_command admin_cmd;
> > +	int ret;
> > +
> > +	memset(&admin_cmd, 0, sizeof(admin_cmd));
> > +	admin_cmd.delete_queue.opcode = op;
> > +	admin_cmd.delete_queue.qid = cpu_to_le16(id);
> > +
> > +	ret = spraid_submit_admin_sync_cmd(hdev, &admin_cmd, NULL,
> > NULL, 0); +
> > +	if (ret)
> > +		dev_err(hdev->dev, "Delete %s:[%d] failed\n",
> > +			(op == SPRAID_ADMIN_DELETE_CQ) ? "cq" :
> > "sq", id); +
> > +	return ret;
> > +}
> > +
> > +static int spraid_delete_cq(struct spraid_dev *hdev, u16 cqid)
> > +{
> > +	return spraid_delete_queue(hdev, SPRAID_ADMIN_DELETE_CQ,
> > cqid); +}
> > +
> > +static int spraid_delete_sq(struct spraid_dev *hdev, u16 sqid)
> > +{
> > +	return spraid_delete_queue(hdev, SPRAID_ADMIN_DELETE_SQ,
> > sqid); +}
> > +
> > +static int spraid_create_queue(struct spraid_queue *spraidq, u16
> > qid) +{
> > +	struct spraid_dev *hdev = spraidq->hdev;
> > +	u16 cq_vector;
> > +	int ret;
> > +
> > +	cq_vector = (hdev->num_vecs == 1) ? 0 : qid;
> > +	ret = spraid_create_cq(hdev, qid, spraidq, cq_vector);
> > +	if (ret)
> > +		return ret;
> > +
> > +	ret = spraid_create_sq(hdev, qid, spraidq);
> > +	if (ret)
> > +		goto delete_cq;
> > +
> > +	spraid_init_queue(spraidq, qid);
> > +	spraidq->cq_vector = cq_vector;
> > +
> > +	ret = pci_request_irq(hdev->pdev, cq_vector, spraid_irq,
> > NULL,
> > +			      spraidq, "spraid%d_q%d",
> > hdev->instance, qid); +
> > +	if (ret) {
> > +		dev_err(hdev->dev, "Request queue[%d] irq
> > failed\n", qid);
> > +		goto delete_sq;
> > +	}
> > +
> > +	return 0;
> > +
> > +delete_sq:
> > +	spraidq->cq_vector = -1;
> > +	hdev->online_queues--;
> > +	spraid_delete_sq(hdev, qid);
> > +delete_cq:
> > +	spraid_delete_cq(hdev, qid);
> > +
> > +	return ret;
> > +}
> > +
> > +static int spraid_create_io_queues(struct spraid_dev *hdev)
> > +{
> > +	u32 i, max;
> > +	int ret = 0;
> > +
> > +	max = min(hdev->max_qid, hdev->queue_count - 1);
> > +	for (i = hdev->online_queues; i <= max; i++) {
> > +		ret = spraid_create_queue(&hdev->queues[i], i);
> > +		if (ret) {
> > +			dev_err(hdev->dev, "Create queue[%d]
> > failed\n", i);
> > +			break;
> > +		}
> > +	}
> > +
> > +	dev_info(hdev->dev, "[%s] queue_count[%d],
> > online_queue[%d]",
> > +		 __func__, hdev->queue_count, hdev->online_queues);
> > +
> > +	return ret >= 0 ? 0 : ret;
> > +}
> > +
> > +static int spraid_set_features(struct spraid_dev *hdev, u32 fid,
> > u32 dword11, void *buffer,
> > +			       size_t buflen, u32 *result)
> > +{
> > +	struct spraid_admin_command admin_cmd;
> > +	int ret;
> > +	u8 *data_ptr = NULL;
> > +	dma_addr_t data_dma = 0;
> > +
> > +	if (buffer && buflen) {
> > +		data_ptr = dma_alloc_coherent(hdev->dev, buflen,
> > &data_dma, GFP_KERNEL);
> > +		if (!data_ptr)
> > +			return -ENOMEM;
> > +
> > +		memcpy(data_ptr, buffer, buflen);
> > +	}
> > +
> > +	memset(&admin_cmd, 0, sizeof(admin_cmd));
> > +	admin_cmd.features.opcode = SPRAID_ADMIN_SET_FEATURES;
> > +	admin_cmd.features.fid = cpu_to_le32(fid);
> > +	admin_cmd.features.dword11 = cpu_to_le32(dword11);
> > +	admin_cmd.common.dptr.prp1 = cpu_to_le64(data_dma);
> > +
> > +	ret = spraid_submit_admin_sync_cmd(hdev, &admin_cmd,
> > result, NULL, 0); +
> > +	if (data_ptr)
> > +		dma_free_coherent(hdev->dev, buflen, data_ptr,
> > data_dma); +
> > +	return ret;
> > +}
> > +
> > +static int spraid_configure_timestamp(struct spraid_dev *hdev)
> > +{
> > +	__le64 ts;
> > +	int ret;
> > +
> > +	ts = cpu_to_le64(ktime_to_ms(ktime_get_real()));
> > +	ret = spraid_set_features(hdev, SPRAID_FEAT_TIMESTAMP, 0,
> > &ts, sizeof(ts), NULL); +
> > +	if (ret)
> > +		dev_err(hdev->dev, "set timestamp failed: %d\n",
> > ret);
> > +	return ret;
> > +}
> > +
> > +static int spraid_set_queue_cnt(struct spraid_dev *hdev, u32 *cnt)
> > +{
> > +	u32 q_cnt = (*cnt - 1) | ((*cnt - 1) << 16);
> > +	u32 nr_ioqs, result;
> > +	int status;
> > +
> > +	status = spraid_set_features(hdev, SPRAID_FEAT_NUM_QUEUES,
> > q_cnt, NULL, 0, &result);
> > +	if (status) {
> > +		dev_err(hdev->dev, "Set queue count failed,
> > status: %d\n",
> > +			status);
> > +		return -EIO;
> > +	}
> > +
> > +	nr_ioqs = min(result & 0xffff, result >> 16) + 1;
> > +	*cnt = min(*cnt, nr_ioqs);
> > +	if (*cnt == 0) {
> > +		dev_err(hdev->dev, "Illegal queue count: zero\n");
> > +		return -EIO;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int spraid_setup_io_queues(struct spraid_dev *hdev)
> > +{
> > +	struct spraid_queue *adminq = &hdev->queues[0];
> > +	struct pci_dev *pdev = hdev->pdev;
> > +	u32 nr_ioqs = num_online_cpus();  
> 
> so does the HW always have any limit on the number of queues?
> 
Yes.

> > +	u32 i, size;
> > +	int ret;
> > +
> > +	struct irq_affinity affd = {
> > +		.pre_vectors = 1
> > +	};
> > +
> > +	ret = spraid_set_queue_cnt(hdev, &nr_ioqs);
> > +	if (ret < 0)
> > +		return ret;
> > +
> > +	size = spraid_bar_size(hdev, nr_ioqs);
> > +	ret = spraid_remap_bar(hdev, size);
> > +	if (ret)
> > +		return -ENOMEM;
> > +
> > +	adminq->q_db = hdev->dbs;
> > +
> > +	pci_free_irq(pdev, 0, adminq);
> > +	pci_free_irq_vectors(pdev);
> > +
> > +	ret = pci_alloc_irq_vectors_affinity(pdev, 1, (nr_ioqs +
> > 1),
> > +					     PCI_IRQ_ALL_TYPES |
> > PCI_IRQ_AFFINITY, &affd);
> > +	if (ret <= 0)
> > +		return -EIO;
> > +
> > +	hdev->num_vecs = ret;  
> 
> so we have hdev->num_vecs, ->online_queues, ->max_qid, and
> ->queue_count 
> - are they all needed? Any duplication?
> 
They are needed.
hdev->num_vecs: the number of vectores that OS allocates for the driver.
->online_queues: the number of real workable queue.
->queue_count: the queue count supported by the allocated memory.
->max_qid: the max queue number supported by vectors, saved for the
convenience of later use.


> > +
> > +	hdev->max_qid = max(ret - 1, 1);
> > +
> > +	ret = pci_request_irq(pdev, adminq->cq_vector, spraid_irq,
> > NULL,
> > +			      adminq, "spraid%d_q%d",
> > hdev->instance, adminq->qid);
> > +	if (ret) {
> > +		dev_err(hdev->dev, "Request admin irq failed\n");
> > +		adminq->cq_vector = -1;
> > +		return ret;
> > +	}
> > +
> > +	for (i = hdev->queue_count; i <= hdev->max_qid; i++) {
> > +		ret = spraid_alloc_queue(hdev, i, hdev->ioq_depth);
> > +		if (ret)
> > +			break;
> > +	}
> > +	dev_info(hdev->dev, "[%s] max_qid: %d, queue_count: %d,
> > online_queue: %d, ioq_depth: %d\n",
> > +		 __func__, hdev->max_qid, hdev->queue_count,
> > hdev->online_queues, hdev->ioq_depth); +
> > +	return spraid_create_io_queues(hdev);
> > +}
> > +
> > +static void spraid_delete_io_queues(struct spraid_dev *hdev)
> > +{
> > +	u16 queues = hdev->online_queues - 1;
> > +	u8 opcode = SPRAID_ADMIN_DELETE_SQ;
> > +	u16 i, pass;
> > +
> > +	if (!pci_device_is_present(hdev->pdev)) {
> > +		dev_err(hdev->dev, "pci_device is not present,
> > skip disable io queues\n");
> > +		return;
> > +	}
> > +
> > +	if (hdev->online_queues < 2) {
> > +		dev_err(hdev->dev, "[%s] err, io queue has been
> > delete\n", __func__);
> > +		return;
> > +	}
> > +
> > +	for (pass = 0; pass < 2; pass++) {
> > +		for (i = queues; i > 0; i--)
> > +			if (spraid_delete_queue(hdev, opcode, i))
> > +				break;
> > +
> > +		opcode = SPRAID_ADMIN_DELETE_CQ;
> > +	}
> > +}
> > +
> > +static void spraid_remove_io_queues(struct spraid_dev *hdev)
> > +{
> > +	spraid_delete_io_queues(hdev);
> > +	spraid_free_io_queues(hdev);
> > +}
> > +
> > +static void spraid_pci_disable(struct spraid_dev *hdev)
> > +{
> > +	struct pci_dev *pdev = hdev->pdev;
> > +	u32 i;
> > +
> > +	for (i = 0; i < hdev->online_queues; i++)
> > +		pci_free_irq(pdev, hdev->queues[i].cq_vector,
> > &hdev->queues[i]);
> > +	pci_free_irq_vectors(pdev);
> > +	if (pci_is_enabled(pdev)) {
> > +		pci_disable_pcie_error_reporting(pdev);
> > +		pci_disable_device(pdev);
> > +	}
> > +	hdev->online_queues = 0;
> > +}
> > +
> > +static void spraid_disable_admin_queue(struct spraid_dev *hdev,
> > bool shutdown) +{
> > +	struct spraid_queue *adminq = &hdev->queues[0];
> > +	u16 start, end;
> > +
> > +	if (pci_device_is_present(hdev->pdev)) {
> > +		if (shutdown)
> > +			spraid_shutdown_ctrl(hdev);
> > +		else
> > +			spraid_disable_ctrl(hdev);
> > +	}
> > +
> > +	if (hdev->queue_count == 0) {
> > +		dev_err(hdev->dev, "[%s] err, admin queue has been
> > delete\n", __func__);
> > +		return;
> > +	}
> > +
> > +	spin_lock_irq(&adminq->cq_lock);
> > +	spraid_process_cq(adminq, &start, &end, -1);
> > +	spin_unlock_irq(&adminq->cq_lock);
> > +
> > +	spraid_complete_cqes(adminq, start, end);
> > +	spraid_free_admin_queue(hdev);
> > +}
> > +
> > +static int spraid_create_dma_pools(struct spraid_dev *hdev)
> > +{
> > +	int i;
> > +	char poolname[20] = { 0 };
> > +
> > +	hdev->prp_page_pool = dma_pool_create("prp list page",
> > hdev->dev,
> > +					      PAGE_SIZE,
> > PAGE_SIZE, 0); +
> > +	if (!hdev->prp_page_pool) {
> > +		dev_err(hdev->dev, "create prp_page_pool
> > failed\n");
> > +		return -ENOMEM;
> > +	}
> > +
> > +	for (i = 0; i < small_pool_num; i++) {
> > +		sprintf(poolname, "prp_list_256_%d", i);
> > +		hdev->prp_small_pool[i] =
> > dma_pool_create(poolname, hdev->dev, SMALL_POOL_SIZE,
> > +
> > SMALL_POOL_SIZE, 0); +
> > +		if (!hdev->prp_small_pool[i]) {
> > +			dev_err(hdev->dev, "create prp_small_pool
> > %d failed\n", i);
> > +			goto destroy_prp_small_pool;
> > +		}
> > +	}
> > +
> > +	return 0;
> > +
> > +destroy_prp_small_pool:
> > +	while (i > 0)
> > +		dma_pool_destroy(hdev->prp_small_pool[--i]);
> > +	dma_pool_destroy(hdev->prp_page_pool);
> > +
> > +	return -ENOMEM;
> > +}
> > +
> > +static void spraid_destroy_dma_pools(struct spraid_dev *hdev)
> > +{
> > +	int i;
> > +
> > +	for (i = 0; i < small_pool_num; i++)
> > +		dma_pool_destroy(hdev->prp_small_pool[i]);
> > +	dma_pool_destroy(hdev->prp_page_pool);
> > +}
> > +
> > +static int spraid_get_dev_list(struct spraid_dev *hdev, struct
> > spraid_dev_info *devices) +{
> > +	u32 nd = le32_to_cpu(hdev->ctrl_info->nd);
> > +	struct spraid_admin_command admin_cmd;
> > +	struct spraid_dev_list *list_buf;
> > +	dma_addr_t data_dma = 0;
> > +	u32 i, idx, hdid, ndev;
> > +	int ret = 0;
> > +
> > +	list_buf = dma_alloc_coherent(hdev->dev, PAGE_SIZE,
> > &data_dma, GFP_KERNEL);
> > +	if (!list_buf)
> > +		return -ENOMEM;
> > +
> > +	for (idx = 0; idx < nd;) {
> > +		memset(&admin_cmd, 0, sizeof(admin_cmd));
> > +		admin_cmd.get_info.opcode = SPRAID_ADMIN_GET_INFO;
> > +		admin_cmd.get_info.type = SPRAID_GET_INFO_DEV_LIST;
> > +		admin_cmd.get_info.cdw11 = cpu_to_le32(idx);
> > +		admin_cmd.common.dptr.prp1 = cpu_to_le64(data_dma);
> > +
> > +		ret = spraid_submit_admin_sync_cmd(hdev,
> > &admin_cmd, NULL, NULL, 0); +
> > +		if (ret) {
> > +			dev_err(hdev->dev, "Get device list
> > failed, nd: %u, idx: %u, ret: %d\n",
> > +				nd, idx, ret);
> > +			goto out;
> > +		}
> > +		ndev = le32_to_cpu(list_buf->dev_num);
> > +
> > +		dev_info(hdev->dev, "ndev numbers: %u\n", ndev);
> > +
> > +		for (i = 0; i < ndev; i++) {
> > +			hdid =
> > le32_to_cpu(list_buf->devices[i].hdid);
> > +			dev_info(hdev->dev,
> > "list_buf->devices[%d], hdid: %u target: %d, channel: %d, lun: %d,
> > attr[0x%x]\n",
> > +				 i, hdid,
> > le16_to_cpu(list_buf->devices[i].target),
> > +				 list_buf->devices[i].channel,
> > +				 list_buf->devices[i].lun,
> > +				 list_buf->devices[i].attr);
> > +			if (hdid > nd || hdid == 0) {
> > +				dev_err(hdev->dev, "err, hdid[%d]
> > invalid\n", hdid);
> > +				continue;
> > +			}
> > +			memcpy(&devices[hdid - 1],
> > &list_buf->devices[i],
> > +			       sizeof(struct spraid_dev_info));
> > +		}
> > +		idx += ndev;
> > +
> > +		if (idx < MAX_DEV_ENTRY_PER_PAGE_4K)
> > +			break;
> > +	}
> > +
> > +out:
> > +	dma_free_coherent(hdev->dev, PAGE_SIZE, list_buf,
> > data_dma);
> > +	return ret;
> > +}
> > +
> > +static void spraid_send_aen(struct spraid_dev *hdev, u16 cid)
> > +{
> > +	struct spraid_queue *adminq = &hdev->queues[0];
> > +	struct spraid_admin_command admin_cmd;
> > +
> > +	memset(&admin_cmd, 0, sizeof(admin_cmd));
> > +	admin_cmd.common.opcode = SPRAID_ADMIN_ASYNC_EVENT;
> > +	admin_cmd.common.command_id = cid;
> > +
> > +	spraid_submit_cmd(adminq, &admin_cmd);
> > +	dev_info(hdev->dev, "send aen, cid[%d]\n", cid);
> > +}
> > +static inline void spraid_send_all_aen(struct spraid_dev *hdev)
> > +{
> > +	u16 i;
> > +
> > +	for (i = 0; i < hdev->ctrl_info->aerl; i++)
> > +		spraid_send_aen(hdev, i + SPRAID_AQ_BLK_MQ_DEPTH);
> > +}
> > +
> > +static int spraid_add_device(struct spraid_dev *hdev, struct
> > spraid_dev_info *device) +{
> > +	struct Scsi_Host *shost = hdev->shost;
> > +	struct scsi_device *sdev;
> > +
> > +	sdev = scsi_device_lookup(shost, device->channel,
> > le16_to_cpu(device->target), 0);
> > +	if (sdev) {
> > +		dev_warn(hdev->dev, "Device is already exist,
> > channel: %d, target_id: %d, lun: %d\n",
> > +			 device->channel,
> > le16_to_cpu(device->target), 0);
> > +		scsi_device_put(sdev);
> > +		return -EEXIST;
> > +	}
> > +	scsi_add_device(shost, device->channel,
> > le16_to_cpu(device->target), 0);
> > +	return 0;
> > +}
> > +
> > +static int spraid_rescan_device(struct spraid_dev *hdev, struct
> > spraid_dev_info *device) +{
> > +	struct Scsi_Host *shost = hdev->shost;
> > +	struct scsi_device *sdev;
> > +
> > +	sdev = scsi_device_lookup(shost, device->channel,
> > le16_to_cpu(device->target), 0);
> > +	if (!sdev) {
> > +		dev_warn(hdev->dev, "Device is not exit, channel:
> > %d, target_id: %d, lun: %d\n",
> > +			 device->channel,
> > le16_to_cpu(device->target), 0);
> > +		return -ENODEV;
> > +	}
> > +
> > +	scsi_rescan_device(&sdev->sdev_gendev);
> > +	scsi_device_put(sdev);
> > +	return 0;
> > +}
> > +
> > +static int spraid_remove_device(struct spraid_dev *hdev, struct
> > spraid_dev_info *org_device) +{
> > +	struct Scsi_Host *shost = hdev->shost;
> > +	struct scsi_device *sdev;
> > +
> > +	sdev = scsi_device_lookup(shost, org_device->channel,
> > le16_to_cpu(org_device->target), 0);
> > +	if (!sdev) {
> > +		dev_warn(hdev->dev, "Device is not exit, channel:
> > %d, target_id: %d, lun: %d\n",
> > +			 org_device->channel,
> > le16_to_cpu(org_device->target), 0);
> > +		return -ENODEV;
> > +	}
> > +
> > +	scsi_remove_device(sdev);
> > +	scsi_device_put(sdev);
> > +	return 0;
> > +}
> > +
> > +static int spraid_dev_list_init(struct spraid_dev *hdev)
> > +{
> > +	u32 nd = le32_to_cpu(hdev->ctrl_info->nd);
> > +	int i, ret;
> > +
> > +	hdev->devices = kzalloc_node(nd * sizeof(struct
> > spraid_dev_info),
> > +				     GFP_KERNEL, hdev->numa_node);
> > +	if (!hdev->devices)
> > +		return -ENOMEM;
> > +
> > +	ret = spraid_get_dev_list(hdev, hdev->devices);
> > +	if (ret) {
> > +		dev_err(hdev->dev, "Ignore failure of getting
> > device list within initialization\n");
> > +		return 0;
> > +	}
> > +
> > +	for (i = 0; i < nd; i++) {
> > +		if
> > (SPRAID_DEV_INFO_FLAG_VALID(hdev->devices[i].flag) &&
> > +
> > SPRAID_DEV_INFO_ATTR_BOOT(hdev->devices[i].attr)) {
> > +			spraid_add_device(hdev, &hdev->devices[i]);
> > +			break;
> > +		}
> > +	}
> > +	return 0;
> > +}
> > +
> > +static void spraid_scan_work(struct work_struct *work)
> > +{
> > +	struct spraid_dev *hdev =
> > +		container_of(work, struct spraid_dev, scan_work);
> > +	struct spraid_dev_info *devices, *org_devices;
> > +	u32 nd = le32_to_cpu(hdev->ctrl_info->nd);
> > +	u8 flag, org_flag;
> > +	int i, ret;
> > +
> > +	devices = kcalloc(nd, sizeof(struct spraid_dev_info),
> > GFP_KERNEL);
> > +	if (!devices)
> > +		return;
> > +	ret = spraid_get_dev_list(hdev, devices);
> > +	if (ret)
> > +		goto free_list;
> > +	org_devices = hdev->devices;
> > +	for (i = 0; i < nd; i++) {
> > +		org_flag = org_devices[i].flag;
> > +		flag = devices[i].flag;
> > +
> > +		pr_debug("i: %d, org_flag: 0x%x, flag: 0x%x\n",
> > +			    i, org_flag, flag);
> > +
> > +		if (SPRAID_DEV_INFO_FLAG_VALID(flag)) {
> > +			if (!SPRAID_DEV_INFO_FLAG_VALID(org_flag))
> > {
> > +				down_write(&hdev->devices_rwsem);
> > +				memcpy(&org_devices[i],
> > &devices[i],
> > +				       sizeof(struct
> > spraid_dev_info));
> > +				up_write(&hdev->devices_rwsem);
> > +				spraid_add_device(hdev,
> > &devices[i]);
> > +			} else if
> > (SPRAID_DEV_INFO_FLAG_CHANGE(flag)) {
> > +				spraid_rescan_device(hdev,
> > &devices[i]);
> > +			}
> > +		} else {
> > +			if (SPRAID_DEV_INFO_FLAG_VALID(org_flag)) {
> > +				down_write(&hdev->devices_rwsem);
> > +				org_devices[i].flag &= 0xfe;
> > +				up_write(&hdev->devices_rwsem);
> > +				spraid_remove_device(hdev,
> > &org_devices[i]);
> > +			}
> > +		}
> > +	}
> > +free_list:
> > +	kfree(devices);
> > +}
> > +
> > +static void spraid_timesyn_work(struct work_struct *work)
> > +{
> > +	struct spraid_dev *hdev =
> > +		container_of(work, struct spraid_dev,
> > timesyn_work); +
> > +	spraid_configure_timestamp(hdev);
> > +}
> > +
> > +static void spraid_queue_scan(struct spraid_dev *hdev)
> > +{
> > +	queue_work(spraid_wq, &hdev->scan_work);
> > +}
> > +
> > +static void spraid_handle_aen_notice(struct spraid_dev *hdev, u32
> > result) +{
> > +	switch ((result & 0xff00) >> 8) {
> > +	case SPRAID_AEN_DEV_CHANGED:
> > +		spraid_queue_scan(hdev);
> > +		break;
> > +	case SPRAID_AEN_HOST_PROBING:
> > +		break;
> > +	default:
> > +		dev_warn(hdev->dev, "async event result %08x\n",
> > result);
> > +	}
> > +}
> > +
> > +static void spraid_handle_aen_vs(struct spraid_dev *hdev, u32
> > result, u32 result1) +{
> > +	switch ((result & 0xff00) >> 8) {
> > +	case SPRAID_AEN_TIMESYN:
> > +		queue_work(spraid_wq, &hdev->timesyn_work);
> > +		break;
> > +	default:
> > +		dev_warn(hdev->dev, "async event result: 0x%x\n",
> > result);
> > +	}
> > +}
> > +
> > +static int spraid_alloc_resources(struct spraid_dev *hdev)
> > +{
> > +	int ret, nqueue;
> > +
> > +	ret = ida_alloc(&spraid_instance_ida, GFP_KERNEL);
> > +	if (ret < 0) {
> > +		dev_err(hdev->dev, "Get instance id failed\n");
> > +		return ret;
> > +	}
> > +	hdev->instance = ret;  
> 
> what is hdev->instance actually used for? I see it passed to irq
> handler but I can't figure out how it is used for something
> 
Multiple SPRADI cards can be plugged into one system, hdev->instance is
used to tell different cards.

> > +
> > +	hdev->ctrl_info = kzalloc_node(sizeof(*hdev->ctrl_info),
> > +				       GFP_KERNEL,
> > hdev->numa_node);
> > +	if (!hdev->ctrl_info) {
> > +		ret = -ENOMEM;
> > +		goto release_instance;
> > +	}
> > +
> > +	ret = spraid_create_dma_pools(hdev);
> > +	if (ret)
> > +		goto free_ctrl_info;
> > +	nqueue = num_possible_cpus() + 1;
> > +	hdev->queues = kcalloc_node(nqueue, sizeof(struct
> > spraid_queue),
> > +				    GFP_KERNEL, hdev->numa_node);  
> 
> if some CPUs come online later then will the extra memory allocated
> here be useful? I see checks for num online cpus elsewhere
> 
Yes, Extra memory may be allocated in this case. But it is worth it. Two
reasons: 
1. The possibility of CPU coming online later is low and
extra memory is small. 
2. If using num_online_cpus() here in this case, the value got here by
num_online_cpus() is smaller than the later value, the performance
will be limited by the min value of the two values.

> > +	if (!hdev->queues) {
> > +		ret = -ENOMEM;
> > +		goto destroy_dma_pools;
> > +	}
> > +
> > +	ret = spraid_alloc_admin_cmds(hdev);
> > +	if (ret)
> > +		goto free_queues;
> > +
> > +	dev_info(hdev->dev, "[%s] queues num: %d\n", __func__,
> > nqueue); +
> > +	return 0;
> > +
> > +free_queues:
> > +	kfree(hdev->queues);
> > +destroy_dma_pools:
> > +	spraid_destroy_dma_pools(hdev);
> > +free_ctrl_info:
> > +	kfree(hdev->ctrl_info);
> > +release_instance:
> > +	ida_free(&spraid_instance_ida, hdev->instance);
> > +	return ret;
> > +}
> > +
> > +static void spraid_free_resources(struct spraid_dev *hdev)
> > +{
> > +	spraid_free_admin_cmds(hdev);
> > +	kfree(hdev->queues);
> > +	spraid_destroy_dma_pools(hdev);
> > +	kfree(hdev->ctrl_info);
> > +	ida_free(&spraid_instance_ida, hdev->instance);
> > +}
> > +
> > +
> > +
> > +static void spraid_bsg_unmap_data(struct spraid_dev *hdev, struct
> > bsg_job *job) +{
> > +	struct request *rq = blk_mq_rq_from_pdu(job);
> > +	struct spraid_iod *iod = job->dd_data;
> > +	enum dma_data_direction dma_dir = rq_data_dir(rq) ?
> > DMA_TO_DEVICE : DMA_FROM_DEVICE; +
> > +	if (iod->nsge)
> > +		dma_unmap_sg(hdev->dev, iod->sg, iod->nsge,
> > dma_dir); +
> > +	spraid_free_iod_res(hdev, iod);
> > +}
> > +
> > +static int spraid_bsg_map_data(struct spraid_dev *hdev, struct
> > bsg_job *job,
> > +			       struct spraid_admin_command *cmd)
> > +{
> > +	struct request *rq = blk_mq_rq_from_pdu(job);
> > +	struct spraid_iod *iod = job->dd_data;
> > +	enum dma_data_direction dma_dir = rq_data_dir(rq) ?
> > DMA_TO_DEVICE : DMA_FROM_DEVICE;
> > +	int ret = 0;
> > +
> > +	iod->sg = job->request_payload.sg_list;
> > +	iod->nsge = job->request_payload.sg_cnt;
> > +	iod->length = job->request_payload.payload_len;
> > +	iod->use_sgl = false;
> > +	iod->npages = -1;
> > +	iod->sg_drv_mgmt = false;
> > +
> > +	if (!iod->nsge)
> > +		goto out;
> > +
> > +	ret = dma_map_sg_attrs(hdev->dev, iod->sg, iod->nsge,
> > dma_dir, DMA_ATTR_NO_WARN);
> > +	if (!ret)
> > +		goto out;
> > +
> > +	ret = spraid_setup_prps(hdev, iod);
> > +	if (ret)
> > +		goto unmap;
> > +
> > +	cmd->common.dptr.prp1 =
> > cpu_to_le64(sg_dma_address(iod->sg));
> > +	cmd->common.dptr.prp2 = cpu_to_le64(iod->first_dma);
> > +
> > +	return 0;
> > +
> > +unmap:
> > +	dma_unmap_sg(hdev->dev, iod->sg, iod->nsge, dma_dir);
> > +out:
> > +	return ret;
> > +}
> > +
> > +static int spraid_get_ctrl_info(struct spraid_dev *hdev, struct
> > spraid_ctrl_info *ctrl_info) +{
> > +	struct spraid_admin_command admin_cmd;
> > +	u8 *data_ptr = NULL;  
> 
> no need to init
> 
Ok, Will remove init.

> > +	dma_addr_t data_dma = 0;  
> 
> or here
>
Ok, Will remove init.
 
> > +	int ret;
> > +
> > +	data_ptr = dma_alloc_coherent(hdev->dev, PAGE_SIZE,
> > &data_dma, GFP_KERNEL);
> > +	if (!data_ptr)
> > +		return -ENOMEM;
> > +
> > +	memset(&admin_cmd, 0, sizeof(admin_cmd));
> > +	admin_cmd.get_info.opcode = SPRAID_ADMIN_GET_INFO;
> > +	admin_cmd.get_info.type = SPRAID_GET_INFO_CTRL;
> > +	admin_cmd.common.dptr.prp1 = cpu_to_le64(data_dma);
> > +
> > +	ret = spraid_submit_admin_sync_cmd(hdev, &admin_cmd, NULL,
> > NULL, 0);
> > +	if (!ret)
> > +		memcpy(ctrl_info, data_ptr, sizeof(struct
> > spraid_ctrl_info)); +
> > +	dma_free_coherent(hdev->dev, PAGE_SIZE, data_ptr,
> > data_dma); +
> > +	return ret;
> > +}
> > +
> > +static int spraid_init_ctrl_info(struct spraid_dev *hdev)
> > +{
> > +	int ret;
> > +
> > +	hdev->ctrl_info->nd = cpu_to_le32(240);
> > +	hdev->ctrl_info->mdts = 8;
> > +	hdev->ctrl_info->max_cmds = cpu_to_le16(4096);  
> 
> why are these even cpu_to_le16()? they do not seem to be used in any
> HW interaction or similar
> 
> and max_cmds only seems to be read in the print below, making it next
> to useless
> 
It is possible that spraid_get_ctrl_info() fails, in which case, the
default value will be used.

> > +	hdev->ctrl_info->max_num_sge = cpu_to_le16(128);
> > +	hdev->ctrl_info->max_channel = cpu_to_le16(4);
> > +	hdev->ctrl_info->max_tgt_id = cpu_to_le32(3239);
> > +	hdev->ctrl_info->max_lun = cpu_to_le16(2);
> > +
> > +	ret = spraid_get_ctrl_info(hdev, hdev->ctrl_info);
> > +	if (ret)
> > +		dev_err(hdev->dev, "get controller info failed:
> > %d\n", ret); +
> > +	dev_info(hdev->dev, "[%s]nd = %d\n", __func__,
> > hdev->ctrl_info->nd);
> > +	dev_info(hdev->dev, "[%s]max_cmd = %d\n", __func__,
> > hdev->ctrl_info->max_cmds);
> > +	dev_info(hdev->dev, "[%s]max_channel = %d\n", __func__,
> > hdev->ctrl_info->max_channel);
> > +	dev_info(hdev->dev, "[%s]max_tgt_id = %d\n", __func__,
> > hdev->ctrl_info->max_tgt_id);
> > +	dev_info(hdev->dev, "[%s]max_lun = %d\n", __func__,
> > hdev->ctrl_info->max_lun);
> > +	dev_info(hdev->dev, "[%s]max_num_sge = %d\n", __func__,
> > hdev->ctrl_info->max_num_sge);
> > +	dev_info(hdev->dev, "[%s]lun_num_boot = %d\n", __func__,
> > hdev->ctrl_info->lun_num_in_boot);
> > +	dev_info(hdev->dev, "[%s]mdts = %d\n", __func__,
> > hdev->ctrl_info->mdts);
> > +	dev_info(hdev->dev, "[%s]acl = %d\n", __func__,
> > hdev->ctrl_info->acl);
> > +	dev_info(hdev->dev, "[%s]aer1 = %d\n", __func__,
> > hdev->ctrl_info->aerl);
> > +	dev_info(hdev->dev, "[%s]card_type = %d\n", __func__,
> > hdev->ctrl_info->card_type);
> > +	dev_info(hdev->dev, "[%s]rtd3e = %d\n", __func__,
> > hdev->ctrl_info->rtd3e);
> > +	dev_info(hdev->dev, "[%s]sn = %s\n", __func__,
> > hdev->ctrl_info->sn);
> > +	dev_info(hdev->dev, "[%s]fr = %s\n", __func__,
> > hdev->ctrl_info->fr);  
> 
> why is all this required on the console? looks way too chatty again
> 
The information is from FW. We¡¯d like to know the capability of FW.

> > +
> > +	return 0;
> > +}
> > +
> > +#define SPRAID_MAX_ADMIN_PAYLOAD_SIZE	BIT(16)
> > +static int spraid_alloc_iod_ext_mem_pool(struct spraid_dev *hdev)
> > +{
> > +	u16 max_sge = le16_to_cpu(hdev->ctrl_info->max_num_sge);
> > +	size_t alloc_size;
> > +
> > +	alloc_size = spraid_iod_ext_size(hdev,
> > SPRAID_MAX_ADMIN_PAYLOAD_SIZE,
> > +					 max_sge, true, false);
> > +	if (alloc_size > PAGE_SIZE)
> > +		dev_warn(hdev->dev, "It is unreasonable for sg
> > allocation more than one page\n");
> > +	hdev->iod_mempool = mempool_create_node(1,
> > mempool_kmalloc, mempool_kfree,
> > +						(void
> > *)alloc_size, GFP_KERNEL, hdev->numa_node);
> > +	if (!hdev->iod_mempool) {
> > +		dev_err(hdev->dev, "Create iod extension memory
> > pool failed\n");
> > +		return -ENOMEM;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void spraid_free_iod_ext_mem_pool(struct spraid_dev *hdev)
> > +{
> > +	mempool_destroy(hdev->iod_mempool);
> > +}
> > +
> > +static int spraid_user_admin_cmd(struct spraid_dev *hdev, struct
> > bsg_job *job) +{
> > +	struct spraid_bsg_request *bsg_req = (struct
> > spraid_bsg_request *)(job->request);
> > +	struct spraid_passthru_common_cmd *cmd =
> > &(bsg_req->admcmd);
> > +	struct spraid_admin_command admin_cmd;
> > +	u32 timeout = msecs_to_jiffies(cmd->timeout_ms);
> > +	u32 result[2] = {0};
> > +	int status;
> > +
> > +	if (hdev->state >= SPRAID_RESETTING) {
> > +		dev_err(hdev->dev, "[%s] err, host state:[%d] is
> > not right\n",
> > +			__func__, hdev->state);
> > +		return -EBUSY;
> > +	}
> > +
> > +	dev_info(hdev->dev, "[%s] opcode[0x%x] subopcode[0x%x]
> > init\n",
> > +		 __func__, cmd->opcode, cmd->info_0.subopcode);
> > +
> > +	memset(&admin_cmd, 0, sizeof(admin_cmd));
> > +	admin_cmd.common.opcode = cmd->opcode;
> > +	admin_cmd.common.flags = cmd->flags;
> > +	admin_cmd.common.hdid = cpu_to_le32(cmd->nsid);
> > +	admin_cmd.common.cdw2[0] = cpu_to_le32(cmd->cdw2);
> > +	admin_cmd.common.cdw2[1] = cpu_to_le32(cmd->cdw3);
> > +	admin_cmd.common.cdw10 = cpu_to_le32(cmd->cdw10);
> > +	admin_cmd.common.cdw11 = cpu_to_le32(cmd->cdw11);
> > +	admin_cmd.common.cdw12 = cpu_to_le32(cmd->cdw12);
> > +	admin_cmd.common.cdw13 = cpu_to_le32(cmd->cdw13);
> > +	admin_cmd.common.cdw14 = cpu_to_le32(cmd->cdw14);
> > +	admin_cmd.common.cdw15 = cpu_to_le32(cmd->cdw15);
> > +
> > +	status = spraid_bsg_map_data(hdev, job, &admin_cmd);
> > +	if (status) {
> > +		dev_err(hdev->dev, "[%s] err, map data failed\n",
> > __func__);
> > +		return status;
> > +	}
> > +
> > +	status = spraid_submit_admin_sync_cmd(hdev, &admin_cmd,
> > &result[0], &result[1], timeout);
> > +	if (status >= 0) {
> > +		job->reply_len = sizeof(result);
> > +		memcpy(job->reply, result, sizeof(result));
> > +	}
> > +
> > +	dev_info(hdev->dev, "[%s] opcode[0x%x] subopcode[0x%x],
> > status[0x%x] result0[0x%x] result1[0x%x]\n",
> > +		 __func__, cmd->opcode, cmd->info_0.subopcode,
> > status, result[0], result[1]); > +
> > +	spraid_bsg_unmap_data(hdev, job);
> > +
> > +	return status;
> > +}
> > +
> > +static int spraid_alloc_ioq_ptcmds(struct spraid_dev *hdev)
> > +{
> > +	int i;
> > +	int ptnum = SPRAID_NR_IOQ_PTCMDS;
> > +
> > +	INIT_LIST_HEAD(&hdev->ioq_pt_list);
> > +	spin_lock_init(&hdev->ioq_pt_lock);
> > +
> > +	hdev->ioq_ptcmds = kcalloc_node(ptnum, sizeof(struct
> > spraid_cmd),
> > +					GFP_KERNEL,
> > hdev->numa_node); +
> > +	if (!hdev->ioq_ptcmds) {
> > +		dev_err(hdev->dev, "Alloc ioq_ptcmds failed\n");
> > +		return -ENOMEM;
> > +	}
> > +
> > +	for (i = 0; i < ptnum; i++) {
> > +		hdev->ioq_ptcmds[i].qid = i / SPRAID_PTCMDS_PERQ +
> > 1;
> > +		hdev->ioq_ptcmds[i].cid = i % SPRAID_PTCMDS_PERQ +
> > SPRAID_IO_BLK_MQ_DEPTH;
> > +		list_add_tail(&(hdev->ioq_ptcmds[i].list),
> > &hdev->ioq_pt_list);
> > +	}
> > +
> > +	dev_info(hdev->dev, "Alloc ioq_ptcmds success,
> > ptnum[%d]\n", ptnum); +
> > +	return 0;
> > +}
> > +
> > +static void spraid_free_ioq_ptcmds(struct spraid_dev *hdev)
> > +{
> > +	kfree(hdev->ioq_ptcmds);
> > +	INIT_LIST_HEAD(&hdev->ioq_pt_list);
> > +}
> > +
> > +static int spraid_submit_ioq_sync_cmd(struct spraid_dev *hdev,
> > struct spraid_ioq_command *cmd,
> > +				      u32 *result, u32 *reslen,
> > u32 timeout) +{
> > +	int ret;
> > +	dma_addr_t sense_dma;
> > +	struct spraid_queue *ioq;
> > +	void *sense_addr = NULL;
> > +	struct spraid_cmd *pt_cmd = spraid_get_cmd(hdev,
> > SPRAID_CMD_IOPT); +
> > +	if (!pt_cmd) {
> > +		dev_err(hdev->dev, "err, get ioq cmd failed\n");
> > +		return -EFAULT;
> > +	}
> > +
> > +	timeout = timeout ? timeout : ADMIN_TIMEOUT;
> > +
> > +	init_completion(&pt_cmd->cmd_done);
> > +
> > +	ioq = &hdev->queues[pt_cmd->qid];
> > +	ret = pt_cmd->cid * SCSI_SENSE_BUFFERSIZE;
> > +	sense_addr = ioq->sense + ret;
> > +	sense_dma = ioq->sense_dma_addr + ret;
> > +
> > +	cmd->common.sense_addr = cpu_to_le64(sense_dma);
> > +	cmd->common.sense_len = cpu_to_le16(SCSI_SENSE_BUFFERSIZE);
> > +	cmd->common.command_id = pt_cmd->cid;
> > +
> > +	spraid_submit_cmd(ioq, cmd);
> > +
> > +	if (!wait_for_completion_timeout(&pt_cmd->cmd_done,
> > timeout)) {
> > +		dev_err(hdev->dev, "[%s] cid[%d] qid[%d] timeout,
> > opcode[0x%x] subopcode[0x%x]\n",
> > +			__func__, pt_cmd->cid, pt_cmd->qid,
> > cmd->common.opcode,
> > +			(le32_to_cpu(cmd->common.cdw3[0]) &
> > 0xffff));
> > +		WRITE_ONCE(pt_cmd->state, SPRAID_CMD_TIMEOUT);
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (result && reslen) {
> > +		if ((pt_cmd->status & 0x17f) == 0x101) {
> > +			memcpy(result, sense_addr,
> > SCSI_SENSE_BUFFERSIZE);
> > +			*reslen = SCSI_SENSE_BUFFERSIZE;
> > +		}
> > +	}
> > +
> > +	spraid_put_cmd(hdev, pt_cmd, SPRAID_CMD_IOPT);
> > +
> > +	return pt_cmd->status;
> > +}
> > +
> > +static int spraid_user_ioq_cmd(struct spraid_dev *hdev, struct
> > bsg_job *job) +{
> > +	struct spraid_bsg_request *bsg_req = (struct
> > spraid_bsg_request *)(job->request);
> > +	struct spraid_ioq_passthru_cmd *cmd = &(bsg_req->ioqcmd);
> > +	struct spraid_ioq_command ioq_cmd;
> > +	int status = 0;
> > +	u32 timeout = msecs_to_jiffies(cmd->timeout_ms);
> > +
> > +	if (cmd->data_len > PAGE_SIZE) {
> > +		dev_err(hdev->dev, "[%s] data len bigger than
> > 4k\n", __func__);
> > +		return -EFAULT;
> > +	}
> > +
> > +	if (hdev->state != SPRAID_LIVE) {
> > +		dev_err(hdev->dev, "[%s] err, host state:[%d] is
> > not live\n",
> > +			__func__, hdev->state);
> > +		return -EBUSY;
> > +	}
> > +
> > +	dev_info(hdev->dev, "[%s] opcode[0x%x] subopcode[0x%x]
> > init, datalen[%d]\n",
> > +		 __func__, cmd->opcode, cmd->info_1.subopcode,
> > cmd->data_len); +
> > +	memset(&ioq_cmd, 0, sizeof(ioq_cmd));
> > +	ioq_cmd.common.opcode = cmd->opcode;
> > +	ioq_cmd.common.flags = cmd->flags;
> > +	ioq_cmd.common.hdid = cpu_to_le32(cmd->nsid);
> > +	ioq_cmd.common.sense_len =
> > cpu_to_le16(cmd->info_0.res_sense_len);
> > +	ioq_cmd.common.cdb_len = cmd->info_0.cdb_len;
> > +	ioq_cmd.common.rsvd2 = cmd->info_0.rsvd0;
> > +	ioq_cmd.common.cdw3[0] = cpu_to_le32(cmd->cdw3);
> > +	ioq_cmd.common.cdw3[1] = cpu_to_le32(cmd->cdw4);
> > +	ioq_cmd.common.cdw3[2] = cpu_to_le32(cmd->cdw5);
> > +
> > +	ioq_cmd.common.cdw10[0] = cpu_to_le32(cmd->cdw10);
> > +	ioq_cmd.common.cdw10[1] = cpu_to_le32(cmd->cdw11);
> > +	ioq_cmd.common.cdw10[2] = cpu_to_le32(cmd->cdw12);
> > +	ioq_cmd.common.cdw10[3] = cpu_to_le32(cmd->cdw13);
> > +	ioq_cmd.common.cdw10[4] = cpu_to_le32(cmd->cdw14);
> > +	ioq_cmd.common.cdw10[5] = cpu_to_le32(cmd->data_len);
> > +
> > +	memcpy(ioq_cmd.common.cdb, &cmd->cdw16,
> > cmd->info_0.cdb_len); +
> > +	ioq_cmd.common.cdw26[0] = cpu_to_le32(cmd->cdw26[0]);
> > +	ioq_cmd.common.cdw26[1] = cpu_to_le32(cmd->cdw26[1]);
> > +	ioq_cmd.common.cdw26[2] = cpu_to_le32(cmd->cdw26[2]);
> > +	ioq_cmd.common.cdw26[3] = cpu_to_le32(cmd->cdw26[3]);
> > +
> > +	status = spraid_bsg_map_data(hdev, job, (struct
> > spraid_admin_command *)&ioq_cmd);
> > +	if (status) {
> > +		dev_err(hdev->dev, "[%s] err, map data failed\n",
> > __func__);
> > +		return status;
> > +	}
> > +
> > +	status = spraid_submit_ioq_sync_cmd(hdev, &ioq_cmd,
> > job->reply, &job->reply_len, timeout); +
> > +	dev_info(hdev->dev, "[%s] opcode[0x%x] subopcode[0x%x],
> > status[0x%x] reply_len[%d]\n",
> > +		 __func__, cmd->opcode, cmd->info_1.subopcode,
> > status, job->reply_len); +
> > +	spraid_bsg_unmap_data(hdev, job);
> > +
> > +	return status;
> > +}
> > +
> > +static int spraid_reset_work_sync(struct spraid_dev *hdev);
> > +
> > +static bool spraid_check_scmd_completed(struct scsi_cmnd *scmd)
> > +{
> > +	struct spraid_dev *hdev = shost_priv(scmd->device->host);
> > +	struct spraid_iod *iod = scsi_cmd_priv(scmd);
> > +	struct spraid_queue *spraidq;
> > +	u16 hwq, cid;
> > +
> > +	spraid_get_tag_from_scmd(scmd, &hwq, &cid);
> > +	spraidq = &hdev->queues[hwq];
> > +	if (READ_ONCE(iod->state) == SPRAID_CMD_COMPLETE ||
> > spraid_poll_cq(spraidq, cid)) {
> > +		dev_warn(hdev->dev, "cid[%d], qid[%d] has been
> > completed\n",  
> 
> "cid[%d], qid[%d] has been completed" why does this merit a warning? 
> what action should the user take when he/she sees it?
> 
The print is in error handling path, so it merits a warning. We can
check the status of driver by it.

> > +			 cid, spraidq->qid);
> > +		return true;
> > +	}
> > +	return false;
> > +}
> > +
> > +static enum blk_eh_timer_return spraid_scmd_timeout(struct
> > scsi_cmnd *scmd) +{
> > +	struct spraid_iod *iod = scsi_cmd_priv(scmd);
> > +	unsigned int timeout =
> > scmd->device->request_queue->rq_timeout; +
> > +	if (spraid_check_scmd_completed(scmd))
> > +		goto out;
> > +
> > +	if (time_after(jiffies, scmd->jiffies_at_alloc + timeout))
> > {
> > +		if (cmpxchg(&iod->state, SPRAID_CMD_IN_FLIGHT,
> > SPRAID_CMD_TIMEOUT) ==
> > +		    SPRAID_CMD_IN_FLIGHT) {
> > +			return BLK_EH_DONE;
> > +		}
> > +	}
> > +out:
> > +	return BLK_EH_RESET_TIMER;
> > +}
> > +
> > +/* send abort command by admin queue temporary */
> > +static int spraid_send_abort_cmd(struct spraid_dev *hdev, u32
> > hdid, u16 qid, u16 cid) +{
> > +	struct spraid_admin_command admin_cmd;
> > +
> > +	memset(&admin_cmd, 0, sizeof(admin_cmd));
> > +	admin_cmd.abort.opcode = SPRAID_ADMIN_ABORT_CMD;
> > +	admin_cmd.abort.hdid = cpu_to_le32(hdid);
> > +	admin_cmd.abort.sqid = cpu_to_le16(qid);
> > +	admin_cmd.abort.cid = cpu_to_le16(cid);
> > +
> > +	return spraid_submit_admin_sync_cmd(hdev, &admin_cmd,
> > NULL, NULL, 0); +}
> > +
> > +/* send reset command by admin quueue temporary */
> > +static int spraid_send_reset_cmd(struct spraid_dev *hdev, int
> > type, u32 hdid) +{
> > +	struct spraid_admin_command admin_cmd;
> > +
> > +	memset(&admin_cmd, 0, sizeof(admin_cmd));
> > +	admin_cmd.reset.opcode = SPRAID_ADMIN_RESET;
> > +	admin_cmd.reset.hdid = cpu_to_le32(hdid);
> > +	admin_cmd.reset.type = type;
> > +
> > +	return spraid_submit_admin_sync_cmd(hdev, &admin_cmd,
> > NULL, NULL, 0); +}
> > +
> > +static bool spraid_change_host_state(struct spraid_dev *hdev, enum
> > spraid_state newstate)  
> 
> I really doubt that you require a state machine
> 
> Please try to operate the driver within standard kernel and scsi 
> framework constaints without this
> 
There is a case: there is only admin queue and no IO queues. In this
case, driver does not operate with scsi framework.

> > +{
> > +	unsigned long flags;
> > +	enum spraid_state oldstate;
> > +	bool change = false;
> > +
> > +	spin_lock_irqsave(&hdev->state_lock, flags);
> > +
> > +	oldstate = hdev->state;
> > +	switch (newstate) {
> > +	case SPRAID_LIVE:
> > +		switch (oldstate) {
> > +		case SPRAID_NEW:
> > +		case SPRAID_RESETTING:
> > +			change = true;
> > +			break;
> > +		default:
> > +			break;
> > +		}
> > +		break;
> > +	case SPRAID_RESETTING:
> > +		switch (oldstate) {
> > +		case SPRAID_LIVE:
> > +			change = true;
> > +			break;
> > +		default:
> > +			break;
> > +		}
> > +		break;
> > +	case SPRAID_DELETING:
> > +		if (oldstate != SPRAID_DELETING)
> > +			change = true;
> > +		break;
> > +	case SPRAID_DEAD:
> > +		switch (oldstate) {
> > +		case SPRAID_NEW:
> > +		case SPRAID_LIVE:
> > +		case SPRAID_RESETTING:
> > +			change = true;
> > +			break;
> > +		default:
> > +			break;
> > +		}
> > +		break;
> > +	default:
> > +		break;
> > +	}
> > +	if (change)
> > +		hdev->state = newstate;
> > +	spin_unlock_irqrestore(&hdev->state_lock, flags);
> > +
> > +	dev_info(hdev->dev, "[%s][%d]->[%d], change[%d]\n",
> > __func__, oldstate, newstate, change); +
> > +	return change;
> > +}
> > +
> > +static void spraid_back_fault_cqe(struct spraid_queue *ioq, struct
> > spraid_completion *cqe) +{
> > +	struct spraid_dev *hdev = ioq->hdev;
> > +	struct blk_mq_tags *tags; > +	struct scsi_cmnd
> > *scmd;
> > +	struct spraid_iod *iod;
> > +	struct request *req;
> > +
> > +	tags = hdev->shost->tag_set.tags[ioq->qid - 1];
> > +	req = blk_mq_tag_to_rq(tags, cqe->cmd_id);
> > +	if (unlikely(!req || !blk_mq_request_started(req)))
> > +		return;
> > +
> > +	scmd = blk_mq_rq_to_pdu(req);
> > +	iod = scsi_cmd_priv(scmd);
> > +
> > +	set_host_byte(scmd, DID_NO_CONNECT);
> > +	if (iod->nsge)
> > +		scsi_dma_unmap(scmd);
> > +	spraid_free_iod_res(hdev, iod);
> > +	scsi_done(scmd);
> > +	dev_warn(hdev->dev, "Back fault CQE, cid[%d], qid[%d]\n",
> > +		 cqe->cmd_id, ioq->qid);
> > +}
> > +
> > +static void spraid_back_all_io(struct spraid_dev *hdev)
> > +{
> > +	int i, j;
> > +	struct spraid_queue *ioq;
> > +	struct spraid_completion cqe = { 0 };
> > +
> > +	for (i = 1; i <= hdev->shost->nr_hw_queues; i++) {
> > +		ioq = &hdev->queues[i];
> > +		for (j = 0; j < hdev->shost->can_queue; j++) {
> > +			cqe.cmd_id = j;
> > +			spraid_back_fault_cqe(ioq, &cqe);
> > +		}
> > +	}
> > +}
> > +
> > +static void spraid_dev_disable(struct spraid_dev *hdev, bool
> > shutdown) +{
> > +	struct spraid_queue *adminq = &hdev->queues[0];
> > +	u16 start, end;
> > +	unsigned long timeout = jiffies + 600 * HZ;
> > +
> > +	if (pci_device_is_present(hdev->pdev)) {
> > +		if (shutdown)
> > +			spraid_shutdown_ctrl(hdev);
> > +		else
> > +			spraid_disable_ctrl(hdev);
> > +	}
> > +
> > +	while (!time_after(jiffies, timeout)) {
> > +		if (!pci_device_is_present(hdev->pdev)) {
> > +			dev_info(hdev->dev, "[%s] pci_device not
> > present, skip wait\n", __func__);
> > +			break;
> > +		}
> > +		if (!spraid_wait_ready(hdev, hdev->cap, false)) {
> > +			dev_info(hdev->dev, "[%s] wait ready
> > success after reset\n", __func__);
> > +			break;
> > +		}
> > +		dev_info(hdev->dev, "[%s] waiting csts_rdy
> > ready\n", __func__);
> > +	}
> > +
> > +	if (hdev->queue_count == 0) {
> > +		dev_err(hdev->dev, "[%s] warn, queue has been
> > delete\n", __func__);
> > +		return;
> > +	}
> > +
> > +	spin_lock_irq(&adminq->cq_lock);
> > +	spraid_process_cq(adminq, &start, &end, -1);
> > +	spin_unlock_irq(&adminq->cq_lock);
> > +	spraid_complete_cqes(adminq, start, end);
> > +
> > +	spraid_pci_disable(hdev);
> > +
> > +	spraid_back_all_io(hdev);
> > +}
> > +
> > +static void spraid_reset_work(struct work_struct *work)
> > +{
> > +	int ret;
> > +	struct spraid_dev *hdev = container_of(work, struct
> > spraid_dev, reset_work); +
> > +	if (hdev->state != SPRAID_RESETTING) {
> > +		dev_err(hdev->dev, "[%s] err, host is not reset
> > state\n", __func__);
> > +		return;
> > +	}
> > +
> > +	dev_info(hdev->dev, "[%s] enter host reset\n", __func__);
> > +
> > +	if (hdev->ctrl_config & SPRAID_CC_ENABLE) {
> > +		dev_info(hdev->dev, "[%s] start dev_disable\n",
> > __func__);
> > +		spraid_dev_disable(hdev, false);
> > +	}
> > +
> > +	ret = spraid_pci_enable(hdev);
> > +	if (ret)
> > +		goto out;
> > +
> > +	ret = spraid_setup_admin_queue(hdev);
> > +	if (ret)
> > +		goto pci_disable;
> > +
> > +	ret = spraid_setup_io_queues(hdev);
> > +	if (ret || hdev->online_queues <=
> > hdev->shost->nr_hw_queues)
> > +		goto pci_disable;
> > +
> > +	spraid_change_host_state(hdev, SPRAID_LIVE);
> > +
> > +	spraid_send_all_aen(hdev);
> > +
> > +	return;
> > +
> > +pci_disable:
> > +	spraid_pci_disable(hdev);
> > +out:
> > +	spraid_change_host_state(hdev, SPRAID_DEAD);
> > +	dev_err(hdev->dev, "[%s] err, host reset failed\n",
> > __func__); +}
> > +
> > +static int spraid_reset_work_sync(struct spraid_dev *hdev)
> > +{
> > +	if (!spraid_change_host_state(hdev, SPRAID_RESETTING)) {
> > +		dev_info(hdev->dev, "[%s] can't change to reset
> > state\n", __func__);
> > +		return -EBUSY;
> > +	}
> > +
> > +	if (!queue_work(spraid_wq, &hdev->reset_work)) {
> > +		dev_err(hdev->dev, "[%s] err, host is already in
> > reset state\n", __func__);
> > +		return -EBUSY;
> > +	}
> > +
> > +	flush_work(&hdev->reset_work);
> > +	if (hdev->state != SPRAID_LIVE)
> > +		return -ENODEV;
> > +
> > +	return 0;
> > +}
> > +
> > +static int spraid_wait_abnl_cmd_done(struct spraid_iod *iod)
> > +{
> > +	u16 times = 0;
> > +
> > +	do {
> > +		if (READ_ONCE(iod->state) ==
> > SPRAID_CMD_TMO_COMPLETE)
> > +			break;
> > +		msleep(500);
> > +		times++;
> > +	} while (times <= SPRAID_WAIT_ABNL_CMD_TIMEOUT);
> > +
> > +	/* wait command completion timeout after abort/reset
> > success */
> > +	if (times >= SPRAID_WAIT_ABNL_CMD_TIMEOUT)
> > +		return -ETIMEDOUT;
> > +
> > +	return 0;
> > +}
> > +
> > +static int spraid_abort_handler(struct scsi_cmnd *scmd)
> > +{
> > +	struct spraid_dev *hdev = shost_priv(scmd->device->host);
> > +	struct spraid_iod *iod = scsi_cmd_priv(scmd);
> > +	struct spraid_sdev_hostdata *hostdata;
> > +	u16 hwq, cid;
> > +	int ret;
> > +
> > +	scsi_print_command(scmd);
> > +
> > +	if (!spraid_wait_abnl_cmd_done(iod) ||
> > spraid_check_scmd_completed(scmd) ||
> > +	    hdev->state != SPRAID_LIVE)
> > +		return SUCCESS;
> > +
> > +	hostdata = scmd->device->hostdata;
> > +	spraid_get_tag_from_scmd(scmd, &hwq, &cid);
> > +
> > +	dev_warn(hdev->dev, "cid[%d] qid[%d] timeout, aborting\n",
> > cid, hwq);
> > +	ret = spraid_send_abort_cmd(hdev, hostdata->hdid, hwq,
> > cid);
> > +	if (ret != ADMIN_ERR_TIMEOUT) {
> > +		ret = spraid_wait_abnl_cmd_done(iod);
> > +		if (ret) {
> > +			dev_warn(hdev->dev, "cid[%d] qid[%d] abort
> > failed, not found\n", cid, hwq);
> > +			return FAILED;
> > +		}
> > +		dev_warn(hdev->dev, "cid[%d] qid[%d] abort
> > succ\n", cid, hwq);
> > +		return SUCCESS;
> > +	}
> > +	dev_warn(hdev->dev, "cid[%d] qid[%d] abort failed,
> > timeout\n", cid, hwq);
> > +	return FAILED;
> > +}
> > +
> > +static int spraid_tgt_reset_handler(struct scsi_cmnd *scmd)
> > +{
> > +	struct spraid_dev *hdev = shost_priv(scmd->device->host);
> > +	struct spraid_iod *iod = scsi_cmd_priv(scmd);
> > +	struct spraid_sdev_hostdata *hostdata;
> > +	u16 hwq, cid;
> > +	int ret;
> > +
> > +	scsi_print_command(scmd);
> > +
> > +	if (!spraid_wait_abnl_cmd_done(iod) ||
> > spraid_check_scmd_completed(scmd) ||
> > +	    hdev->state != SPRAID_LIVE)
> > +		return SUCCESS;
> > +
> > +	hostdata = scmd->device->hostdata;
> > +	spraid_get_tag_from_scmd(scmd, &hwq, &cid);
> > +
> > +	dev_warn(hdev->dev, "cid[%d] qid[%d] timeout, target
> > reset\n", cid, hwq);
> > +	ret = spraid_send_reset_cmd(hdev, SPRAID_RESET_TARGET,
> > hostdata->hdid);
> > +	if (ret == 0) {
> > +		ret = spraid_wait_abnl_cmd_done(iod);
> > +		if (ret) {
> > +			dev_warn(hdev->dev, "cid[%d] qid[%d]target
> > reset failed, not found\n",
> > +				 cid, hwq);
> > +			return FAILED;
> > +		}
> > +
> > +		dev_warn(hdev->dev, "cid[%d] qid[%d] target reset
> > success\n", cid, hwq);
> > +		return SUCCESS;
> > +	}
> > +
> > +	dev_warn(hdev->dev, "cid[%d] qid[%d] ret[%d] target reset
> > failed\n", cid, hwq, ret);
> > +	return FAILED;
> > +}
> > +
> > +static int spraid_bus_reset_handler(struct scsi_cmnd *scmd)
> > +{
> > +	struct spraid_dev *hdev = shost_priv(scmd->device->host);
> > +	struct spraid_iod *iod = scsi_cmd_priv(scmd);
> > +	struct spraid_sdev_hostdata *hostdata;
> > +	u16 hwq, cid;
> > +	int ret;
> > +
> > +	scsi_print_command(scmd);
> > +
> > +	if (!spraid_wait_abnl_cmd_done(iod) ||
> > spraid_check_scmd_completed(scmd) ||
> > +	    hdev->state != SPRAID_LIVE)
> > +		return SUCCESS;
> > +
> > +	hostdata = scmd->device->hostdata;
> > +	spraid_get_tag_from_scmd(scmd, &hwq, &cid);
> > +
> > +	dev_warn(hdev->dev, "cid[%d] qid[%d] timeout, bus
> > reset\n", cid, hwq);
> > +	ret = spraid_send_reset_cmd(hdev, SPRAID_RESET_BUS,
> > hostdata->hdid);
> > +	if (ret == 0) {
> > +		ret = spraid_wait_abnl_cmd_done(iod);
> > +		if (ret) {
> > +			dev_warn(hdev->dev, "cid[%d] qid[%d] bus
> > reset failed, not found\n",
> > +				 cid, hwq);
> > +			return FAILED;
> > +		}
> > +
> > +		dev_warn(hdev->dev, "cid[%d] qid[%d] bus reset
> > succ\n", cid, hwq);
> > +		return SUCCESS;
> > +	}
> > +
> > +	dev_warn(hdev->dev, "cid[%d] qid[%d] ret[%d] bus reset
> > failed\n", cid, hwq, ret);
> > +	return FAILED;
> > +}
> > +
> > +static int spraid_shost_reset_handler(struct scsi_cmnd *scmd)
> > +{
> > +	u16 hwq, cid;
> > +	struct spraid_dev *hdev = shost_priv(scmd->device->host);
> > +
> > +	scsi_print_command(scmd);
> > +	if (spraid_check_scmd_completed(scmd) || hdev->state !=
> > SPRAID_LIVE)
> > +		return SUCCESS;
> > +
> > +	spraid_get_tag_from_scmd(scmd, &hwq, &cid);
> > +	dev_warn(hdev->dev, "cid[%d] qid[%d] host reset\n", cid,
> > hwq); +
> > +	if (spraid_reset_work_sync(hdev)) {
> > +		dev_warn(hdev->dev, "cid[%d] qid[%d] host reset
> > failed\n", cid, hwq);
> > +		return FAILED;
> > +	}
> > +
> > +	dev_warn(hdev->dev, "cid[%d] qid[%d] host reset
> > success\n", cid, hwq); +
> > +	return SUCCESS;
> > +}
> > +
> > +static ssize_t csts_pp_show(struct device *cdev, struct
> > device_attribute *attr, char *buf) +{
> > +	struct Scsi_Host *shost = class_to_shost(cdev);
> > +	struct spraid_dev *hdev = shost_priv(shost);
> > +	int ret = -1;
> > +
> > +	if (pci_device_is_present(hdev->pdev)) {
> > +		ret = (readl(hdev->bar + SPRAID_REG_CSTS) &
> > SPRAID_CSTS_PP_MASK);
> > +		ret >>= SPRAID_CSTS_PP_SHIFT;
> > +	}
> > +
> > +	return snprintf(buf, PAGE_SIZE, "%d\n", ret);
> > +}
> > +
> > +static ssize_t csts_shst_show(struct device *cdev, struct
> > device_attribute *attr, char *buf) +{
> > +	struct Scsi_Host *shost = class_to_shost(cdev);
> > +	struct spraid_dev *hdev = shost_priv(shost);
> > +	int ret = -1;
> > +
> > +	if (pci_device_is_present(hdev->pdev)) {
> > +		ret = (readl(hdev->bar + SPRAID_REG_CSTS) &
> > SPRAID_CSTS_SHST_MASK);
> > +		ret >>= SPRAID_CSTS_SHST_SHIFT;
> > +	}
> > +
> > +	return snprintf(buf, PAGE_SIZE, "%d\n", ret);
> > +}
> > +
> > +static ssize_t csts_cfs_show(struct device *cdev, struct
> > device_attribute *attr, char *buf) +{
> > +	struct Scsi_Host *shost = class_to_shost(cdev);
> > +	struct spraid_dev *hdev = shost_priv(shost);
> > +	int ret = -1;
> > +
> > +	if (pci_device_is_present(hdev->pdev)) {
> > +		ret = (readl(hdev->bar + SPRAID_REG_CSTS) &
> > SPRAID_CSTS_CFS_MASK);
> > +		ret >>= SPRAID_CSTS_CFS_SHIFT;
> > +	}
> > +
> > +	return snprintf(buf, PAGE_SIZE, "%d\n", ret);
> > +}
> > +
> > +static ssize_t csts_rdy_show(struct device *cdev, struct
> > device_attribute *attr, char *buf) +{
> > +	struct Scsi_Host *shost = class_to_shost(cdev);
> > +	struct spraid_dev *hdev = shost_priv(shost);
> > +	int ret = -1;
> > +
> > +	if (pci_device_is_present(hdev->pdev))  
> 
> why would the device not be present?
> 
PCIe link may be not stable or the link is down.

> > +		ret = (readl(hdev->bar + SPRAID_REG_CSTS) &
> > SPRAID_CSTS_RDY); +
> > +	return snprintf(buf, PAGE_SIZE, "%d\n", ret);
> > +}
> > +
> > +static ssize_t fw_version_show(struct device *cdev, struct
> > device_attribute *attr, char *buf) +{
> > +	struct Scsi_Host *shost = class_to_shost(cdev);
> > +	struct spraid_dev *hdev = shost_priv(shost);
> > +
> > +	return snprintf(buf, PAGE_SIZE, "%s\n",
> > hdev->ctrl_info->fr); +}
> > +
> > +static DEVICE_ATTR_RO(csts_pp);
> > +static DEVICE_ATTR_RO(csts_shst);
> > +static DEVICE_ATTR_RO(csts_cfs);
> > +static DEVICE_ATTR_RO(csts_rdy);
> > +static DEVICE_ATTR_RO(fw_version);
> > +
> > +static struct attribute *spraid_host_attrs[] = {
> > +	&dev_attr_csts_pp.attr,
> > +	&dev_attr_csts_shst.attr,
> > +	&dev_attr_csts_cfs.attr,
> > +	&dev_attr_csts_rdy.attr,
> > +	&dev_attr_fw_version.attr,
> > +	NULL,
> > +};
> > +
> > +static const struct attribute_group spraid_host_attr_group = {
> > +	.attrs = spraid_host_attrs
> > +};
> > +
> > +static const struct attribute_group *spraid_host_attr_groups[] = {
> > +	&spraid_host_attr_group,
> > +	NULL,  
> 
> no need for ',' on sentinel
> 
Ok, will remove it.

> > +};
> > +
> > +static struct scsi_host_template spraid_driver_template = {
> > +	.module			= THIS_MODULE,
> > +	.name			= "Ramaxel Logic spraid
> > driver",
> > +	.proc_name		= "spraid",
> > +	.queuecommand		= spraid_queue_command,
> > +	.slave_alloc		= spraid_slave_alloc,
> > +	.slave_destroy		= spraid_slave_destroy,
> > +	.slave_configure	= spraid_slave_configure,
> > +	.eh_timed_out		= spraid_scmd_timeout,
> > +	.eh_abort_handler	= spraid_abort_handler,
> > +	.eh_target_reset_handler	= spraid_tgt_reset_handler,
> > +	.eh_bus_reset_handler		=
> > spraid_bus_reset_handler,
> > +	.eh_host_reset_handler		=
> > spraid_shost_reset_handler,
> > +	.change_queue_depth		=
> > scsi_change_queue_depth,
> > +	.host_tagset			= 0,
> > +	.this_id			= -1,
> > +	.shost_groups			=
> > spraid_host_attr_groups, +};
> > +
> > +static void spraid_shutdown(struct pci_dev *pdev)
> > +{
> > +	struct spraid_dev *hdev = pci_get_drvdata(pdev);
> > +
> > +	spraid_remove_io_queues(hdev);
> > +	spraid_disable_admin_queue(hdev, true);
> > +}
> > +
> > +/* bsg dispatch user command */
> > +static int spraid_bsg_host_dispatch(struct bsg_job *job)
> > +{
> > +	struct Scsi_Host *shost = dev_to_shost(job->dev);
> > +	struct spraid_dev *hdev = shost_priv(shost);
> > +	struct request *rq = blk_mq_rq_from_pdu(job);
> > +	struct spraid_bsg_request *bsg_req = job->request;
> > +	int ret = 0;
> > +
> > +	dev_info(hdev->dev, "[%s]msgcode[%d], msglen[%d],
> > timeout[%d], req_nsge[%d], req_len[%d]\n",
> > +		 __func__, bsg_req->msgcode, job->request_len,
> > rq->timeout,
> > +		 job->request_payload.sg_cnt,
> > job->request_payload.payload_len); +
> > +	job->reply_len = 0;
> > +
> > +	switch (bsg_req->msgcode) {
> > +	case SPRAID_BSG_ADM:
> > +		ret = spraid_user_admin_cmd(hdev, job);
> > +		break;
> > +	case SPRAID_BSG_IOQ:
> > +		ret = spraid_user_ioq_cmd(hdev, job);
> > +		break;
> > +	default:
> > +		dev_info(hdev->dev, "[%s] unsupport
> > msgcode[%d]\n", __func__, bsg_req->msgcode);
> > +		break;
> > +	}
> > +
> > +	bsg_job_done(job, ret, 0);
> > +	return 0;
> > +}
> > +
> > +static inline void spraid_remove_bsg(struct spraid_dev *hdev)
> > +{
> > +	struct request_queue *q = hdev->bsg_queue;
> > +
> > +	if (q)
> > +		bsg_remove_queue(q);
> > +}
> > +static int spraid_probe(struct pci_dev *pdev, const struct
> > pci_device_id *id) +{
> > +	struct spraid_dev *hdev;
> > +	struct Scsi_Host *shost;
> > +	int node, ret;
> > +	char bsg_name[15];
> > +
> > +	shost = scsi_host_alloc(&spraid_driver_template,
> > sizeof(*hdev));
> > +	if (!shost) {
> > +		dev_err(&pdev->dev, "Failed to allocate scsi
> > host\n");
> > +		return -ENOMEM;
> > +	}
> > +	hdev = shost_priv(shost);
> > +	hdev->pdev = pdev;
> > +	hdev->dev = get_device(&pdev->dev);  
> 
> why need get_device()?
> 
Will remove get_device() in the next version.

> > +
> > +	node = dev_to_node(hdev->dev);
> > +	if (node == NUMA_NO_NODE) {
> > +		node = first_memory_node;
> > +		set_dev_node(hdev->dev, node);  
> 
> why do this?
>
Will remove if statement and use NUMA_NO_NODE in the next version.
 
> > +	}
> > +	hdev->numa_node = node;  
> 
> again, this seems to be something which we already hold indirectly in 
> the hdev struct
> 
It is for the convenience of later use.

> > +	hdev->shost = shost;
> > +	pci_set_drvdata(pdev, hdev);
> > +
> > +	ret = spraid_dev_map(hdev);
> > +	if (ret)
> > +		goto put_dev;
> > +
> > +	init_rwsem(&hdev->devices_rwsem);
> > +	INIT_WORK(&hdev->scan_work, spraid_scan_work);
> > +	INIT_WORK(&hdev->timesyn_work, spraid_timesyn_work);
> > +	INIT_WORK(&hdev->reset_work, spraid_reset_work);
> > +	spin_lock_init(&hdev->state_lock);
> > +
> > +	ret = spraid_alloc_resources(hdev);
> > +	if (ret)
> > +		goto dev_unmap;
> > +
> > +	ret = spraid_pci_enable(hdev);
> > +	if (ret)
> > +		goto resources_free;
> > +
> > +	ret = spraid_setup_admin_queue(hdev);
> > +	if (ret)
> > +		goto pci_disable;
> > +
> > +	ret = spraid_init_ctrl_info(hdev);
> > +	if (ret)
> > +		goto disable_admin_q;
> > +
> > +	ret = spraid_alloc_iod_ext_mem_pool(hdev);
> > +	if (ret)
> > +		goto disable_admin_q;
> > +
> > +	ret = spraid_setup_io_queues(hdev);
> > +	if (ret)
> > +		goto free_iod_mempool;
> > +
> > +	spraid_shost_init(hdev);
> > +
> > +	ret = scsi_add_host(hdev->shost, hdev->dev);
> > +	if (ret) {
> > +		dev_err(hdev->dev, "Add shost to system failed,
> > ret: %d\n",
> > +			ret);
> > +		goto remove_io_queues;
> > +	}
> > +
> > +	snprintf(bsg_name, sizeof(bsg_name), "spraid%d",
> > shost->host_no);
> > +	hdev->bsg_queue = bsg_setup_queue(&shost->shost_gendev,
> > bsg_name,
> > +
> > spraid_bsg_host_dispatch, NULL,
> > +					  spraid_cmd_size(hdev,
> > true, false));
> > +	if (IS_ERR(hdev->bsg_queue)) {
> > +		dev_err(hdev->dev, "err, setup bsg failed\n");
> > +		hdev->bsg_queue = NULL;
> > +		goto remove_io_queues;
> > +	}
> > +
> > +	if (hdev->online_queues == SPRAID_ADMIN_QUEUE_NUM) {
> > +		dev_warn(hdev->dev, "warn only admin queue can be
> > used\n");  
> 
> is this case really ok to continue?
> 
Yes. It is possible that there is only Admin queue.

> > +		return 0;
> > +	}
> > +
> > +	hdev->state = SPRAID_LIVE;
> > +
> > +	spraid_send_all_aen(hdev);
> > +
> > +	ret = spraid_dev_list_init(hdev);
> > +	if (ret)
> > +		goto remove_bsg;
> > +
> > +	ret = spraid_configure_timestamp(hdev);
> > +	if (ret)
> > +		dev_warn(hdev->dev, "init set timestamp failed\n");
> > +
> > +	ret = spraid_alloc_ioq_ptcmds(hdev);
> > +	if (ret)
> > +		goto remove_bsg;
> > +
> > +	scsi_scan_host(hdev->shost);
> > +
> > +	return 0;
> > +
> > +remove_bsg:
> > +	spraid_remove_bsg(hdev);
> > +remove_io_queues:
> > +	spraid_remove_io_queues(hdev);
> > +free_iod_mempool:
> > +	spraid_free_iod_ext_mem_pool(hdev);
> > +disable_admin_q:
> > +	spraid_disable_admin_queue(hdev, false);
> > +pci_disable:
> > +	spraid_pci_disable(hdev);
> > +resources_free:
> > +	spraid_free_resources(hdev);
> > +dev_unmap:
> > +	spraid_dev_unmap(hdev);
> > +put_dev:
> > +	put_device(hdev->dev);
> > +	scsi_host_put(shost);
> > +
> > +	return -ENODEV;
> > +}
> > +
> > +static void spraid_remove(struct pci_dev *pdev)
> > +{
> > +	struct spraid_dev *hdev = pci_get_drvdata(pdev);
> > +	struct Scsi_Host *shost = hdev->shost;
> > +
> > +	dev_info(hdev->dev, "enter spraid remove\n");
> > +
> > +	spraid_change_host_state(hdev, SPRAID_DELETING);
> > +
> > +	if (!pci_device_is_present(pdev)) {
> > +		scsi_block_requests(shost);
> > +		spraid_back_all_io(hdev);
> > +		scsi_unblock_requests(shost);
> > +	}
> > +
> > +	flush_work(&hdev->reset_work);
> > +	spraid_remove_bsg(hdev);
> > +	scsi_remove_host(shost);
> > +	spraid_free_ioq_ptcmds(hdev);
> > +	kfree(hdev->devices);
> > +	spraid_remove_io_queues(hdev);
> > +	spraid_free_iod_ext_mem_pool(hdev);
> > +	spraid_disable_admin_queue(hdev, false);
> > +	spraid_pci_disable(hdev);
> > +	spraid_free_resources(hdev);
> > +	spraid_dev_unmap(hdev);
> > +	put_device(hdev->dev);
> > +	scsi_host_put(shost);
> > +
> > +	dev_info(hdev->dev, "exit spraid remove\n");
> > +}
> > +
> > +static const struct pci_device_id spraid_id_table[] = {
> > +	{ PCI_DEVICE(PCI_VENDOR_ID_RAMAXEL_LOGIC,
> > SPRAID_SERVER_DEVICE_HBA_DID) },
> > +	{ PCI_DEVICE(PCI_VENDOR_ID_RAMAXEL_LOGIC,
> > SPRAID_SERVER_DEVICE_RAID_DID) },
> > +	{ 0, }
> > +};
> > +MODULE_DEVICE_TABLE(pci, spraid_id_table);
> > +
> > +static struct pci_driver spraid_driver = {
> > +	.name		= "spraid",
> > +	.id_table	= spraid_id_table,
> > +	.probe		= spraid_probe,
> > +	.remove		= spraid_remove,
> > +	.shutdown	= spraid_shutdown,
> > +};
> > +
> > +static int __init spraid_init(void)
> > +{
> > +	int ret;
> > +
> > +	spraid_wq = alloc_workqueue("spraid-wq", WQ_UNBOUND |
> > WQ_MEM_RECLAIM | WQ_SYSFS, 0);
> > +	if (!spraid_wq)
> > +		return -ENOMEM;
> > +
> > +	spraid_class = class_create(THIS_MODULE, "spraid");
> > +	if (IS_ERR(spraid_class)) {
> > +		ret = PTR_ERR(spraid_class);
> > +		goto destroy_wq;
> > +	}
> > +
> > +	ret = pci_register_driver(&spraid_driver);
> > +	if (ret < 0)
> > +		goto destroy_class;
> > +
> > +	return 0;
> > +
> > +destroy_class:
> > +	class_destroy(spraid_class);
> > +destroy_wq:
> > +	destroy_workqueue(spraid_wq);
> > +
> > +	return ret;
> > +}
> > +
> > +static void __exit spraid_exit(void)
> > +{
> > +	pci_unregister_driver(&spraid_driver);
> > +	class_destroy(spraid_class);
> > +	destroy_workqueue(spraid_wq);
> > +	ida_destroy(&spraid_instance_ida);
> > +}
> > +
> > +MODULE_AUTHOR("songyl@ramaxel.com");
> > +MODULE_DESCRIPTION("Ramaxel Memory Technology SPraid Driver");
> > +MODULE_LICENSE("GPL");
> > +MODULE_VERSION(SPRAID_DRV_VERSION);
> > +module_init(spraid_init);
> > +module_exit(spraid_exit);  
>

     prev parent reply	other threads:[~2022-03-30  2:54 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-14  2:53 [PATCH v5] scsi:spraid: initial commit of Ramaxel spraid driver Yanling Song
2022-03-15 14:43 ` John Garry
2022-03-16 23:03   ` Bart Van Assche
2022-03-17  8:46     ` John Garry
2022-03-18 12:52   ` Yanling Song
2022-03-24 16:34 ` John Garry
2022-03-30  2:54   ` Yanling Song [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220330105409.0000636e@ramaxel.com \
    --to=songyl@ramaxel.com \
    --cc=bvanassche@acm.org \
    --cc=john.garry@huawei.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.