All of lore.kernel.org
 help / color / mirror / Atom feed
From: Balakrishna Godavarthi <bgodavar@codeaurora.org>
To: Marcel Holtmann <marcel@holtmann.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>,
	Matthias Kaehlcke <mka@chromium.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Bluez mailing list <linux-bluetooth@vger.kernel.org>,
	hemantg@codeaurora.org, linux-arm-msm@vger.kernel.org
Subject: Re: [RFC,v2] Bluetooth: hci_qca: Collect controller memory dump during SSR
Date: Mon, 21 Jan 2019 20:09:49 +0530	[thread overview]
Message-ID: <fa15134608944498d9f5bc7950b98cde@codeaurora.org> (raw)
In-Reply-To: <E763DD7D-CC97-4594-87A8-86480616F08C@holtmann.org>

Hi Marcel,

On 2019-01-18 14:24, Marcel Holtmann wrote:
> Hi Balakrishna,
> 
>>>>>> We will collect the ramdump of BT controller when hardware error 
>>>>>> event
>>>>>> received before rebooting the HCI layer. Before restarting a 
>>>>>> subsystem
>>>>>> or a process running on a subsystem, it is often required to 
>>>>>> request
>>>>>> either a subsystem or a process to perform proper cache dump and
>>>>>> software failure reason into a memory buffer which application
>>>>>> processor can retrieve afterwards. SW developers can often provide
>>>>>> initial investigation by looking into that debugging information.
>>>>>> Signed-off-by: Balakrishna Godavarthi <bgodavar@codeaurora.org>
>>>>>> ---
>>>>>> changes v2:
>>>>>> * entirely an newer approach handling with an work queue.
>>>>>> ---
>>>>>> drivers/bluetooth/hci_qca.c | 289 
>>>>>> ++++++++++++++++++++++++++++++++++--
>>>>>> 1 file changed, 278 insertions(+), 11 deletions(-)
>>>>>> diff --git a/drivers/bluetooth/hci_qca.c 
>>>>>> b/drivers/bluetooth/hci_qca.c
>>>>>> index f036c8f98ea3..62b768bc32ec 100644
>>>>>> --- a/drivers/bluetooth/hci_qca.c
>>>>>> +++ b/drivers/bluetooth/hci_qca.c
>>>>>> @@ -32,6 +32,7 @@
>>>>>> #include <linux/clk.h>
>>>>>> #include <linux/debugfs.h>
>>>>>> #include <linux/delay.h>
>>>>>> +#include <linux/devcoredump.h>
>>>>>> #include <linux/device.h>
>>>>>> #include <linux/gpio/consumer.h>
>>>>>> #include <linux/mod_devicetable.h>
>>>>> don't we have some crashdump core facility that could be utilized 
>>>>> here?
>>>> [Bala]: no i don't think so. that is reason calling devcoredump.h
>>> what is wrong with using devcoredump.h?
>> 
>> [Bala]: using the  devcoredump.h
>> 
>>>>>> @@ -57,9 +58,10 @@
>>>>>> /* Controller states */
>>>>>> #define STATE_IN_BAND_SLEEP_ENABLED	1
>>>>>> -#define IBS_WAKE_RETRANS_TIMEOUT_MS	100
>>>>>> +#define	IBS_WAKE_RETRANS_TIMEOUT_MS	100
>>>>>> #define IBS_TX_IDLE_TIMEOUT_MS		2000
>>>>>> -#define BAUDRATE_SETTLE_TIMEOUT_MS	300
>>>>>> +#define	BAUDRATE_SETTLE_TIMEOUT_MS	300
>>>>>> +#define MEMDUMP_COLLECTION_TIMEOUT_MS	8000
>>>>>> /* susclk rate */
>>>>>> #define SUSCLK_RATE_32KHZ	32768
>>>>>> @@ -67,6 +69,13 @@
>>>>>> /* Controller debug log header */
>>>>>> #define QCA_DEBUG_HANDLE	0x2EDC
>>>>>> +/* Controller dump header */
>>>>>> +#define QCA_SSR_DUMP_HANDLE		0x0108
>>>>>> +#define QCA_DUMP_PACKET_SIZE		255
>>>>>> +#define QCA_LAST_SEQUENCE_NUM		0xFFFF
>>>>>> +#define QCA_CRASHBYTE_PACKET_LEN	1100
>>>>>> +#define QCA_MEMDUMP_BYTE		0xFB
>>>>>> +
>>>>>> /* HCI_IBS transmit side sleep protocol states */
>>>>>> enum tx_ibs_states {
>>>>>> 	HCI_IBS_TX_ASLEEP,
>>>>>> @@ -89,12 +98,41 @@ enum hci_ibs_clock_state_vote {
>>>>>> 	HCI_IBS_RX_VOTE_CLOCK_OFF,
>>>>>> };
>>>>>> +/* Controller memory dump states */
>>>>>> +enum qca_memdump_states {
>>>>>> +	QCA_MEMDUMP_IDLE,
>>>>>> +	QCA_MEMDUMP_COLLECTING,
>>>>>> +	QCA_MEMDUMP_COLLECTED,
>>>>>> +	QCA_MEMDUMP_TIMEOUT,
>>>>>> +};
>>>>>> +
>>>>>> +struct qca_memdump_data {
>>>>>> +	char *memdump_buf_head;
>>>>>> +	char *memdump_buf_tail;
>>>>>> +	u32 current_seq_no;
>>>>>> +	u32 received_dump;
>>>>>> +};
>>>>>> +
>>>>>> +struct qca_memdump_event_hdr {
>>>>>> +	__u8    evt;
>>>>>> +	__u8    plen;
>>>>>> +	__u16   opcode;
>>>>>> +	__u16   seq_no;
>>>>>> +	__u8    reserved;
>>>>>> +} __packed;
>>>>>> +
>>>>>> +
>>>>>> +struct qca_dump_size {
>>>>>> +	u32 dump_size;
>>>>>> +} __packed;
>>>>>> +
>>>>>> struct qca_data {
>>>>>> 	struct hci_uart *hu;
>>>>>> 	struct sk_buff *rx_skb;
>>>>>> 	struct sk_buff_head txq;
>>>>>> -	struct sk_buff_head tx_wait_q;	/* HCI_IBS wait queue	*/
>>>>>> -	spinlock_t hci_ibs_lock;	/* HCI_IBS state lock	*/
>>>>>> +	struct sk_buff_head tx_wait_q;		/* HCI_IBS wait queue	*/
>>>>>> +	struct sk_buff_head rx_memdump_q;	/* Memdump wait queue	*/
>>>>>> +	spinlock_t hci_ibs_lock;		/* HCI_IBS state lock	*/
>>>>>> 	u8 tx_ibs_state;	/* HCI_IBS transmit side power state*/
>>>>>> 	u8 rx_ibs_state;	/* HCI_IBS receive side power state */
>>>>>> 	bool tx_vote;		/* Clock must be on for TX */
>>>>>> @@ -103,12 +141,17 @@ struct qca_data {
>>>>>> 	u32 tx_idle_delay;
>>>>>> 	struct timer_list wake_retrans_timer;
>>>>>> 	u32 wake_retrans;
>>>>>> +	struct timer_list memdump_timer;
>>>>>> +	u32 memdump_delay;
>>>>>> 	struct workqueue_struct *workqueue;
>>>>>> 	struct work_struct ws_awake_rx;
>>>>>> 	struct work_struct ws_awake_device;
>>>>>> 	struct work_struct ws_rx_vote_off;
>>>>>> 	struct work_struct ws_tx_vote_off;
>>>>>> +	struct work_struct ctrl_memdump_evt;
>>>>>> +	struct qca_memdump_data *qca_memdump;
>>>>>> 	unsigned long flags;
>>>>>> +	enum qca_memdump_states memdump_state;
>>>>>> 	/* For debugging purpose */
>>>>>> 	u64 ibs_sent_wacks;
>>>>>> @@ -173,6 +216,7 @@ struct qca_serdev {
>>>>>> static int qca_power_setup(struct hci_uart *hu, bool on);
>>>>>> static void qca_power_shutdown(struct hci_uart *hu);
>>>>>> static int qca_power_off(struct hci_dev *hdev);
>>>>>> +static void qca_controller_memdump(struct work_struct *work);
>>>>>> static void __serial_clock_on(struct tty_struct *tty)
>>>>>> {
>>>>>> @@ -446,6 +490,21 @@ static void 
>>>>>> hci_ibs_wake_retrans_timeout(struct timer_list *t)
>>>>>> 		hci_uart_tx_wakeup(hu);
>>>>>> }
>>>>>> +static void hci_memdump_timeout(struct timer_list *t)
>>>>>> +{
>>>>>> +	struct qca_data *qca = from_timer(qca, t, tx_idle_timer);
>>>>>> +	struct hci_uart *hu = qca->hu;
>>>>>> +	struct qca_memdump_data *qca_memdump = qca->qca_memdump;
>>>>>> +	char *memdump_buf = qca_memdump->memdump_buf_tail;
>>>>>> +
>>>>>> +	bt_dev_err(hu->hdev, "clearing allocated memory due to memdump 
>>>>>> timeout");
>>>>>> +	kfree(memdump_buf);
>>>>>> +	kfree(qca_memdump);
>>>>>> +	qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
>>>>>> +	del_timer(&qca->memdump_timer);
>>>>>> +	cancel_work_sync(&qca->ctrl_memdump_evt);
>>>>>> +}
>>>>>> +
>>>>>> /* Initialize protocol */
>>>>>> static int qca_open(struct hci_uart *hu)
>>>>>> {
>>>>>> @@ -461,6 +520,7 @@ static int qca_open(struct hci_uart *hu)
>>>>>> 	skb_queue_head_init(&qca->txq);
>>>>>> 	skb_queue_head_init(&qca->tx_wait_q);
>>>>>> +	skb_queue_head_init(&qca->rx_memdump_q);
>>>>>> 	spin_lock_init(&qca->hci_ibs_lock);
>>>>>> 	qca->workqueue = alloc_ordered_workqueue("qca_wq", 0);
>>>>>> 	if (!qca->workqueue) {
>>>>>> @@ -473,6 +533,7 @@ static int qca_open(struct hci_uart *hu)
>>>>>> 	INIT_WORK(&qca->ws_awake_device, qca_wq_awake_device);
>>>>>> 	INIT_WORK(&qca->ws_rx_vote_off, qca_wq_serial_rx_clock_vote_off);
>>>>>> 	INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off);
>>>>>> +	INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump);
>>>>>> 	qca->hu = hu;
>>>>>> @@ -500,7 +561,7 @@ static int qca_open(struct hci_uart *hu)
>>>>>> 	qca->tx_votes_off = 0;
>>>>>> 	qca->rx_votes_on = 0;
>>>>>> 	qca->rx_votes_off = 0;
>>>>>> -
>>>>>> +	qca->memdump_state = QCA_MEMDUMP_IDLE;
>>>>>> 	hu->priv = qca;
>>>>>> 	if (hu->serdev) {
>>>>>> @@ -528,6 +589,9 @@ static int qca_open(struct hci_uart *hu)
>>>>>> 	timer_setup(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, 0);
>>>>>> 	qca->tx_idle_delay = IBS_TX_IDLE_TIMEOUT_MS;
>>>>>> +	timer_setup(&qca->memdump_timer, hci_memdump_timeout, 0);
>>>>>> +	qca->memdump_delay = MEMDUMP_COLLECTION_TIMEOUT_MS;
>>>>>> +
>>>>>> 	BT_DBG("HCI_UART_QCA open, tx_idle_delay=%u, wake_retrans=%u",
>>>>>> 	       qca->tx_idle_delay, qca->wake_retrans);
>>>>>> @@ -605,8 +669,10 @@ static int qca_close(struct hci_uart *hu)
>>>>>> 	skb_queue_purge(&qca->tx_wait_q);
>>>>>> 	skb_queue_purge(&qca->txq);
>>>>>> +	skb_queue_purge(&qca->rx_memdump_q);
>>>>>> 	del_timer(&qca->tx_idle_timer);
>>>>>> 	del_timer(&qca->wake_retrans_timer);
>>>>>> +	del_timer(&qca->memdump_timer);
>>>>>> 	destroy_workqueue(qca->workqueue);
>>>>>> 	qca->hu = NULL;
>>>>>> @@ -866,6 +932,141 @@ static int qca_recv_acl_data(struct hci_dev 
>>>>>> *hdev, struct sk_buff *skb)
>>>>>> 	return hci_recv_frame(hdev, skb);
>>>>>> }
>>>>>> +static void qca_controller_memdump(struct work_struct *work)
>>>>>> +{
>>>>>> +	struct qca_data *qca = container_of(work, struct qca_data,
>>>>>> +					    ctrl_memdump_evt);
>>>>>> +	struct hci_uart *hu = qca->hu;
>>>>>> +	struct sk_buff *skb;
>>>>>> +	struct qca_memdump_event_hdr *cmd_hdr;
>>>>>> +	struct qca_memdump_data *qca_memdump = qca->qca_memdump;
>>>>>> +	struct qca_dump_size *dump;
>>>>>> +	char *memdump_buf;
>>>>>> +	char nullBuff[QCA_DUMP_PACKET_SIZE] = { 0 };
>>>>>> +	u16 opcode, seq_no;
>>>>>> +	u32 dump_size;
>>>>>> +
>>>>>> +	while ((skb = skb_dequeue(&qca->rx_memdump_q))) {
>>>>>> +
>>>>>> +		if (!qca_memdump) {
>>>>>> +			qca_memdump = kzalloc(sizeof(struct qca_memdump_data),
>>>>>> +					      GFP_ATOMIC);
>>>>>> +			if (!qca_memdump)
>>>>>> +				return;
>>>>>> +
>>>>>> +			qca->qca_memdump = qca_memdump;
>>>>>> +		}
>>>>>> +
>>>>>> +		qca->memdump_state = QCA_MEMDUMP_COLLECTING;
>>>>>> +		cmd_hdr = (void *) skb->data;
>>>>>> +		opcode = __le16_to_cpu(cmd_hdr->opcode);
>>>>>> +		seq_no = __le16_to_cpu(cmd_hdr->seq_no);
>>>>>> +		skb_pull(skb, sizeof(struct qca_memdump_event_hdr));
>>>>>> +
>>>>>> +		if (!seq_no) {
>>>>>> +
>>>>>> +			/* This is the first frame of memdump packet from
>>>>>> +			 * the controller, Disable IBS to recevie dump
>>>>>> +			 * with out any interruption, ideally time required for
>>>>>> +			 * the controller to send the dump is 8 seconds. let us
>>>>>> +			 * start timer to handle this asynchronous activity.
>>>>>> +			 */
>>>>>> +			clear_bit(STATE_IN_BAND_SLEEP_ENABLED, &qca->flags);
>>>>>> +
>>>>>> +			dump = (void *) skb->data;
>>>>>> +			dump_size = __le32_to_cpu(dump->dump_size);
>>>>>> +			if (!(dump_size)) {
>>>>>> +				bt_dev_err(hu->hdev, "QCA invalid memdump size");
>>>>>> +				kfree_skb(skb);
>>>>>> +				return;
>>>>>> +			}
>>>>>> +
>>>>>> +			bt_dev_info(hu->hdev, "QCA: collecting memdump started"
>>>>>> +				     "of size:%u", dump_size);
>>>>>> +			mod_timer(&qca->memdump_timer, (jiffies +
>>>>>> +				  msecs_to_jiffies(qca->memdump_delay)));
>>>>>> +
>>>>>> +			skb_pull(skb, sizeof(dump_size));
>>>>>> +			memdump_buf = vmalloc(dump_size);
>>>>>> +			qca_memdump->memdump_buf_head = memdump_buf;
>>>>>> +			qca_memdump->memdump_buf_tail = memdump_buf;
>>>>>> +		}
>>>>>> +
>>>>>> +		memdump_buf = qca_memdump->memdump_buf_tail;
>>>>>> +
>>>>>> +		/* If sequence no 0 is missed then there is no point in
>>>>>> +		 * accepting the other sequences.
>>>>>> +		 */
>>>>>> +		if (!memdump_buf) {
>>>>>> +			bt_dev_err(hu->hdev, "QCA: Discarding other packets");
>>>>>> +			kfree(qca_memdump);
>>>>>> +			kfree_skb(skb);
>>>>>> +			qca->qca_memdump = NULL;
>>>>>> +			return;
>>>>>> +		}
>>>>>> +
>>>>>> +		/* There could be chance of missing some packets from
>>>>>> +		 * the controller. In such cases let us store the dummy
>>>>>> +		 * packets in the buffer.
>>>>>> +		 */
>>>>>> +		while ((seq_no > qca_memdump->current_seq_no + 1) &&
>>>>>> +			seq_no != QCA_LAST_SEQUENCE_NUM) {
>>>>>> +			bt_dev_err(hu->hdev, "QCA controller missed packet:%d",
>>>>>> +				   qca_memdump->current_seq_no);
>>>>>> +			memcpy(memdump_buf, nullBuff, QCA_DUMP_PACKET_SIZE);
>>>>>> +			memdump_buf = memdump_buf + QCA_DUMP_PACKET_SIZE;
>>>>>> +			qca_memdump->received_dump += QCA_DUMP_PACKET_SIZE;
>>>>>> +			qca_memdump->current_seq_no++;
>>>>>> +		}
>>>>>> +
>>>>>> +		memcpy(memdump_buf, (unsigned char *) skb->data, skb->len);
>>>>>> +		memdump_buf = memdump_buf + skb->len;
>>>>>> +		qca_memdump->memdump_buf_tail = memdump_buf;
>>>>>> +		qca_memdump->current_seq_no = seq_no + 1;
>>>>>> +		qca_memdump->received_dump += skb->len;
>>>>>> +		qca->qca_memdump = qca_memdump;
>>>>>> +		kfree_skb(skb);
>>>>>> +		if (seq_no == QCA_LAST_SEQUENCE_NUM) {
>>>>>> +			bt_dev_info(hu->hdev, "QCA writing crash dump of size %d 
>>>>>> bytes",
>>>>>> +				   qca_memdump->received_dump);
>>>>>> +			memdump_buf = qca_memdump->memdump_buf_head;
>>>>>> +			dev_coredumpv(&hu->serdev->dev, memdump_buf,
>>>>>> +				      qca_memdump->received_dump, GFP_KERNEL);
>>>>>> +			// Revisit for free(memdump) if needed.
>>>>>> +			del_timer(&qca->memdump_timer);
>>>>>> +			kfree(qca->qca_memdump);
>>>>>> +			qca->qca_memdump = NULL;
>>>>>> +			qca->memdump_state = QCA_MEMDUMP_COLLECTED;
>>>>>> +		}
>>>>>> +	}
>>>>>> +
>>>>>> +}
>>>>>> +
>>>>>> +int qca_controller_memdump_event(struct hci_dev *hdev, struct 
>>>>>> sk_buff *skb)
>>>>>> +{
>>>>>> +	struct hci_uart *hu = hci_get_drvdata(hdev);
>>>>>> +	struct qca_data *qca = hu->priv;
>>>>>> +
>>>>>> +	skb_queue_tail(&qca->rx_memdump_q, skb);
>>>>>> +	queue_work(qca->workqueue, &qca->ctrl_memdump_evt);
>>>>>> +
>>>>>> +	return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static int qca_recv_event_data(struct hci_dev *hdev, struct 
>>>>>> sk_buff *skb)
>>>>>> +{
>>>>>> +	/* We receive chip memory dump as an event packet, With a 
>>>>>> dedicated
>>>>>> +	 * handler followed by a hardware error event. When this event 
>>>>>> is
>>>>>> +	 * received we store dump into a file before closing hci. This
>>>>>> +	 * dump will help in triaging the issues.
>>>>>> +	 */
>>>>>> +	if ((skb->data[0] == HCI_VENDOR_PKT) &&
>>>>>> +	    (get_unaligned_be16((skb->data) + 2) == 
>>>>>> QCA_SSR_DUMP_HANDLE))
>>>>> The (skb->data) is pointless, just do skb->data.
>>>> [Bala]: will update.
>>>>>> +		return qca_controller_memdump_event(hdev, skb);
>>>>>> +
>>>>>> +	return hci_recv_frame(hdev, skb);
>>>>>> +}
>>>>>> +
>>>>>> #define QCA_IBS_SLEEP_IND_EVENT \
>>>>>> 	.type = HCI_IBS_SLEEP_IND, \
>>>>>> 	.hlen = 0, \
>>>>>> @@ -888,12 +1089,12 @@ static int qca_recv_acl_data(struct hci_dev 
>>>>>> *hdev, struct sk_buff *skb)
>>>>>> 	.maxlen = HCI_MAX_IBS_SIZE
>>>>>> static const struct h4_recv_pkt qca_recv_pkts[] = {
>>>>>> -	{ H4_RECV_ACL,             .recv = qca_recv_acl_data },
>>>>>> -	{ H4_RECV_SCO,             .recv = hci_recv_frame    },
>>>>>> -	{ H4_RECV_EVENT,           .recv = hci_recv_frame    },
>>>>>> -	{ QCA_IBS_WAKE_IND_EVENT,  .recv = qca_ibs_wake_ind  },
>>>>>> -	{ QCA_IBS_WAKE_ACK_EVENT,  .recv = qca_ibs_wake_ack  },
>>>>>> -	{ QCA_IBS_SLEEP_IND_EVENT, .recv = qca_ibs_sleep_ind },
>>>>>> +	{ H4_RECV_ACL,             .recv = qca_recv_acl_data	},
>>>>>> +	{ H4_RECV_SCO,             .recv = hci_recv_frame	},
>>>>>> +	{ H4_RECV_EVENT,           .recv = qca_recv_event_data	},
>>>>> call it qca_recv_event and don't realign the whole table.
>>>> [Bala]: will update.
>>>>>> +	{ QCA_IBS_WAKE_IND_EVENT,  .recv = qca_ibs_wake_ind	},
>>>>>> +	{ QCA_IBS_WAKE_ACK_EVENT,  .recv = qca_ibs_wake_ack	},
>>>>>> +	{ QCA_IBS_SLEEP_IND_EVENT, .recv = qca_ibs_sleep_ind	},
>>>>>> };
>>>>>> static int qca_recv(struct hci_uart *hu, const void *data, int 
>>>>>> count)
>>>>>> @@ -1114,6 +1315,68 @@ static int qca_set_speed(struct hci_uart 
>>>>>> *hu, enum qca_speed_type speed_type)
>>>>>> 	return 0;
>>>>>> }
>>>>>> +static int qca_send_crashbuffer(struct hci_uart *hu)
>>>>>> +{
>>>>>> +	struct qca_data *qca = hu->priv;
>>>>>> +	struct sk_buff *skb;
>>>>>> +	unsigned char buf[QCA_CRASHBYTE_PACKET_LEN];
>>>>>> +
>>>>>> +	bt_dev_dbg(hu->hdev, "sending soc crash buffer to controller");
>>>>>> +
>>>>>> +	/* We forcefully crash the controller, by sending 0xfb byte for
>>>>>> +	 * 1024 times. We also might have chance of losing data, To be
>>>>>> +	 * on safer side we send 1100 bytes to the SoC.
>>>>>> +	 */
>>>>>> +	memset(buf, QCA_MEMDUMP_BYTE, QCA_CRASHBYTE_PACKET_LEN);
>>>>>> +
>>>>>> +	skb = bt_skb_alloc(QCA_CRASHBYTE_PACKET_LEN, GFP_KERNEL);
>>>>>> +	if (!skb) {
>>>>>> +		bt_dev_err(hu->hdev, "Failed to allocate memory for skb 
>>>>>> packet");
>>>>>> +		return -ENOMEM;
>>>>>> +	}
>>>>>> +
>>>>>> +	skb_put_data(skb, buf, QCA_CRASHBYTE_PACKET_LEN);
>>>>>> +	hci_skb_pkt_type(skb) = HCI_COMMAND_PKT;
>>>>>> +
>>>>>> +	skb_queue_tail(&qca->txq, skb);
>>>>>> +	hci_uart_tx_wakeup(hu);
>>>>>> +
>>>>>> +	return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static void qca_hw_error(struct hci_dev *hdev, u8 code)
>>>>>> +{
>>>>>> +	struct hci_uart *hu = hci_get_drvdata(hdev);
>>>>>> +	struct qca_data *qca = hu->priv;
>>>>>> +
>>>>>> +	bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state);
>>>>>> +
>>>>>> +	if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
>>>>>> +		/* If hardware error event received for other than QCA
>>>>>> +		 * soc memory dump event, then we need to crash the SOC
>>>>>> +		 * and wait here for 8 seconds to get the dump packets.
>>>>>> +		 * This will block main thread to be on hold until we
>>>>>> +		 * collect dump.
>>>>>> +		 */
>>>>>> +		mod_timer(&qca->memdump_timer,
>>>>>> +			  (jiffies + msecs_to_jiffies(qca->memdump_delay)));
>>>>> Why a timer that then ends up in interrupt context?
>>>> [Bala]: some times we only receive hardware error from the chip, so 
>>>> we are sending an special buffer to request the
>>>>       chip for memory dump. we are using an timer to check whether 
>>>> the data memdump is collected. if the timer
>>>>       over flows we are cleaning up all the assigned memory and 
>>>> unblock the main thread,
>>>>       in short this timer is a watch dog timer for the memory dump 
>>>> collection.
>>> I saw that you used a bunch of timers actually. My question is why 
>>> are
>>> these not delayed_wq instead? I wonder if we really want to keep 
>>> using
>>> timers and handling interrupt context locking if we could move
>>> everything to wq instead.
>> 
>> [Bala]: Let me brief you the requirement.
>>       This patch is used to collect the memory dump received from the 
>> chip and store it into a file.
>>       We receive chip memory dump in two cases
>>        1. When chip goes into the bad state it sends the dump followed 
>> by and hardware error event.
>>        2. Some time when we send some garbage or un recognized command 
>> to chip.
>>           it will respond with a hardware error event. So we send an 
>> 0xfb bytes to the chip requesting it to send the memory dump.
>> 
>> In both cases, chip will go into the bad state once it sends the dump. 
>> so we need to turn OFF and ON chip and perform the init procedure.
>> So keeping the above cases in the view, we register an handle for 
>> hardware error event. So that in both the cases hw error routine will 
>> execute.
>> 
>> As the dump as some specific headers we will queue the received data 
>> into work queue.
>> We are blocking the main thread until the total dump is stored into 
>> the file.
>> Here is the answer for the question, yes we check the status of the 
>> work queue but for handling case 2 we require timers.
>> But let us assume the wq is taking more time than expected. Then we 
>> also need a timer.
>> we are using timer so that we will not block the main thread 
>> indefinitely.
>> They are some cases where when we request a memory dump, after some 
>> packet sent by the chip.
>> it may go into a bad state where it will not pump the remaining dump, 
>> so we are using a timer to unblock the main thread.
>> so during timeout, we are restarting the flags to unblock the main 
>> thread.
>> 
>> 
>>>>>> +		qca_send_crashbuffer(hu);
>>>>>> +
>>>>>> +		while (qca->memdump_state != QCA_MEMDUMP_COLLECTED &&
>>>>>> +			qca->memdump_state != QCA_MEMDUMP_TIMEOUT)
>>>>>> +			;
>>>>> What is this? Busy loop? I rather not do that.
>>>> [Bala]: this busy required to collected the dump from the chip 
>>>> before we close an reopen the hci.
>>>>       >> +	} else if (qca->memdump_state == QCA_MEMDUMP_COLLECTING) 
>>>> {
>>> Why not use some wakeup handling here. I am really against any kind 
>>> of
>>> busy loops in a driver.
>> [Bala]: this busy loop is require. pls refer my comments to the above 
>> query.
>>        this will help in mainly in case 2, where we will block the 
>> main thread until we have dump collected.
>> 
>>        let us assume that we received an hardware error event, hw 
>> error routine we execute which sends an
>>        special byte to the chip requesting for memory dump. it starts 
>> sending the dump, but we move forward and turn off the
>>        chip so we will miss the dump. so here the busy loop is to wait 
>> until we have dump.
>>        but the we are not waiting indefinitely we have timer to 
>> unblock them.
> 
> I do not see how any of this is correct. Can you not use some wakeup
> helper like everybody else does. I also do not see how these two
> threads are correctly protected. It looks really hackish to me.
> 
> Regards
> 
> Marcel

Ok sure, will check and update accordingly in the incremental patch.

-- 
Regards
Balakrishna.

      reply	other threads:[~2019-01-21 14:40 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-28 13:04 [RFC,v2] Bluetooth: hci_qca: Collect controller memory dump during SSR Balakrishna Godavarthi
2018-12-29  7:27 ` Marcel Holtmann
2019-01-04  6:18   ` Balakrishna Godavarthi
2019-01-04  9:11     ` Marcel Holtmann
2019-01-16 14:01       ` Balakrishna Godavarthi
2019-01-18  8:54         ` Marcel Holtmann
2019-01-21 14:39           ` Balakrishna Godavarthi [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fa15134608944498d9f5bc7950b98cde@codeaurora.org \
    --to=bgodavar@codeaurora.org \
    --cc=hemantg@codeaurora.org \
    --cc=johan.hedberg@gmail.com \
    --cc=linux-arm-msm@vger.kernel.org \
    --cc=linux-bluetooth@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=marcel@holtmann.org \
    --cc=mka@chromium.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.