From mboxrd@z Thu Jan 1 00:00:00 1970 From: Tomas Bortoli Subject: Re: [PATCH] net/9p/trans_fd.c: fix double list_del() and race in access Date: Mon, 23 Jul 2018 13:46:42 +0200 Message-ID: <8b2c1367-9a3b-3cd6-d141-870eb2f4f0ed@gmail.com> References: <20180720132801.22749-1-tomasbortoli@gmail.com> <20180723030251.GB24608@nautica> Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Cc: ericvh@gmail.com, rminnich@sandia.gov, lucho@ionkov.net, jiangyiwen@huawei.com, davem@davemloft.net, v9fs-developer@lists.sourceforge.net, netdev@vger.kernel.org, linux-kernel@vger.kernel.org, syzkaller@googlegroups.com To: Dominique Martinet Return-path: In-Reply-To: <20180723030251.GB24608@nautica> Content-Language: en-US Sender: linux-kernel-owner@vger.kernel.org List-Id: netdev.vger.kernel.org On 07/23/2018 05:02 AM, Dominique Martinet wrote: > Tomas Bortoli wrote on Fri, Jul 20, 2018: >> This patch uses list_del_init() instead of list_del() to eliminate >> "req_list". This to prevent double list_del()'s calls to the same list >> from provoking a GPF. Furthermore, this patch fixes an access to >> "req_list" that was made without getting the relative lock. > > Please see comment about locking. > > As for list_del to list_del_init, it feels a little wrong to me, but I > don't have a better idea so let's go with that. Yes, it's not the best solution. > Do you know what happened to trigger this? one thread running > p9_conn_cancel then the other thread doing p9_fd_cancel ? > I don't see how races are supposed to be prevented. The bug is triggered in p9_fd_cancel and in this case it's due to the status of the request being REQ_STATUS_UNSENT but list_del(&req->req_list) is used 4 times in trans_fd.c: - p9_read_work() with the lock but updating the status afterwards (leads to a race) - p9_conn_cancel() without the lock and updating the status afterwards (leads to a race) - p9_fd_cancelled() .. ? 
- p9_fd_cancel() with the lock, but conditional on the status: BOOM So maybe we can try to find out whether the problem is just syncing the status between different threads or whether there is more to it, but I don't know. >> Signed-off-by: Tomas Bortoli >> Reported-by: syzbot+735d926e9d1317c3310c@syzkaller.appspotmail.com >> --- >> >> net/9p/trans_fd.c | 10 ++++++---- >> 1 file changed, 6 insertions(+), 4 deletions(-) >> >> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c >> index a64b01c56e30..131bb1f059e6 100644 >> --- a/net/9p/trans_fd.c >> +++ b/net/9p/trans_fd.c >> @@ -223,7 +223,9 @@ static void p9_conn_cancel(struct p9_conn *m, int err) >> >> list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { >> p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); >> - list_del(&req->req_list); >> + spin_lock_irqsave(&m->client->lock, flags); >> + list_del_init(&req->req_list); >> + spin_unlock_irqrestore(&m->client->lock, flags); > > Just locking around one item if you're afraid it might change won't be > enough - list_for_each_entry_safe is only "safe" from removing the > current element from the list yourself, not from other threads messing > with it, so you'd need to lock around the whole loop if that's what > you're protecting against. > Right, I thought I had to unlock before p9_client_cb() as here: https://github.com/torvalds/linux/blob/master/net/9p/trans_fd.c#L375 However, holding the client lock for the whole loop doesn't seem to cause problems either. See the patch below. > (Also, since I've taken the other patchs to change spin locks on > client->lock to spin_lock instead of spin_lock_irqsave, please use that > function for new locking of that variable - in general just basing your > patchs off linux-next's master branch is a good idea.) 
> >> if (!req->t_err) >> req->t_err = err; >> p9_client_cb(m->client, req, REQ_STATUS_ERROR); >> @@ -369,7 +371,7 @@ static void p9_read_work(struct work_struct *work) >> spin_lock(&m->client->lock); >> if (m->req->status != REQ_STATUS_ERROR) >> status = REQ_STATUS_RCVD; >> - list_del(&m->req->req_list); >> + list_del_init(&m->req->req_list); >> spin_unlock(&m->client->lock); >> p9_client_cb(m->client, m->req, status); >> m->rc.sdata = NULL; >> @@ -684,7 +686,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) >> spin_lock(&client->lock); >> >> if (req->status == REQ_STATUS_UNSENT) { >> - list_del(&req->req_list); >> + list_del_init(&req->req_list); >> req->status = REQ_STATUS_FLSHD; >> ret = 0; >> } >> @@ -701,7 +703,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) >> * remove it from the list. >> */ >> spin_lock(&client->lock); >> - list_del(&req->req_list); >> + list_del_init(&req->req_list); >> spin_unlock(&client->lock); >> >> return 0; diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index a64b01c56e30..2ae5f03d872f 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -199,15 +199,14 @@ static void p9_mux_poll_stop(struct p9_conn *m) static void p9_conn_cancel(struct p9_conn *m, int err) { struct p9_req_t *req, *rtmp; - unsigned long flags; LIST_HEAD(cancel_list); p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); - spin_lock_irqsave(&m->client->lock, flags); + spin_lock(&m->client->lock); if (m->err) { - spin_unlock_irqrestore(&m->client->lock, flags); + spin_unlock(&m->client->lock); return; } @@ -223,11 +222,12 @@ static void p9_conn_cancel(struct p9_conn *m, int err) list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); - list_del(&req->req_list); + list_del_init(&req->req_list); if (!req->t_err) req->t_err = err; p9_client_cb(m->client, req, REQ_STATUS_ERROR); } + spin_unlock(&m->client->lock); } static __poll_t @@ -369,7 +369,7 
@@ static void p9_read_work(struct work_struct *work) spin_lock(&m->client->lock); if (m->req->status != REQ_STATUS_ERROR) status = REQ_STATUS_RCVD; - list_del(&m->req->req_list); + list_del_init(&m->req->req_list); spin_unlock(&m->client->lock); p9_client_cb(m->client, m->req, status); m->rc.sdata = NULL; @@ -684,7 +684,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) spin_lock(&client->lock); if (req->status == REQ_STATUS_UNSENT) { - list_del(&req->req_list); + list_del_init(&req->req_list); req->status = REQ_STATUS_FLSHD; ret = 0; } @@ -701,7 +701,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) * remove it from the list. */ spin_lock(&client->lock); - list_del(&req->req_list); + list_del_init(&req->req_list); spin_unlock(&client->lock); return 0;