From: "J. Bruce Fields" <bfields@fieldses.org>
To: Jeff Layton <jlayton@redhat.com>
Cc: linux-nfs@vger.kernel.org
Subject: Re: [PATCH v2 8/8] nfsd: keep a checksum of the first 256 bytes of request
Date: Mon, 4 Feb 2013 10:54:20 -0500 [thread overview]
Message-ID: <20130204155420.GB815@fieldses.org> (raw)
In-Reply-To: <1359983887-28535-9-git-send-email-jlayton@redhat.com>
On Mon, Feb 04, 2013 at 08:18:07AM -0500, Jeff Layton wrote:
> Now that we're allowing more DRC entries, it becomes a lot easier to hit
> problems with XID collisions. In order to mitigate those, calculate the
> crc32 of up to the first 256 bytes of each request coming in and store
> that in the cache entry, along with the total length of the request.
>
> Signed-off-by: Jeff Layton <jlayton@redhat.com>
> ---
> fs/nfsd/cache.h | 5 +++++
> fs/nfsd/nfscache.c | 44 ++++++++++++++++++++++++++++++++++++++++----
> 2 files changed, 45 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
> index 9c7232b..4822db3 100644
> --- a/fs/nfsd/cache.h
> +++ b/fs/nfsd/cache.h
> @@ -29,6 +29,8 @@ struct svc_cacherep {
> u32 c_prot;
> u32 c_proc;
> u32 c_vers;
> + unsigned int c_len;
> + u32 c_crc;
> unsigned long c_timestamp;
> union {
> struct kvec u_vec;
> @@ -73,6 +75,9 @@ enum {
> /* Cache entries expire after this time period */
> #define RC_EXPIRE (120 * HZ)
>
> +/* Checksum this amount of the request */
> +#define RC_CSUMLEN (256U)
> +
> int nfsd_reply_cache_init(void);
> void nfsd_reply_cache_shutdown(void);
> int nfsd_cache_lookup(struct svc_rqst *);
> diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
> index d16a5d6..cb655f3 100644
> --- a/fs/nfsd/nfscache.c
> +++ b/fs/nfsd/nfscache.c
> @@ -11,6 +11,7 @@
> #include <linux/slab.h>
> #include <linux/sunrpc/clnt.h>
> #include <linux/highmem.h>
> +#include <linux/crc32.h>
>
> #include "nfsd.h"
> #include "cache.h"
> @@ -24,6 +25,7 @@ static struct list_head lru_head;
> static struct kmem_cache *drc_slab;
> static unsigned int num_drc_entries;
> static unsigned int max_drc_entries;
> +static u32 crc_seed;
>
> /*
> * Calculate the hash index from an XID.
> @@ -130,6 +132,9 @@ int nfsd_reply_cache_init(void)
> INIT_LIST_HEAD(&lru_head);
> max_drc_entries = nfsd_cache_size_limit();
> num_drc_entries = 0;
> +
> + /* Is a random seed any better than some well-defined constant? */
> + get_random_bytes(&crc_seed, sizeof(crc_seed));
> return 0;
> out_nomem:
> printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
> @@ -238,12 +243,37 @@ nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc)
> }
>
> /*
> + * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
> + */
> +static u32
> +nfsd_cache_crc(struct xdr_buf *buf)
> +{
> + u32 crc;
> + const unsigned char *p = buf->head[0].iov_base;
> + size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len,
> + RC_CSUMLEN);
> + size_t len = min(buf->head[0].iov_len, csum_len);
> +
> + /* rq_arg.head first */
> + crc = crc32(crc_seed, p, len);
> + csum_len -= len;
> +
> + /* Nothing left */
> + if (!csum_len)
> + return crc;
> +
> + /* checksum the rest from the page_array */
> + p = page_address(buf->pages[0]) + buf->page_base;
If buf->page_base is large (close to PAGE_SIZE), then this reads past
the end of the page when it should instead continue on to the next page.
In practice page_base is always 0 here, and I think it's unlikely that
will change. But it would be worth a comment. (Or maybe even a
WARN_ON_ONCE(buf->page_base).)
> + return crc32(crc, p, csum_len);
> +}
> +
> +/*
> * Search the request hash for an entry that matches the given rqstp.
> * Must be called with cache_lock held. Returns the found entry or
> * NULL on failure.
> */
> static struct svc_cacherep *
> -nfsd_cache_search(struct svc_rqst *rqstp)
> +nfsd_cache_search(struct svc_rqst *rqstp, u32 crc)
> {
> struct svc_cacherep *rp;
> struct hlist_node *hn;
> @@ -257,6 +287,7 @@ nfsd_cache_search(struct svc_rqst *rqstp)
> hlist_for_each_entry(rp, hn, rh, c_hash) {
> if (xid == rp->c_xid && proc == rp->c_proc &&
> proto == rp->c_prot && vers == rp->c_vers &&
> + rqstp->rq_arg.len == rp->c_len && crc == rp->c_crc &&
> rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) &&
> rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr))
> return rp;
> @@ -276,7 +307,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
> __be32 xid = rqstp->rq_xid;
> u32 proto = rqstp->rq_prot,
> vers = rqstp->rq_vers,
> - proc = rqstp->rq_proc;
> + proc = rqstp->rq_proc,
> + crc;
> unsigned long age;
> int type = rqstp->rq_cachetype;
> int rtn;
> @@ -287,10 +319,12 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
> return RC_DOIT;
> }
>
> + crc = nfsd_cache_crc(&rqstp->rq_arg);
> +
For a moment I was wondering whether we should delay calculating that
till we need it--but of course we need it in all cases but allocation
failure (either to match an existing entry or populate a new one). OK!
Looks fine.--b.
> spin_lock(&cache_lock);
> rtn = RC_DOIT;
>
> - rp = nfsd_cache_search(rqstp);
> + rp = nfsd_cache_search(rqstp, crc);
> if (rp)
> goto found_entry;
>
> @@ -318,7 +352,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
> * Must search again just in case someone inserted one
> * after we dropped the lock above.
> */
> - found = nfsd_cache_search(rqstp);
> + found = nfsd_cache_search(rqstp, crc);
> if (found) {
> nfsd_reply_cache_free_locked(rp);
> rp = found;
> @@ -344,6 +378,8 @@ setup_entry:
> rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp)));
> rp->c_prot = proto;
> rp->c_vers = vers;
> + rp->c_len = rqstp->rq_arg.len;
> + rp->c_crc = crc;
>
> hash_refile(rp);
> lru_put_end(rp);
> --
> 1.7.11.7
>
next prev parent reply other threads:[~2013-02-04 15:54 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-02-04 13:17 [PATCH v2 0/8] nfsd: duplicate reply cache overhaul Jeff Layton
2013-02-04 13:18 ` [PATCH v2 1/8] nfsd: always move DRC entries to the end of LRU list when updating timestamp Jeff Layton
2013-02-04 13:18 ` [PATCH v2 2/8] nfsd: track the number of DRC entries in the cache Jeff Layton
2013-02-04 13:18 ` [PATCH v2 3/8] nfsd: dynamically allocate DRC entries Jeff Layton
2013-02-04 13:18 ` [PATCH v2 4/8] nfsd: remove the cache_disabled flag Jeff Layton
2013-02-04 13:18 ` [PATCH v2 5/8] nfsd: when updating an entry with RC_NOCACHE, just free it Jeff Layton
2013-02-04 13:18 ` [PATCH v2 6/8] nfsd: add recurring workqueue job to clean the cache Jeff Layton
2013-02-04 13:18 ` [PATCH v2 7/8] nfsd: register a shrinker for DRC cache entries Jeff Layton
2013-02-04 13:18 ` [PATCH v2 8/8] nfsd: keep a checksum of the first 256 bytes of request Jeff Layton
2013-02-04 15:54 ` J. Bruce Fields [this message]
2013-02-04 16:16 ` Jeff Layton
2013-02-04 20:20 ` J. Bruce Fields
2013-02-05 14:55 ` J. Bruce Fields
2013-02-05 15:51 ` Jeff Layton
2013-02-04 15:56 ` [PATCH v2 0/8] nfsd: duplicate reply cache overhaul J. Bruce Fields
2013-02-04 18:07 ` [PATCH 9/8] nfsd: handle arbitrary page array layouts in nfsd_cache_crc Jeff Layton
2013-02-04 18:18 ` J. Bruce Fields
2013-02-05 15:15 ` [PATCH v2 0/8] nfsd: duplicate reply cache overhaul J. Bruce Fields
2013-02-05 15:58 ` Jeff Layton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130204155420.GB815@fieldses.org \
--to=bfields@fieldses.org \
--cc=jlayton@redhat.com \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox