public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: "Rodolfo Guluarte Hale" <rodolfo@host-hispano.net>
To: <linux-kernel@vger.kernel.org>
Subject: Re: dentry bloat.
Date: Sun, 9 May 2004 01:36:14 -0600	[thread overview]
Message-ID: <000c01c43598$50720f90$6700a8c0@Portatil> (raw)
In-Reply-To: Pine.LNX.4.58.0405082143340.1592@ppc970.osdl.org


----- Original Message ----- 
From: "Linus Torvalds" <torvalds@osdl.org>
To: "Andrew Morton" <akpm@osdl.org>
Cc: <dipankar@in.ibm.com>; <manfred@colorfullife.com>; <davej@redhat.com>;
<wli@holomorphy.com>; "Kernel Mailing List" <linux-kernel@vger.kernel.org>;
<maneesh@in.ibm.com>
Sent: Saturday, May 08, 2004 11:14 PM
Subject: Re: dentry bloat.


>
>
> On Sat, 8 May 2004, Andrew Morton wrote:
> >
> > erk.  OK.  Things are (much) worse than I thought.  The 24 byte limit means
> > that 20% of my names will be externally allocated, but that's no worse than
> > what we had before.
>
> In fact, it's better than what we had before at least on 64-bit
> architectures.
>
> But I'd be happy to make the DNAME_INLINE_LEN_MIN #define larger - I just
> think we should try to shrink the internal structure fields first.
>
> Btw, at least for the kernel sources, my statistics say that filename
> distribution (in a built tree, and with BK) is
>
>    1:     5.04 % (    5.04 % cum -- 2246)
>    2:     5.19 % (   10.23 % cum -- 2312)
>    3:     0.55 % (   10.79 % cum -- 247)
>    4:     3.30 % (   14.08 % cum -- 1469)
>    5:     3.35 % (   17.43 % cum -- 1492)
>    6:     4.35 % (   21.79 % cum -- 1940)
>    7:     7.55 % (   29.34 % cum -- 3365)
>    8:     9.64 % (   38.98 % cum -- 4293)
>    9:     9.17 % (   48.15 % cum -- 4084)
>   10:    10.98 % (   59.12 % cum -- 4891)
>   11:     7.65 % (   66.77 % cum -- 3406)
>   12:     7.01 % (   73.78 % cum -- 3122)
>   13:     5.16 % (   78.94 % cum -- 2298)
>   14:     3.83 % (   82.77 % cum -- 1706)
>   15:     3.47 % (   86.24 % cum -- 1545)
>   16:     2.11 % (   88.34 % cum -- 939)
>   17:     1.47 % (   89.81 % cum -- 655)
>   18:     1.06 % (   90.87 % cum -- 472)
>   19:     0.68 % (   91.55 % cum -- 303)
>   20:     0.42 % (   91.97 % cum -- 188)
>   21:     0.29 % (   92.26 % cum -- 128)
>   22:     0.24 % (   92.50 % cum -- 107)
>   23:     0.14 % (   92.64 % cum -- 63)
>
> ie we've reached 92% of all names with 24-byte inline thing.
>
> For my whole disk, I have similar stats:
>
>    1:     6.59 % (    6.59 % cum -- 71690)
>    2:     6.86 % (   13.45 % cum -- 74611)
>    3:     1.59 % (   15.04 % cum -- 17292)
>    4:     3.77 % (   18.81 % cum -- 40992)
>    5:     3.11 % (   21.92 % cum -- 33884)
>    6:     4.13 % (   26.05 % cum -- 44898)
>    7:     6.97 % (   33.01 % cum -- 75774)
>    8:     8.13 % (   41.15 % cum -- 88451)
>    9:     7.81 % (   48.96 % cum -- 84987)
>   10:     9.56 % (   58.52 % cum -- 104021)
>   11:     7.67 % (   66.19 % cum -- 83403)
>   12:     8.07 % (   74.26 % cum -- 87826)
>   13:     4.38 % (   78.65 % cum -- 47690)
>   14:     3.36 % (   82.01 % cum -- 36592)
>   15:     2.71 % (   84.71 % cum -- 29431)
>   16:     1.78 % (   86.49 % cum -- 19311)
>   17:     1.35 % (   87.84 % cum -- 14703)
>   18:     1.05 % (   88.89 % cum -- 11410)
>   19:     0.82 % (   89.71 % cum -- 8952)
>   20:     0.77 % (   90.49 % cum -- 8423)
>   21:     0.85 % (   91.34 % cum -- 9264)
>   22:     0.72 % (   92.06 % cum -- 7798)
>   23:     0.69 % (   92.75 % cum -- 7534)
>
> so it appears that I'm either a sad case with a lot of source code on my
> disk, or you have overlong filenames that bring up your stats.
>
> Or my program is broken. Entirely possible.
>
> Whee. 149 characters is my winning entry:
>
>
> /usr/share/doc/HTML/en/kdelibs-3.1-apidocs/kdecore/html/classKGenericFactory_3_01KTypeList_3_01Product_00_01ProductListTail_01_4_00_01KTypeList_3_01ParentType_00_01ParentTypeListTail_01_4_01_4-members.html
>
> That's obscene.
>
> Linus
>
> -----
> /*
>  * (C) Copyright 2003 Linus Torvalds
>  *
>  * "bkr" - recursive "bk" invocations aka "bk -r"
>  */
>
> #include <stdio.h>
> #include <stdlib.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <sys/param.h>
> #include <fcntl.h>
> #include <dirent.h>
> #include <string.h>
> #include <regex.h>
>
> /*
>  * Very generic directory tree handling.
>  */
> static int bkr(const char *path, int pathlen,
> void (*regcallback)(const char *path, int pathlen, const char *name, int
namelen),
> void (*dircallback)(const char *path, int pathlen, const char *name, int
namelen))
> {
> struct dirent *de;
> char fullname[MAXPATHLEN + 1];
> char *ptr = fullname + pathlen;
> DIR *base = opendir(path);
>
> if (!base)
> return 0;
> memcpy(fullname, path, pathlen);
>
> while ((de = readdir(base)) != NULL) {
> int len;
>
> len = strlen(de->d_name);
> memcpy(ptr, de->d_name, len+1);
>
> if (dircallback) {
> switch (de->d_type) {
> struct stat st;
> case DT_UNKNOWN:
> if (stat(fullname, &st))
> break;
> if (!S_ISDIR(st.st_mode))
> break;
> case DT_DIR:
> if (de->d_name[0] == '.') {
> if (len == 1)
> break;
> if (de->d_name[1] == '.' && len == 2)
> break;
> }
> ptr[len] = '/';
> ptr[len+1] = '\0';
> dircallback(fullname, pathlen + len + 1, de->d_name, len);
> continue;
> }
> }
> regcallback(fullname, pathlen + len, de->d_name, len);
> }
> closedir(base);
> return 0;
> }
>
> static int total;
> static int len[256];
>
> static void file(const char *path, int pathlen, const char *name, int
namelen)
> {
> total++;
> len[namelen]++;
> }
>
> static void dir(const char *path, int pathlen, const char *name, int
namelen)
> {
> file(path, pathlen, name, namelen);
> bkr(path, pathlen, file, dir);
> }
>
>
> int main(int argc, char **argv)
> {
> int i;
> double sum = 0.0;
>
> bkr(".", 0, file, dir);
> for (i = 0; i < 256; i++) {
> int nr = len[i];
> if (nr) {
> double this = (double) nr * 100.0 / total;
> sum += this;
> printf("%4i: %8.2f %% (%8.2f %% cum -- %d)\n", i, this, sum, nr);
> }
> }
> }
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>
>

http://www.ocioyocio.com/



  reply	other threads:[~2004-05-09  7:36 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20040506200027.GC26679@redhat.com>
     [not found] ` <20040506150944.126bb409.akpm@osdl.org>
     [not found]   ` <409B1511.6010500@colorfullife.com>
2004-05-08  8:23     ` dentry bloat Andrew Morton
2004-05-08  9:23       ` Andrew Morton
2004-05-08 10:11         ` Andrew Morton
2004-05-08 10:12           ` Andrew Morton
2004-05-08 10:28           ` viro
2004-05-08 10:41             ` Andrew Morton
2004-05-08 10:52             ` Andrew Morton
2004-05-08 10:31           ` Manfred Spraul
2004-05-08 17:28           ` Linus Torvalds
2004-05-08 18:19             ` David S. Miller
2004-05-08 19:01             ` Andrew Morton
2004-05-08 19:13               ` Linus Torvalds
2004-05-08 19:27                 ` Andrew Morton
2004-05-08 19:27                 ` Linus Torvalds
2004-05-08 20:42                   ` Dipankar Sarma
2004-05-08 20:55                     ` Andrew Morton
2004-05-08 21:19                       ` Dipankar Sarma
2004-05-09  0:10                         ` Andrew Morton
2004-05-09  2:55                           ` Linus Torvalds
2004-05-09  3:12                             ` David S. Miller
2004-05-09  3:53                               ` Linus Torvalds
2004-05-09 21:03                                 ` Matt Mackall
2004-05-10  8:27                                   ` Helge Hafting
2004-05-10  8:32                                     ` Arjan van de Ven
2004-05-10  9:46                                       ` Andrew Morton
2004-05-10 14:54                                         ` Matt Mackall
2004-05-10 16:26                                           ` Paul E. McKenney
2004-05-10 18:34                                             ` Dipankar Sarma
2004-05-09  4:12                             ` Andrew Morton
2004-05-09  4:25                               ` Linus Torvalds
2004-05-09  4:36                                 ` Andrew Morton
2004-05-09  5:14                                   ` Linus Torvalds
2004-05-09  7:36                                     ` Rodolfo Guluarte Hale [this message]
2004-05-09  9:10                                     ` Guennadi Liakhovetski
2004-05-09  9:23                                       ` viro
2004-05-09 15:35                                       ` Linus Torvalds
2004-05-09 18:11                                         ` Matt Mackall
2004-05-09 22:08                                           ` Francois Romieu
2004-05-09 23:51                                           ` Paul Jackson
2004-05-10  7:17                                         ` Florian Weimer
2004-05-10 14:12                                         ` Rik van Riel
2004-05-09  4:43                                 ` Linus Torvalds
2004-05-09  7:28                     ` Manfred Spraul
2004-05-09 15:33                       ` Dipankar Sarma
2004-05-09 22:17                         ` viro
2004-05-09 22:27                           ` Andrew Morton
2004-05-11  5:26                             ` Maneesh Soni
2004-05-10 18:39                           ` Dipankar Sarma
2004-05-11  5:17                             ` Maneesh Soni
2004-05-08 20:13                 ` Dipankar Sarma
2004-10-06 12:58                   ` Maneesh Soni
2004-05-11 20:22                     ` Andrew Morton
2004-05-14 10:33                       ` Raghavan
2004-05-14 10:50                         ` Paul Jackson
2004-05-14 11:04                           ` Jens Axboe
2004-05-14 11:14                             ` Paul Jackson
2004-05-14 11:24                               ` Jens Axboe
2004-05-14 11:30                                 ` Paul Jackson
2004-05-14 11:24                               ` Dipankar Sarma
2004-05-14 11:18                         ` Dipankar Sarma
2004-05-14 14:44                           ` Linus Torvalds
2004-05-08 21:00               ` Dipankar Sarma

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='000c01c43598$50720f90$6700a8c0@Portatil' \
    --to=rodolfo@host-hispano.net \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox