From mboxrd@z Thu Jan 1 00:00:00 1970 From: Thomas Gleixner Subject: Re: [PATCH 1/2] LogFS proper Date: Tue, 08 May 2007 09:22:30 +0200 Message-ID: <1178608950.3042.273.camel@localhost.localdomain> References: <20070507215913.GA15054@lazybastard.org> <20070507220036.GB15054@lazybastard.org> Reply-To: tglx@linutronix.de Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: QUOTED-PRINTABLE Cc: Andrew Morton , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, Dave Kleikamp , David Chinner To: =?ISO-8859-1?Q?J=F6rn?= Engel Return-path: In-Reply-To: <20070507220036.GB15054@lazybastard.org> Sender: linux-kernel-owner@vger.kernel.org List-Id: linux-fsdevel.vger.kernel.org On Tue, 2007-05-08 at 00:00 +0200, J=C3=B6rn Engel wrote: > The filesystem itself. Very descriptive log entry. > +config LOGFS > + tristate "Log Filesystem (EXPERIMENTAL)" > + depends on EXPERIMENTAL > + select ZLIB_INFLATE > + select ZLIB_DEFLATE > + help > + Successor of JFFS2, using explicit filesystem hierarchy. Why is it a successor ? Does it build upon JFFS2 ? > + Continuing with the long tradition of calling the filesystem > + exactly what it is not, LogFS is a journaled filesystem, > + while JFFS and JFFS2 were true log-structured filesystems. > + The hybrid structure of journaled filesystems promise to > + scale better to larger sized. > + > + If unsure, say N. =2E.. > @@ -0,0 +1,14 @@ > +obj-$(CONFIG_LOGFS) +=3D logfs.o > + > +logfs-y +=3D compr.o > +logfs-y +=3D dir.o > +logfs-y +=3D file.o > +logfs-y +=3D gc.o > +logfs-y +=3D inode.o > +logfs-y +=3D journal.o > +logfs-y +=3D memtree.o > +logfs-y +=3D readwrite.o > +logfs-y +=3D segment.o > +logfs-y +=3D super.o > +logfs-y +=3D progs/fsck.o > +logfs-y +=3D progs/mkfs.o Please use either tabs or spaces. 
Preferably tabs > --- /dev/null 2007-04-18 05:32:26.652341749 +0200 > +++ linux-2.6.21logfs/fs/logfs/logfs.h 2007-05-07 13:32:12.000000000 = +0200 > @@ -0,0 +1,626 @@ > +#ifndef logfs_h > +#define logfs_h > + > +#define __CHECK_ENDIAN__ > + > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include Please sort includes alphabetically and separate the=20 #include from the #include ones > +typedef __be16 be16; > +typedef __be32 be32; > +typedef __be64 be64; Why are those typedefs necessary ? > +struct btree_head { > + struct btree_node *node; > + int height; > + void *null_ptr; > +}; Please document structures > +#define packed __attribute__((__packed__)) Please use the __attribute__((__packed__)) on your structs instead of creating some extra "needs lookup" magic. > + > +#define TRACE() do { \ > + printk("trace: %s:%d: ", __FILE__, __LINE__); \ > + printk("->%s\n", __func__); \ > +} while(0) Oh no. Not again another "I'm in function X tracer".=20 > + > +#define LOGFS_MAGIC 0xb21f205ac97e8168ull > +#define LOGFS_MAGIC_U32 0xc97e8168ull why is an U32 constant ull ? > +#define LOGFS_BLOCK_SECTORS (8) > +#define LOGFS_BLOCK_BITS (9) /* 512 pointers, used for shifts */ > +#define LOGFS_BLOCKSIZE (4096ull) > +#define LOGFS_BLOCK_FACTOR (LOGFS_BLOCKSIZE / sizeof(u64)) > +#define LOGFS_BLOCK_MASK (LOGFS_BLOCK_FACTOR-1) for the whole defines: Please align them so it does not look like a jigsaw puzzle. Please avoid tail comments as it makes it harder to parse > +#define I0_BLOCKS (4+16) > +#define I1_BLOCKS LOGFS_BLOCK_FACTOR > +#define I2_BLOCKS (LOGFS_BLOCK_FACTOR * I1_BLOCKS) > +#define I3_BLOCKS (LOGFS_BLOCK_FACTOR * I2_BLOCKS) > +#define I4_BLOCKS (LOGFS_BLOCK_FACTOR * I3_BLOCKS) > +#define I5_BLOCKS (LOGFS_BLOCK_FACTOR * I4_BLOCKS) Some explanation for that magic math might be helpful > +#define I1_INDEX (4+16) same constant as I0_BLOCKS. coincidence ?
> +#define I2_INDEX (5+16) > +#define I3_INDEX (6+16) > +#define I4_INDEX (7+16) > +#define I5_INDEX (8+16) #define I2_INDEX (I1_INDEX + 1) =2E... > +struct logfs_disk_super { > + be64 ds_magic; > + be32 ds_crc; /* crc32 of everything below */ > + u8 ds_ifile_levels; /* max level of ifile */ > + u8 ds_iblock_levels; /* max level of regular files */ > + u8 ds_data_levels; /* number of segments to leaf blocks */ > + u8 pad0; > + > + be64 ds_feature_incompat; > + be64 ds_feature_ro_compat; > + > + be64 ds_feature_compat; > + be64 ds_flags; > + > + be64 ds_filesystem_size; /* filesystem size in bytes */ > + u8 ds_segment_shift; /* log2 of segment size */ > + u8 ds_block_shift; /* log2 if block size */ > + u8 ds_write_shift; /* log2 of write size */ > + u8 pad1[5]; > + > + /* the segments of the primary journal. if fewer than 4 segments a= re > + * used, some fields are set to 0 */ > +#define LOGFS_JOURNAL_SEGS 4 Please avoid defines inside of structures > + be64 ds_journal_seg[LOGFS_JOURNAL_SEGS]; > + > + be64 ds_root_reserve; /* bytes reserved for root */ > + > + be64 pad2[19]; /* align to 256 bytes */ > +}packed; Please comment the structure with kernel doc comments and avoid the tai= l comments. > + > +#define LOGFS_IF_VALID 0x00000001 /* inode exists */ > +#define LOGFS_IF_EMBEDDED 0x00000002 /* data embedded in block point= ers */ > +#define LOGFS_IF_ZOMBIE 0x00000004 /* inode was already deleted */ > +#define LOGFS_IF_STILLBORN 0x40000000 /* couldn't write inode in cre= at() */ > +#define LOGFS_IF_INVALID 0x80000000 /* inode does not exist */ Are these bit values or enum type ? 
> +struct logfs_disk_inode { > + be16 di_mode; > + be16 di_pad; > + be32 di_flags; > + be32 di_uid; > + be32 di_gid; > + > + be64 di_ctime; > + be64 di_mtime; > + > + be32 di_refcount; > + be32 di_generation; > + be64 di_used_bytes; > + > + be64 di_size; > + be64 di_data[LOGFS_EMBEDDED_FIELDS]; > +}packed; > + > + > +#define LOGFS_MAX_NAMELEN 255 Please put define on top > +struct logfs_disk_dentry { > + be64 ino; /* inode pointer */ > + be16 namelen; > + u8 type; > + u8 name[LOGFS_MAX_NAMELEN]; > +}packed; > + > + > +#define OBJ_TOP_JOURNAL 1 /* segment header for master journal */ > +#define OBJ_JOURNAL 2 /* segment header for journal */ > +#define OBJ_OSTORE 3 /* segment header for ostore */ > +#define OBJ_BLOCK 4 /* data block */ > +#define OBJ_INODE 5 /* inode */ > +#define OBJ_DENTRY 6 /* dentry */ enum please > +struct logfs_object_header { > + be32 crc; /* checksum */ > + be16 len; /* length of object, header not included */ > + u8 type; /* node type */ > + u8 compr; /* compression type */ > + be64 ino; /* inode number */ > + be64 pos; /* file position */ > +}packed; =46or all structs: Please use kernel doc struct comments. > + > +struct logfs_segment_header { > + be32 crc; /* checksum */ > + be16 len; /* length of object, header not included */ > + u8 type; /* node type */ > + u8 level; /* GC level */ > + be32 segno; /* segment number */ > + be32 ec; /* erase count */ > + be64 gec; /* global erase count (write time) */ > +}packed; > + > +enum { > + COMPR_NONE =3D 0, > + COMPR_ZLIB =3D 1, > +}; Please name the enums and use the same enum for the according fields an= d the function arguments. > + > +/* Journal entries come in groups of 16. 
First group contains indiv= idual > + * entries, next groups contain one entry per level */ > +enum { > + JEG_BASE =3D 0, > + JE_FIRST =3D 1, > + > + JE_COMMIT =3D 1, /* commits all previous entries */ > + JE_ABORT =3D 2, /* aborts all previous entries */ > + JE_DYNSB =3D 3, > + JE_ANCHOR =3D 4, > + JE_ERASECOUNT =3D 5, > + JE_SPILLOUT =3D 6, > + JE_DELTA =3D 7, > + JE_BADSEGMENTS =3D 8, > + JE_AREAS =3D 9, /* area description sans wbuf */ > + JEG_WBUF =3D 0x10, /* write buffer for segments */ > + > + JE_LAST =3D 0x1f, > +}; same here > + > +////////////////////////////////////////////////////////////////////= //////////// > +////////////////////////////////////////////////////////////////////= //////////// Eew. > + > +#define LOGFS_SUPER(sb) ((struct logfs_super*)(sb->s_fs_info)) > +#define LOGFS_INODE(inode) container_of(inode, struct logfs_inode, v= fs_inode) lowercase inlines please > + > + /* 0 reserved for gc markers */ > +#define LOGFS_INO_MASTER 1 /* inode file */ > +#define LOGFS_INO_ROOT 2 /* root directory */ > +#define LOGFS_INO_ATIME 4 /* atime for all inodes */ > +#define LOGFS_INO_BAD_BLOCKS 5 /* bad blocks */ > +#define LOGFS_INO_OBSOLETE 6 /* obsolete block count */ > +#define LOGFS_INO_ERASE_COUNT 7 /* erase count */ > +#define LOGFS_RESERVED_INOS 16 enum ? > +struct logfs_super { > + //struct super_block *s_sb; /* should get removed... */ Please do so > + be64 *s_rblock; > + be64 *s_wblock[LOGFS_MAX_LEVELS]; Please comment the non obvious ones instead of the self explaining > + u64 s_free_bytes; /* number of free bytes */ > +#define journal_for_each(__i) for (__i=3D0; __i +void logfs_crash_dump(struct super_block *sb); > +#define LOGFS_BUG(sb) do { \ > + struct super_block *__sb =3D sb; \ Why do we need a local variable here ? > + logfs_crash_dump(__sb); \ > + BUG(); \ > +} while(0) > +static inline u8 logfs_type(struct inode *inode) > +{ > + return (inode->i_mode >> 12) & 15; What's 12 and 15 ? Constants perhaps ? 
> +} > +static inline struct logfs_disk_sum *alloc_disk_sum(struct super_blo= ck *sb) > +{ > + return kzalloc(sb->s_blocksize, GFP_ATOMIC); > +} No, please do not add another alias for kzalloc > +static inline void free_disk_sum(struct logfs_disk_sum *sum) > +{ > + kfree(sum); > +} same here > + > +/* compr.c */ > +#define logfs_compress_none logfs_memcpy > +#define logfs_uncompress_none logfs_memcpy can you please use logfs_memcpy instead ? > +int logfs_memcpy(void *in, void *out, size_t inlen, size_t outlen); > +int logfs_compress(void *in, void *out, size_t inlen, size_t outlen)= ; > +int logfs_compress_vec(struct kvec *vec, int count, void *out, size_= t outlen); > +int logfs_uncompress(void *in, void *out, size_t inlen, size_t outle= n); > +int logfs_uncompress_vec(void *in, size_t inlen, struct kvec *vec, i= nt count); are those global ? If yes, please add extern, else remove > +int __init logfs_compr_init(void); > +void __exit logfs_compr_exit(void); ditto > + > +/* dir.c */ > +extern struct inode_operations logfs_dir_iops; > +extern struct file_operations logfs_dir_fops; > +int logfs_replay_journal(struct super_block *sb); ditto > + > +/* file.c */ > +extern struct inode_operations logfs_reg_iops; > +extern struct file_operations logfs_reg_fops; > +extern struct address_space_operations logfs_reg_aops; > + > +int logfs_setattr(struct dentry *dentry, struct iattr *iattr); ditto > + > +/* gc.c */ > +void logfs_gc_pass(struct super_block *sb); > +int logfs_init_gc(struct logfs_super *super); > +void logfs_cleanup_gc(struct logfs_super *super); same here ......................
> + > +/* inode.c */ > +/* progs/mkfs.c */ > +int logfs_fsck(struct super_block *sb); down to this place > + > +static inline u64 dev_ofs(struct super_block *sb, u32 segno, u32 ofs= ) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); Separate variables and code by an empty line please > + return ((u64)segno << super->s_segshift) + ofs; > +} > + > + > +static inline void device_read(struct super_block *sb, u32 segno, u3= 2 ofs, > + size_t len, void *buf) > +{ > + int err =3D mtdread(sb, dev_ofs(sb, segno, ofs), len, buf); Same here. > + LOGFS_BUG_ON(err, sb); Please open code this instead of nesting mtdread into device_read and therefore avoid the error handling paths in those places where device_read is used. > +} > + > + > +#define EOF 256 1. very intuitive name 2. why is this constant not at the top, where the other constants are 3. why 256 > + > +typedef int (*dir_callback)(struct inode *dir, struct dentry *dentry= , > + struct logfs_disk_dentry *dd, loff_t pos); Why is this in the middle of something else ? > + > +static s64 dir_seek_data(struct inode *inode, s64 pos) > +{ > + s64 new_pos =3D logfs_seek_data(inode, pos); new line please > + return max((s64)pos, new_pos - 1); max_t please=20 > +} > + > + > +static int __logfs_dir_walk(struct inode *dir, struct dentry *dentry= , > + dir_callback handler, struct logfs_disk_dentry *dd, loff_t *pos) > +{ > + struct qstr *name =3D dentry ? &dentry->d_name : NULL; > + int ret; > + > + for (; ; (*pos)++) { > + ret =3D read_dir(dir, dd, *pos); > + if (ret =3D=3D -EOF) > + return 0; > + if (ret =3D=3D -ENODATA) {/* deleted dentry */ Please move the comment away.
It makes parsing hard > + *pos =3D dir_seek_data(dir, *pos); > + continue; > + } > + if (ret) > + return ret; > + BUG_ON(dd->namelen =3D=3D 0); > + > + if (name) { > + if (name->len !=3D be16_to_cpu(dd->namelen)) > + continue; > + if (memcmp(name->name, dd->name, name->len)) > + continue; > + } > + > + return handler(dir, dentry, dd, *pos); > + } > + return ret; Where do you break out of the loop ? > +} > + > + > +static int logfs_dir_walk(struct inode *dir, struct dentry *dentry, > + dir_callback handler) > +{ > + struct logfs_disk_dentry dd; > + loff_t pos =3D 0; New line please > + return __logfs_dir_walk(dir, dentry, handler, &dd, &pos); > +} > + > + > +static struct dentry *logfs_lookup(struct inode *dir, struct dentry = *dentry, > + struct nameidata *nd) > +{ > + struct dentry *ret; > + > + ret =3D ERR_PTR(logfs_dir_walk(dir, dentry, logfs_lookup_handler)); > + return ret; return ERR_PTR(.....); > +} > + > +static int logfs_unlink(struct inode *dir, struct dentry *dentry) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(dir->i_sb); > + struct inode *inode =3D dentry->d_inode; > + int ret; > + > + mutex_lock(&super->s_victim_mutex); > + super->s_victim_ino =3D inode->i_ino; > + > + /* remove dentry */ > + if (inode->i_mode & S_IFDIR) > + dir->i_nlink--; > + inode->i_ctime =3D dir->i_ctime =3D dir->i_mtime =3D CURRENT_TIME; > + ret =3D logfs_dir_walk(dir, dentry, logfs_unlink_handler); > + super->s_victim_ino =3D 0; > + if (ret) > + goto out; > + > + /* remove inode */ > + ret =3D logfs_remove_inode(inode); Please remove this goto / label construct and do if (likely(!ret)) ret =3D logfs_remove_inode(inode); instead > +out: > + mutex_unlock(&super->s_victim_mutex); > + return ret; > +} > + > + > +/* FIXME: readdir currently has it's own dir_walk code. 
I don't see= a good > + * way to combine the two copies */ > +#define IMPLICIT_NODES 2 > +static int __logfs_readdir(struct file *file, void *buf, filldir_t f= illdir) > +{ > + struct logfs_disk_dentry dd; > + loff_t pos =3D file->f_pos - IMPLICIT_NODES; > + int err; > + > + BUG_ON(pos<0); > + for (;; pos++) { > + struct inode *dir =3D file->f_dentry->d_inode; new line please > + err =3D read_dir(dir, &dd, pos); > + if (err =3D=3D -EOF) > + break; -EOF results in a return code 0 ? > + if (err =3D=3D -ENODATA) {/* deleted dentry */ > + pos =3D dir_seek_data(dir, pos); > + continue; > + } > + if (err) > + return err; > + BUG_ON(dd.namelen =3D=3D 0); > + > + if (filldir(buf, dd.name, be16_to_cpu(dd.namelen), pos, > + be64_to_cpu(dd.ino), dd.type)) > + break; > + } > + > + file->f_pos =3D pos + IMPLICIT_NODES; > + return 0; > +} > + > + > +static int logfs_readdir(struct file *file, void *buf, filldir_t fil= ldir) > +{ > + struct inode *inode =3D file->f_dentry->d_inode; > + int err; > + > + if (file->f_pos < 0) > + return -EINVAL; > + > + if (file->f_pos =3D=3D 0) { > + if (filldir(buf, ".", 1, 1, inode->i_ino, DT_DIR) < 0) > + return 0; > + file->f_pos++; > + } > + if (file->f_pos =3D=3D 1) { > + ino_t pino =3D parent_ino(file->f_dentry); empty line > + if (filldir(buf, "..", 2, 2, pino, DT_DIR) < 0) > + return 0; > + file->f_pos++; > + } > + > + err =3D __logfs_readdir(file, buf, filldir); > + if (err) > + printk("LOGFS readdir error=3D%x, pos=3D%llx\n", err, file->f_pos)= ; > + return err; > +} > +static int logfs_write_dir(struct inode *dir, struct dentry *dentry, > + struct inode *inode) > +{ > + struct logfs_disk_dentry dd; > + int err; > + > + memset(&dd, 0, sizeof(dd)); > + dd.ino =3D cpu_to_be64(inode->i_ino); > + dd.type =3D logfs_type(inode); > + logfs_set_name(&dd, &dentry->d_name); > + > + dir->i_ctime =3D dir->i_mtime =3D CURRENT_TIME; > + /* FIXME: the file size should actually get aligned when writing, > + * not when reading. 
*/ Please use=20 /* * kernel style=20 * multi line comments */ > + err =3D write_dir(dir, &dd, file_end(dir)); > + if (err) > + return err; > + d_instantiate(dentry, inode); > + return 0; > +} > + > + > +static int __logfs_create(struct inode *dir, struct dentry *dentry, > + struct inode *inode, const char *dest, long destlen) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(dir->i_sb); > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + int ret; > + > + mutex_lock(&super->s_victim_mutex); > + super->s_victim_ino =3D inode->i_ino; > + if (inode->i_mode & S_IFDIR) > + inode->i_nlink++; > + > + if (dest) /* symlink */ > + ret =3D logfs_inode_write(inode, dest, destlen, 0); > + else /* creat/mkdir/mknod */ > + ret =3D __logfs_write_inode(inode); Please remove this confusing tail comments > + super->s_victim_ino =3D 0; > + if (ret) { > + if (!dest) > + li->li_flags |=3D LOGFS_IF_STILLBORN; > + /* FIXME: truncate symlink */ > + inode->i_nlink--; > + iput(inode); > + goto out; > + } > + > + if (inode->i_mode & S_IFDIR) > + dir->i_nlink++; > + ret =3D logfs_write_dir(dir, dentry, inode); > + > + if (ret) { > + if (inode->i_mode & S_IFDIR) > + dir->i_nlink--; > + logfs_remove_inode(inode); > + iput(inode); > + } > +out: > + mutex_unlock(&super->s_victim_mutex); > + return ret; > +} > + > + > +/* FIXME: This should really be somewhere in the 64bit area. */ > +#define LOGFS_LINK_MAX (1<<30) Please move the define to the header file or some other useful place > +static int logfs_mkdir(struct inode *dir, struct dentry *dentry, int= mode) > +{ > + struct inode *inode; > + > + if (dir->i_nlink >=3D LOGFS_LINK_MAX) > + return -EMLINK; > + > + /* FIXME: why do we have to fill in S_IFDIR, while the mode is > + * correct for mknod, creat, etc.? Smells like the vfs *should* > + * do it for us but for some reason fails to do so. 
> + */ Comment style > + > +static struct inode_operations ext2_symlink_iops =3D { > + .readlink =3D generic_readlink, > + .follow_link =3D page_follow_link_light, > +}; s/ext2/logfs/ maybe ? > +static int logfs_nop_handler(struct inode *dir, struct dentry *dentr= y, > + struct logfs_disk_dentry *dd, loff_t pos) > +{ > + return 0; > +} New line > +static inline int logfs_get_dd(struct inode *dir, struct dentry *den= try, > + struct logfs_disk_dentry *dd, loff_t *pos) > +{ > + *pos =3D 0; > + return __logfs_dir_walk(dir, dentry, logfs_nop_handler, dd, pos); > +} > + > +static int logfs_delete_dd(struct inode *dir, struct logfs_disk_dent= ry *dd, > + loff_t pos) > +{ > + int err; > + > + err =3D read_dir(dir, dd, pos); > + if (err =3D=3D -EOF) /* don't expose internal errnos */ > + err =3D -EIO; Interesting. Why is EOF morphed to EIO ? > + if (err) > + return err; > + > + dir->i_ctime =3D dir->i_mtime =3D CURRENT_TIME; > + if (dd->type =3D=3D DT_DIR) > + dir->i_nlink--; > + return logfs_delete(dir, pos); > +} > + > +static int logfs_rename(struct inode *old_dir, struct dentry *old_de= ntry, > + struct inode *new_dir, struct dentry *new_dentry) > +{ > + if (new_dentry->d_inode) /* target exists */ > + return logfs_rename_target(old_dir, old_dentry, new_dir, new_dentr= y); > + else if (old_dir =3D=3D new_dir) /* local rename */ > + return logfs_rename_local(old_dir, old_dentry, new_dentry); Comment style > + return logfs_rename_cross(old_dir, old_dentry, new_dir, new_dentry)= ; > +} > + > --- /dev/null 2007-04-18 05:32:26.652341749 +0200 > +++ linux-2.6.21logfs/fs/logfs/file.c 2007-05-07 13:32:12.000000000 += 0200 > @@ -0,0 +1,82 @@ Comment missing. License missing. > +#include "logfs.h" > + > + > +static int logfs_prepare_write(struct file *file, struct page *page, > + unsigned start, unsigned end) > +{ > + if (PageUptodate(page)) > + return 0; > + > + if ((start =3D=3D 0) && (end =3D=3D PAGE_CACHE_SIZE)) > + return 0; Self explaining logic ? 
> + return logfs_readpage_nolock(page); > +} > + > + > +static int logfs_readpage(struct file *file, struct page *page) > +{ > + int ret =3D logfs_readpage_nolock(page); empty line > + unlock_page(page); > + return ret; > +} > + > + > +static int logfs_writepage(struct page *page, struct writeback_contr= ol *wbc) > +{ > + BUG(); Is this a permanent solution ? > + return 0; > +} > --- /dev/null 2007-04-18 05:32:26.652341749 +0200 > +++ linux-2.6.21logfs/fs/logfs/gc.c 2007-05-07 13:32:12.000000000 +02= 00 > @@ -0,0 +1,350 @@ Comment and license please. > +#include "logfs.h" > + > +#if 0 Can you please remove this ? > +/** > + * When deciding which segment to use next, calculate the resistance > + * of each segment and pick the lowest. Segments try to resist usag= e > + * if > + * o they are full, > + * o they have a high erase count or > + * o they have recently been written. > + * > + * Full segments should not get reused, as there is little space to > + * gain from them. Segments with high erase count should be left > + * aside as they can wear out sooner than others. Freshly-written > + * segments contain many blocks that will get obsoleted fairly soon, > + * so it helps to wait a little before reusing them. > + * > + * Total resistance is expressed in erase counts. Formula is: > + * > + * R =3D EC + K1*F + K2*e^(-t/theta) > + * > + * R: Resistance > + * EC: Erase count > + * K1: Constant, 10,000 might be a good value > + * K2: Constant, 1,000 might be a good value > + * F: Segment fill level > + * t: Time since segment was written to (in number of segments writt= en) > + * theta: Time constant. Total number of segments might be a good v= alue > + * > + * Since the kernel is not allowed to use floating point, the functi= on > + * decay() is used to approximate exponential decay in fixed point. > + */ Interestingly enough this unused function is better commented than anything else in this patch. 
> +static long decay(long t0, long t, long theta) > +{ > + long shift, fac; > + > + if (t >=3D 32*theta) > + return 0; > + > + shift =3D t/theta; > + fac =3D theta - (t%theta)/2; > + return (t0 >> shift) * fac / theta; > +} > +#endif > + > + > +static u32 logfs_valid_bytes(struct super_block *sb, u32 segno) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + struct logfs_object_header h; > + u64 ofs, ino, pos; > + u32 seg_ofs, valid, size; > + void *reserved; > + int i; > + > + /* Some segments are reserved. Just pretend they were all valid */ > + reserved =3D btree_lookup(&super->s_reserved_segments, segno); > + if (reserved) > + return super->s_segsize; > + > + /* Currently open segments */ > + /* FIXME: just reserve open areas and remove this code */ > + for (i=3D0; i + struct logfs_area *area =3D super->s_area[i]; > + if (area->a_is_open && (area->a_segno =3D=3D segno)) { > + return super->s_segsize; > + } > + } > + > + device_read(sb, segno, 0, sizeof(h), &h); See above comment about device_read() implementation. 
> + if (all_ff(&h, sizeof(h))) > + return 0; > + > + valid =3D 0; /* segment header not counted as valid bytes */ > + for (seg_ofs =3D sizeof(h); seg_ofs + sizeof(h) < super->s_segsize;= ) { > + device_read(sb, segno, seg_ofs, sizeof(h), &h); > + if (all_ff(&h, sizeof(h))) > + break; > + > + ofs =3D dev_ofs(sb, segno, seg_ofs); > + ino =3D be64_to_cpu(h.ino); > + pos =3D be64_to_cpu(h.pos); > + size =3D (u32)be16_to_cpu(h.len) + sizeof(h); > + //printk("%x %x (%llx, %llx, %llx)(%x, %x)\n", h.type, h.compr, of= s, ino, pos, valid, size); Please remove > + if (logfs_is_valid_block(sb, ofs, ino, pos)) > + valid +=3D size; > + seg_ofs +=3D size; > + } > + printk("valid(%x) =3D %x\n", segno, valid); > + return valid; > +} > + > +static void __logfs_gc_segment(struct super_block *sb, u32 segno) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + struct logfs_object_header h; > + struct logfs_segment_header *sh; > + u64 ofs, ino, pos; > + u32 seg_ofs; > + int level; > + > + device_read(sb, segno, 0, sizeof(h), &h); See above comment about device_read() implementation. > + sh =3D (void*)&h; Please use proper type casting ! > + level =3D sh->level; > + > + for (seg_ofs =3D sizeof(h); seg_ofs + sizeof(h) < super->s_segsize;= ) { > + ofs =3D dev_ofs(sb, segno, seg_ofs); > + device_read(sb, segno, seg_ofs, sizeof(h), &h); See above comment about device_read() implementation. > + ino =3D be64_to_cpu(h.ino); > + pos =3D be64_to_cpu(h.pos); > + if (logfs_is_valid_block(sb, ofs, ino, pos)) > + logfs_cleanse_block(sb, ofs, ino, pos, level); > + seg_ofs +=3D sizeof(h); > + seg_ofs +=3D be16_to_cpu(h.len); > + } > +} > + > +static void __add_segment(struct list_head *list, int *count, u32 se= gno, > + int valid) > +{ > + struct logfs_segment *seg =3D kzalloc(sizeof(*seg), GFP_KERNEL); empty line > + if (!seg) > + return; > + > + seg->segno =3D segno; > + seg->valid =3D valid; > + list_add(&seg->list, list); > + *count +=3D 1; > +} Also __add_segment() can fail. 
Why is there no return code ? > + > + > +static void add_segment(struct list_head *list, int *count, u32 segn= o, > + int valid) > +{ > + struct logfs_segment *seg; > + list_for_each_entry(seg, list, list) > + if (seg->segno =3D=3D segno) > + return; > + __add_segment(list, count, segno, valid); Can fail. Error handling ? > +} > + > + > +static void del_segment(struct list_head *list, int *count, u32 segn= o) > +{ > + struct logfs_segment *seg; Empty line > + list_for_each_entry(seg, list, list) > + if (seg->segno =3D=3D segno) { > + list_del(&seg->list); > + *count -=3D 1; > + kfree(seg); > + return; > + } > +} > + > + > +static void add_free_segment(struct super_block *sb, u32 segno) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + add_segment(&super->s_free_list, &super->s_free_count, segno, 0); > +} Empty line > +static void add_low_segment(struct super_block *sb, u32 segno, int v= alid) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); Empty line > + add_segment(&super->s_low_list, &super->s_low_count, segno, valid); Can fail > +} > +static void del_low_segment(struct super_block *sb, u32 segno) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); Empty line > + del_segment(&super->s_low_list, &super->s_low_count, segno); > +} > + > + > +static void scan_segment(struct super_block *sb, u32 segno) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + u32 full =3D super->s_segsize - sb->s_blocksize - 0x18; /* one head= er */ Please use an understandable constant instead of 0x18 > + int valid; > + > + valid =3D logfs_valid_bytes(sb, segno); > + if (valid =3D=3D 0) { > + del_low_segment(sb, segno); > + add_free_segment(sb, segno); > + } else if (valid < full) > + add_low_segment(sb, segno, valid); Can fail > +} > + > + > +static void free_all_segments(struct logfs_super *super) > +{ > + struct logfs_segment *seg, *next; > + > + list_for_each_entry_safe(seg, next, &super->s_free_list, list) { > + list_del(&seg->list); > + kfree(seg); > + }
> + list_for_each_entry_safe(seg, next, &super->s_low_list, list) { > + list_del(&seg->list); > + kfree(seg); > + } > +} > + > + > +static void logfs_scan_pass(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + int i; > + > + for (i =3D super->s_sweeper+1; i !=3D super->s_sweeper; i++) { for (i =3D super->s_sweeper + 1; i !=3D super->s_sweeper; i++) { > + if (i >=3D super->s_no_segs) > + i=3D1; /* skip superblock */ i =3D 1; and remove tail comment > + > + scan_segment(sb, i); > + > + if (super->s_free_count >=3D super->s_total_levels) { > + super->s_sweeper =3D i; > + return; > + } > + } > + scan_segment(sb, super->s_sweeper); > +} > + > +/* GC all the low-count segments. If necessary, rescan the medium. > + * If we made enough room, return */ > +static void logfs_gc_several(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + int rounds; > + > + rounds =3D super->s_low_count; > + > + for (; rounds; rounds--) { > + if (super->s_free_count >=3D super->s_total_levels) > + return; > + if (super->s_free_count < 3) { > + logfs_scan_pass(sb); > + printk("s"); Debug leftover ? > + } > + logfs_gc_once(sb); > +#if 1 > + if (super->s_free_count >=3D super->s_total_levels) > + return; > + printk("."); > +#endif Dito ? > + } > +} > + > + > +void logfs_gc_pass(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + int i; > + > + for (i=3D4; i; i--) { (i =3D 4; ... Please use a constant instead of 4 > + if (super->s_free_count >=3D super->s_total_levels) > + return; > + logfs_scan_pass(sb); > + > + if (super->s_free_count >=3D super->s_total_levels) > + return; > + printk("free:%8d, low:%8d, sweeper:%8lld\n", > + super->s_free_count, super->s_low_count, > + super->s_sweeper); Debug leftover ? 
Otherwise please add loglevel and some hint from which code this originates > + logfs_gc_several(sb); > + printk("free:%8d, low:%8d, sweeper:%8lld\n", > + super->s_free_count, super->s_low_count, > + super->s_sweeper); Same here > + } > + logfs_fsck(sb); > + LOGFS_BUG(sb); > +} > + > + > + > +void logfs_cleanup_gc(struct logfs_super *super) > +{ > + free_all_segments(super); > +} Can we add another wrapper to this please ? > --- /dev/null 2007-04-18 05:32:26.652341749 +0200 > +++ linux-2.6.21logfs/fs/logfs/inode.c 2007-05-07 13:32:12.000000000 = +0200 > @@ -0,0 +1,468 @@ Comment + license missing > +#include "logfs.h" > +#include > +#include /* for inode_lock */ Please remove the stupid comment > + > +static struct kmem_cache *logfs_inode_cache; > + > + > +static int __logfs_read_inode(struct inode *inode); > + > + > +struct inode *logfs_iget(struct super_block *sb, ino_t ino, int *coo= kie) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + struct logfs_inode *li; > + > + if (ino =3D=3D LOGFS_INO_MASTER) /* never iget this "inode"! */ comment style > + return super->s_master_inode; > + > + spin_lock(&inode_lock); > + list_for_each_entry(li, &super->s_freeing_list, li_freeing_list) > + if (li->vfs_inode.i_ino =3D=3D ino) { > + spin_unlock(&inode_lock); > + *cookie =3D 1; > + return &li->vfs_inode; > + } > + spin_unlock(&inode_lock); > + > + *cookie =3D 0; > + return __logfs_iget(sb, ino); > +} > + > + > +void logfs_iput(struct inode *inode, int cookie) > +{ > + if (inode->i_ino =3D=3D LOGFS_INO_MASTER) /* never iput it either! 
= */ comment style > + return; > + > + if (cookie) > + return; > + > + iput(inode); > +} > + > + > +static void logfs_init_inode(struct inode *inode) > +{ > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + int i; > + > + li->li_flags =3D LOGFS_IF_VALID; > + li->li_used_bytes =3D 0; > + inode->i_uid =3D 0; > + inode->i_gid =3D 0; > + inode->i_size =3D 0; > + inode->i_blocks =3D 0; > + inode->i_ctime =3D CURRENT_TIME; > + inode->i_mtime =3D CURRENT_TIME; > + inode->i_nlink =3D 1; > + INIT_LIST_HEAD(&li->li_freeing_list); > + > + for (i=3D0; i + li->li_data[i] =3D 0; > + > + return; > +} > + > + > +struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino) > +{ > + struct inode *inode; > + > + inode =3D logfs_alloc_inode(sb); > + if (!inode) > + return ERR_PTR(-ENOMEM); > + > + logfs_init_inode(inode); > + inode->i_mode =3D 0; > + inode->i_ino =3D ino; > + inode->i_sb =3D sb; > + > + /* This is a blatant copy of alloc_inode code. We'd need alloc_ino= de > + * to be nonstatic, alas. */ > + { > + static const struct address_space_operations empty_aops; > + struct address_space * const mapping =3D &inode->i_data; Please remove the brackets and move the variables to the top of the function > + mapping->a_ops =3D &empty_aops; > + mapping->host =3D inode; > + mapping->flags =3D 0; > + mapping_set_gfp_mask(mapping, GFP_HIGHUSER); > + mapping->assoc_mapping =3D NULL; > + mapping->backing_dev_info =3D &default_backing_dev_info; > + inode->i_mapping =3D mapping; > + } > + > + return inode; > +} > + > + > +static struct timespec be64_to_timespec(be64 betime) > +{ > + u64 time =3D be64_to_cpu(betime); > + struct timespec tsp; Empty line > + tsp.tv_sec =3D time >> 32; > + tsp.tv_nsec =3D time & 0xffffffff; > + return tsp; > +} > + > + > +static be64 timespec_to_be64(struct timespec tsp) > +{ > + u64 time =3D ((u64)tsp.tv_sec << 32) + (tsp.tv_nsec & 0xffffffff); tsp.tv_nsec & 0xffffffff ????
timespecs need to be normalized, so tv_nsec can never be greater than 999999999 =3D=3D 0x3B9AC9FF > + return cpu_to_be64(time); > +} > + > + > +static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct = inode*inode) > +{ > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + int i; > + > + inode->i_mode =3D be16_to_cpu(di->di_mode); > + li->li_flags =3D be32_to_cpu(di->di_flags); > + inode->i_uid =3D be32_to_cpu(di->di_uid); > + inode->i_gid =3D be32_to_cpu(di->di_gid); > + inode->i_size =3D be64_to_cpu(di->di_size); > + logfs_set_blocks(inode, be64_to_cpu(di->di_used_bytes)); > + inode->i_ctime =3D be64_to_timespec(di->di_ctime); > + inode->i_mtime =3D be64_to_timespec(di->di_mtime); > + inode->i_nlink =3D be32_to_cpu(di->di_refcount); > + inode->i_generation =3D be32_to_cpu(di->di_generation); > + > + switch (inode->i_mode & S_IFMT) { > + case S_IFCHR: /* fall through */ Sigh. Could you please add useful comments ? > + case S_IFBLK: /* fall through */ > + case S_IFIFO: > + inode->i_rdev =3D be64_to_cpu(di->di_data[0]); > + break; > + default: > + for (i=3D0; i + li->li_data[i] =3D be64_to_cpu(di->di_data[i]); > + break; > + } > +} > + > + > +static void logfs_inode_to_disk(struct inode *inode, struct logfs_di= sk_inode*di) > +{ > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + int i; > + > + di->di_mode =3D cpu_to_be16(inode->i_mode); > + di->di_pad =3D 0; > + di->di_flags =3D cpu_to_be32(li->li_flags); > + di->di_uid =3D cpu_to_be32(inode->i_uid); > + di->di_gid =3D cpu_to_be32(inode->i_gid); > + di->di_size =3D cpu_to_be64(i_size_read(inode)); > + di->di_used_bytes =3D cpu_to_be64(li->li_used_bytes); > + di->di_ctime =3D timespec_to_be64(inode->i_ctime); > + di->di_mtime =3D timespec_to_be64(inode->i_mtime); > + di->di_refcount =3D cpu_to_be32(inode->i_nlink); > + di->di_generation =3D cpu_to_be32(inode->i_generation); > + > + switch (inode->i_mode & S_IFMT) { > + case S_IFCHR: /* fall through */ See above > + case S_IFBLK: /* fall through */ > 
+ case S_IFIFO: > + di->di_data[0] =3D cpu_to_be64(inode->i_rdev); > + break; > + default: > + for (i=3D0; i + di->di_data[i] =3D cpu_to_be64(li->li_data[i]); > + break; > + } > +} > + > + > +static int logfs_read_disk_inode(struct logfs_disk_inode *di, > + struct inode *inode) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(inode->i_sb); > + ino_t ino =3D inode->i_ino; > + int ret; > + > + BUG_ON(!super->s_master_inode); > + ret =3D logfs_inode_read(super->s_master_inode, di, sizeof(*di), in= o); > + if (ret) > + return ret; > + > + if ( !(be32_to_cpu(di->di_flags) & LOGFS_IF_VALID)) > + return -EIO; > + > + if (be32_to_cpu(di->di_flags) & LOGFS_IF_INVALID) > + return -EIO; > + > + return 0; > +} > + > + > +static int __logfs_read_inode(struct inode *inode) > +{ > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + struct logfs_disk_inode di; > + int ret; > + > + ret =3D logfs_read_disk_inode(&di, inode); > + /* FIXME: move back to mkfs when format has settled */ > + if (ret =3D=3D -ENODATA && inode->i_ino =3D=3D LOGFS_INO_ROOT) { > + memset(&di, 0, sizeof(di)); > + di.di_flags =3D cpu_to_be32(LOGFS_IF_VALID); > + di.di_mode =3D cpu_to_be16(S_IFDIR | 0755); > + di.di_refcount =3D cpu_to_be32(2); > + ret =3D 0; > + } > + if (ret) > + return ret; > + logfs_disk_to_inode(&di, inode); > + > + if ( !(li->li_flags&LOGFS_IF_VALID) || (li->li_flags&LOGFS_IF_INVAL= ID)) > + return -EIO; Is this really an IO error ? 
> + switch (inode->i_mode & S_IFMT) { > + case S_IFDIR: > + inode->i_op =3D &logfs_dir_iops; > + inode->i_fop =3D &logfs_dir_fops; > + break; > + case S_IFREG: > + inode->i_op =3D &logfs_reg_iops; > + inode->i_fop =3D &logfs_reg_fops; > + inode->i_mapping->a_ops =3D &logfs_reg_aops; > + break; > + default: > + ; > + } > + > + return 0; > +} > + > +int __logfs_write_inode(struct inode *inode) > +{ > + struct logfs_disk_inode old, new; /* FIXME: move these off the stac= k */ > + > + BUG_ON(inode->i_ino =3D=3D LOGFS_INO_MASTER); > + > + /* read and compare the inode first. If it hasn't changed, don't > + * bother writing it. */ Comment style > + logfs_inode_to_disk(inode, &new); > + if (logfs_read_disk_inode(&old, inode)) > + return logfs_write_disk_inode(&new, inode); > + if (memcmp(&old, &new, sizeof(old))) > + return logfs_write_disk_inode(&new, inode); > + return 0; > +} > + > + > + > +/** Do not use kernel doc comment start sequence for non kernel doc comment= s please=20 > + * We need to remember which inodes are currently being dropped. Th= ey > + * would deadlock the cleaner, if it were to iget() them. So > + * logfs_drop_inode() adds them to super->s_freeing_list, > + * logfs_destroy_inode() removes them again and logfs_iget() checks = the > + * list. 
> + */ > +static void logfs_destroy_inode(struct inode *inode) > + > +static u64 logfs_get_ino(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + u64 ino; > + > + /* FIXME: ino allocation should work in two modes: > + * o nonsparse - ifile is mostly occupied, just append > + * o sparse - ifile has lots of holes, fill them up > + */ Comment style > + spin_lock(&super->s_ino_lock); > + ino =3D super->s_last_ino; /* ifile shouldn't be too sparse */ > + super->s_last_ino++; > + spin_unlock(&super->s_ino_lock); > + return ino; > +} > + > --- /dev/null 2007-04-18 05:32:26.652341749 +0200 > +++ linux-2.6.21logfs/fs/logfs/journal.c 2007-05-07 13:32:12.00000000= 0 +0200 > @@ -0,0 +1,696 @@ Comment and license missing > +#include "logfs.h" > + > + > +static void clear_retired(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + int i; > + > + for (i=3D0; i + super->s_retired[i].used =3D 0; > + super->s_first.used =3D 0; > +} > + > + > +static void clear_speculatives(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + int i; > + > + for (i=3D0; i + super->s_speculative[i].used =3D 0; > +} > + > + > +static void retire_speculatives(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + int i; > + > + for (i=3D0; i + struct logfs_journal_entry *spec =3D super->s_speculative + i; > + struct logfs_journal_entry *retired =3D super->s_retired + i; empty line > + if (! 
spec->used) > + continue; > + if (retired->used && (spec->version <=3D retired->version)) > + continue; > + retired->used =3D 1; > + retired->version =3D spec->version; > + retired->offset =3D spec->offset; > + retired->len =3D spec->len; > + } > + clear_speculatives(sb); > +} > + > + > +static void __logfs_scan_journal(struct super_block *sb, void *block= , > + u32 segno, u64 block_ofs, int block_index) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + struct logfs_journal_header *h; > + struct logfs_area *area =3D super->s_journal_area; > + > + for (h =3D block; (void*)h - block < sb->s_blocksize; h++) { > + struct logfs_journal_entry *spec, *retired; > + unsigned long ofs =3D (void*)h - block; > + unsigned long remainder =3D sb->s_blocksize - ofs; > + u16 len =3D be16_to_cpu(h->h_len); > + u16 type =3D be16_to_cpu(h->h_type); > + s16 version =3D be16_to_cpu(h->h_version); > + > + if ((len < 16) || (len > remainder)) > + continue; > + if ((type < JE_FIRST) || (type > JE_LAST)) > + continue; > + if (h->h_crc !=3D logfs_crc32(h, len, 4)) > + continue; > + > + if (!super->s_first.used) { /* remember first version */ Comment style > + super->s_first.used =3D 1; > + super->s_first.version =3D version; > + } > + version -=3D super->s_first.version; > + > + if (abs(version) > 1<<14) /* all versions should be near */ > + LOGFS_BUG(sb); > + > + spec =3D &super->s_speculative[type]; > + retired =3D &super->s_retired[type]; > + switch (type) { > + default: /* store speculative entry */ Comment style > + if (spec->used && (version <=3D spec->version)) > + break; > + spec->used =3D 1; > + spec->version =3D version; > + spec->offset =3D block_ofs + ofs; > + spec->len =3D len; > + break; > + case JE_COMMIT: /* retire speculative entries */ Comment style > + if (retired->used && (version <=3D retired->version)) > + break; > + retired->used =3D 1; > + retired->version =3D version; > + retired->offset =3D block_ofs + ofs; > + retired->len =3D len; > + 
retire_speculatives(sb); > + /* and set up journal area */ > + area->a_segno =3D segno; > + area->a_used_objects =3D block_index; > + area->a_is_open =3D 0; /* never reuse same segment after > + mount - wasteful but safe */ Comment style > + break; > + } > + } > +} > + > + > +static int logfs_scan_journal(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + void *block =3D super->s_compressed_je; > + u64 ofs; > + u32 segno; > + int i, k, err; > + > + clear_speculatives(sb); > + clear_retired(sb); > + journal_for_each(i) { > + segno =3D super->s_journal_seg[i]; > + if (!segno) > + continue; > + for (k=3D0; ks_no_blocks; k++) { k =3D 0;.......... > + ofs =3D logfs_block_ofs(sb, segno, k); > + err =3D mtdread(sb, ofs, sb->s_blocksize, block); > + if (err) > + return err; > + __logfs_scan_journal(sb, block, segno, ofs, k); > + } > + } > + return 0; > +} > + > +static void logfs_calc_free(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + u64 no_segs =3D super->s_no_segs; > + u64 no_blocks =3D super->s_no_blocks; > + u64 blocksize =3D sb->s_blocksize; > + u64 free; > + int i, reserved_segs; > + > + reserved_segs =3D 1; /* super_block */ > + reserved_segs +=3D super->s_bad_segments; > + journal_for_each(i) > + if (super->s_journal_seg[i]) > + reserved_segs++; > + > + free =3D no_segs * no_blocks * blocksize; /* total size */ > + free -=3D reserved_segs * no_blocks * blocksize; /* sb & journal */ > + free -=3D (no_segs - reserved_segs) * blocksize; /* block summary *= / > + free -=3D super->s_used_bytes; /* stored data */ > + super->s_free_bytes =3D free; comments all over the function > +} > + > +static void reserve_sb_and_journal(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + struct btree_head *head =3D &super->s_reserved_segments; > + int i, err; > + > + err =3D btree_insert(head, 0, (void*)1); What stands 1 for ? > + BUG_ON(err); > + > + journal_for_each(i) { > + if (! 
super->s_journal_seg[i]) > + continue; > + err =3D btree_insert(head, super->s_journal_seg[i], (void*)1); > + BUG_ON(err); > + } > +} > + > +static void logfs_read_anchor(struct super_block *sb, struct logfs_a= nchor *da) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + struct inode *inode =3D super->s_master_inode; > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + int i; > + > + super->s_last_ino =3D be64_to_cpu(da->da_last_ino); > + li->li_flags =3D LOGFS_IF_VALID; > + i_size_write(inode, be64_to_cpu(da->da_size)); > + li->li_used_bytes =3D be64_to_cpu(da->da_used_bytes); > + > + for (i=3D0; i + li->li_data[i] =3D be64_to_cpu(da->da_data[i]); > +} > + > +static void logfs_read_areas(struct super_block *sb, struct logfs_je= _areas *a) > +{ > + struct logfs_area *area; > + int i; > + > + for (i=3D0; i + area =3D LOGFS_SUPER(sb)->s_area[i]; > + area->a_used_bytes =3D be32_to_cpu(a->used_bytes[i]); > + area->a_segno =3D be32_to_cpu(a->segno[i]); > + if (area->a_segno) > + area->a_is_open =3D 1; > + } > +} > + > +/* FIXME: make sure there are enough per-area objects in journal */ > +static int logfs_read_journal(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + void *block =3D super->s_compressed_je; > + void *scratch =3D super->s_je; > + int i, err, level; > + struct logfs_area *area; > + > + for (i=3D0; i + struct logfs_journal_entry *je =3D super->s_retired + i; > + if (!super->s_retired[i].used) if (!super->s_retired[i].used) { > + switch (i) { > + case JE_COMMIT: > + case JE_DYNSB: > + case JE_ANCHOR: > + printk("LogFS: Missing journal entry %x?\n", > + i); > + return -EIO; > + default: > + continue; > + } } > + err =3D mtdread(sb, je->offset, sb->s_blocksize, block); > + if (err) > + return err; > + level =3D i & 0xf; what is 0xf ? > + area =3D super->s_area[level]; > + switch (i & ~0xf) { > + case JEG_BASE: > + switch (i) { Does i represent an enum or a bitfield or both ?
> + case JE_COMMIT: > + /* just reads the latest version number */ > + logfs_read_commit(super, block); > + break; > + case JE_DYNSB: > + logfs_read_dynsb(sb, unpack(block, scratch)); > + break; > + > +static void journal_get_free_segment(struct logfs_area *area) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(area->a_sb); > + int i; > + > + journal_for_each(i) { > + if (area->a_segno !=3D super->s_journal_seg[i]) > + continue; > +empty_seg: > + i++; > + if (i =3D=3D LOGFS_JOURNAL_SEGS) > + i =3D 0; > + if (!super->s_journal_seg[i]) > + goto empty_seg; Does this loop for ever or is there a guaranteed exit ? Please use a do while loop instead of the goto > + area->a_segno =3D super->s_journal_seg[i]; > + ++(super->s_journal_ec[i]); > + return; > + } > + BUG(); > +} > + > + > +/** > + * logfs_get_free_entry - return free space for journal entry > + */ > +static s64 logfs_get_free_entry(struct super_block *sb) > +{ > + s64 ret; > + > + mutex_lock(&LOGFS_SUPER(sb)->s_log_mutex); > + ret =3D __logfs_get_free_entry(sb); > + mutex_unlock(&LOGFS_SUPER(sb)->s_log_mutex); > + BUG_ON(ret <=3D 0); /* not sure, but it's safer to BUG than to acce= pt */ It might be safer to do proper error handling.
> + return ret; > +} > + > +static size_t logfs_write_header(struct logfs_super *super, > + struct logfs_journal_header *h, size_t datalen, u16 type) > +{ > + size_t len =3D datalen + sizeof(*h); Empty line > + return __logfs_write_header(super, h, len, datalen, type, COMPR_NON= E); > +} > + > + > +static void *logfs_write_bb(struct super_block *sb, void *h, > + u16 *type, size_t *len) > +{ > + *type =3D JE_BADSEGMENTS; > + *len =3D sb->s_blocksize; > + return LOGFS_SUPER(sb)->s_bb_array; > +} > + > + > +static inline size_t logfs_journal_erasecount_size(struct logfs_supe= r *super) > +{ > + return LOGFS_JOURNAL_SEGS * sizeof(be32); > +} Empty line > +static void *logfs_write_erasecount(struct super_block *sb, void *_e= c, > + u16 *type, size_t *len) > +{ > + > +static void *__logfs_write_anchor(struct super_block *sb, void *_da, > + u16 *type, size_t *len) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + struct logfs_anchor *da =3D _da; > + struct inode *inode =3D super->s_master_inode; > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + int i; > + > + da->da_last_ino =3D cpu_to_be64(super->s_last_ino); > + da->da_size =3D cpu_to_be64(i_size_read(inode)); > + da->da_used_bytes =3D cpu_to_be64(li->li_used_bytes); > + for (i=3D0; i + da->da_data[i] =3D cpu_to_be64(li->li_data[i]); > + *type =3D JE_ANCHOR; > + *len =3D sizeof(*da); > + return da; > +} > + > + > +static void *logfs_write_areas(struct super_block *sb, void *_a, > + u16 *type, size_t *len) > +{ > + struct logfs_area *area; > + struct logfs_je_areas *a =3D _a; > + int i; > + > + for (i=3D0; i<16; i++) { /* FIXME: have all 16 areas */ > + a->used_bytes[i] =3D 0; > + a->segno[i] =3D 0; > + } memset perhaps ?
> + for (i=3D0; i + area =3D LOGFS_SUPER(sb)->s_area[i]; > + a->used_bytes[i] =3D cpu_to_be32(area->a_used_bytes); > + a->segno[i] =3D cpu_to_be32(area->a_segno); > + } > + *type =3D JE_AREAS; > + *len =3D sizeof(*a); > + return a; > +} > + > +int logfs_write_anchor(struct inode *inode) > +{ > + struct super_block *sb =3D inode->i_sb; > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + void *block =3D super->s_compressed_je; > + u64 ofs; > + size_t jpos; > + int i, ret; > + > + ofs =3D logfs_get_free_entry(sb); > + BUG_ON(ofs >=3D super->s_size); > + > + memset(block, 0, sb->s_blocksize); > + jpos =3D 0; > + for (i=3D0; i + super->s_sum_index =3D i; > + jpos +=3D logfs_write_je(sb, jpos, logfs_write_wbuf); > + } > + jpos +=3D logfs_write_je(sb, jpos, logfs_write_bb); > + jpos +=3D logfs_write_je(sb, jpos, logfs_write_erasecount); > + jpos +=3D logfs_write_je(sb, jpos, __logfs_write_anchor); > + jpos +=3D logfs_write_je(sb, jpos, logfs_write_dynsb); > + jpos +=3D logfs_write_je(sb, jpos, logfs_write_areas); > + jpos +=3D logfs_write_je(sb, jpos, logfs_write_commit); > + > + BUG_ON(jpos > sb->s_blocksize); > + > + ret =3D mtdwrite(sb, ofs, sb->s_blocksize, block); > + if (ret) > + return ret; > + return 0; Interesting way to rely on compiler smartness > +} > + > +int logfs_init_journal(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + int ret; > + > + mutex_init(&super->s_log_mutex); > + > + super->s_je =3D kzalloc(sb->s_blocksize, GFP_KERNEL); > + if (!super->s_je) > + goto err0; > + > + super->s_compressed_je =3D kzalloc(sb->s_blocksize, GFP_KERNEL); > + if (!super->s_compressed_je) > + goto err1; > + > + super->s_bb_array =3D kzalloc(sb->s_blocksize, GFP_KERNEL); > + if (!super->s_bb_array) > + goto err2; > + > + super->s_master_inode =3D logfs_new_meta_inode(sb, LOGFS_INO_MASTER= ); > + if (!super->s_master_inode) > + goto err3; > + > + super->s_master_inode->i_nlink =3D 1; /* lock it in ram */ > + > + /* logfs_scan_journal()
is looking for the latest journal entries, = but > + * doesn't copy them into data structures yet. logfs_read_journal(= ) > + * then re-reads those entries and copies their contents over. */ > + ret =3D logfs_scan_journal(sb); > + if (ret) > + return ret; what about the allocated buffers ? > + ret =3D logfs_read_journal(sb); > + if (ret) > + return ret; dito > + reserve_sb_and_journal(sb); > + logfs_calc_free(sb); > + > + super->s_journal_area->a_ops =3D &journal_area_ops; > + return 0; > +err3: > + kfree(super->s_bb_array); > +err2: > + kfree(super->s_compressed_je); > +err1: > + kfree(super->s_je); > +err0: > + return -ENOMEM; > +} > + > + > --- /dev/null 2007-04-18 05:32:26.652341749 +0200 > +++ linux-2.6.21logfs/fs/logfs/readwrite.c 2007-05-07 20:37:05.000000= 000 +0200 > @@ -0,0 +1,1125 @@ > +/** > + * fs/logfs/readwrite.c > + * > + * Actually contains five sets of very similar functions: > + * read read blocks from a file > + * write write blocks to a file > + * valid check whether a block still belongs to a file > + * truncate truncate a file > + * rewrite move existing blocks of a file to a new location (gc help= er) License ? > + */ > +#include "logfs.h" > + > + > +static int logfs_read_empty(void *buf, int read_zero) > +{ > + if (!read_zero) > + return -ENODATA; > + > + memset(buf, 0, PAGE_CACHE_SIZE); Is buf guaranteed to be at least sizeof(PAGE_CACHE_SIZE) ? 
> + return 0; > +} > +static int logfs_read_direct(struct inode *inode, pgoff_t index, voi= d *buf, > + int read_zero) > +{ > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + u64 block; > + > + block =3D li->li_data[index]; > + if (!block) > + return logfs_read_empty(buf, read_zero); > + > + //printk("ino=3D%lx, index=3D%lx, blocks=3D%llx\n", inode->i_ino, i= ndex, block); Please remove > + return logfs_segment_read(inode->i_sb, buf, block); > +} > + > + > + > +static unsigned long get_bits(u64 val, int skip, int no) > +{ > + u64 ret =3D val; > + > + ret >>=3D skip * no; > + ret <<=3D 64 - no; > + ret >>=3D 64 - no; > + BUG_ON((unsigned long)ret !=3D ret); ???? > + return ret; > +} > + > + > + > +static u64 seek_data_loop(struct inode *inode, u64 pos, int count) > +{ > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + struct logfs_super *super =3D LOGFS_SUPER(inode->i_sb); > + be64 *rblock; > + u64 bofs =3D li->li_data[I1_INDEX + count]; > + int bits =3D LOGFS_BLOCK_BITS; > + int i, ret, slot; > + > + BUG_ON(!bofs); > + > + rblock =3D logfs_get_rblock(super); > + > + for (i=3Dcount; i>=3D0; i--) { > + ret =3D logfs_segment_read(inode->i_sb, rblock, bofs); > + if (ret) > + goto out; break; > + slot =3D get_bits(pos, i, bits); > + while (slot < LOGFS_BLOCK_FACTOR && rblock[slot] =3D=3D 0) { > + slot++; > + pos +=3D 1 << (LOGFS_BLOCK_BITS * i); > + } > + if (slot >=3D LOGFS_BLOCK_FACTOR) > + goto out; break; > + bofs =3D be64_to_cpu(rblock[slot]); > + } > +out: > + logfs_put_rblock(super); > + return pos; > +} > + > +static int logfs_is_valid_loop(struct inode *inode, pgoff_t index, > + int count, u64 ofs) > +{ > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + struct logfs_super *super =3D LOGFS_SUPER(inode->i_sb); > + be64 *rblock; > + u64 bofs =3D li->li_data[I1_INDEX + count]; > + int bits =3D LOGFS_BLOCK_BITS; > + int i, ret; > + > + if (!bofs) > + return 0; > + > + if (bofs =3D=3D ofs) > + return 1; > + > + rblock =3D logfs_get_rblock(super); > + > + 
for (i=3Dcount; i>=3D0; i--) { .... > + ret =3D logfs_segment_read(inode->i_sb, rblock, bofs); > + if (ret) > + goto fail; please use break and do a return !ret; > + bofs =3D be64_to_cpu(rblock[get_bits(index, i, bits)]); > + if (!bofs) > + goto fail; > + > + if (bofs =3D=3D ofs) { > + ret =3D 1; > + goto out; > + } > + } > + > +fail: > + ret =3D 0; =09 > +out: > + logfs_put_rblock(super); > + return ret; > +} > + > + > +static int __logfs_is_valid_block(struct inode *inode, pgoff_t index= , u64 ofs) > +{ > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + > + //printk("%lx, %x, %x\n", inode->i_ino, inode->i_nlink, atomic_read= (&inode->i_count)); Sigh > + if ((inode->i_nlink =3D=3D 0) && atomic_read(&inode->i_count) =3D=3D= 1) > + return 0; > + > + if (li->li_flags & LOGFS_IF_EMBEDDED) > + return 0; > + > + if (index < I0_BLOCKS) > + return logfs_is_valid_direct(li, index, ofs); > + else if (index < I1_BLOCKS) > + return logfs_is_valid_loop(inode, index, 0, ofs); > + else if (index < I2_BLOCKS) > + return logfs_is_valid_loop(inode, index, 1, ofs); > + else if (index < I3_BLOCKS) > + return logfs_is_valid_loop(inode, index, 2, ofs); > + > + BUG(); > + return 0; > +} > + > + > +int logfs_is_valid_block(struct super_block *sb, u64 ofs, u64 ino, u= 64 pos) > +{ > + struct inode *inode; > + int ret, cookie; > + > + /* Umount closes a segment with free blocks remaining. Those > + * blocks are by definition invalid. */ > + if (ino =3D=3D -1) > + return 0; > + > + if ((u64)(u_long)ino !=3D ino) { > + printk("%llx, %llx, %llx\n", ofs, ino, pos); more sigh > + LOGFS_BUG(sb); > + } > + inode =3D logfs_iget(sb, ino, &cookie); > + if (!inode) > + return 0; > + > +#if 0 > + /* Any data belonging to dirty inodes must be considered valid unti= l > + * the inode is written back. If we prematurely deleted old blocks > + * and crashed before the inode is written, the filesystem goes boo= m. 
> + */ > + if (inode->i_state & I_DIRTY) > + ret =3D 2; > + else There seems to be a patternm, that unused code is surprisingly well commented. > +#endif > + ret =3D __logfs_is_valid_block(inode, pos, ofs); > + > + logfs_iput(inode, cookie); > + return ret; > +} > + > + > + > +/** > + * logfs_file_read - generic_file_read for in-kernel buffers > + */ > +static ssize_t __logfs_inode_read(struct inode *inode, char *buf, si= ze_t count, > + loff_t *ppos, int read_zero) > +{ > + void *block_data =3D NULL; > + loff_t size =3D i_size_read(inode); > + int err =3D -ENOMEM; > + > + pr_debug("read from %lld, count %zd\n", *ppos, count); Loglevel missing > + if (*ppos >=3D size) > + return 0; > + if (count > size - *ppos) > + count =3D size - *ppos; > + > + BUG_ON(logfs_index(*ppos) !=3D logfs_index(*ppos + count - 1)); > + > + block_data =3D kzalloc(LOGFS_BLOCKSIZE, GFP_KERNEL); > + if (!block_data) > + goto fail; > + > + err =3D logfs_read_block(inode, logfs_index(*ppos), block_data, > + read_zero); > + if (err) > + goto fail; > + > + memcpy(buf, block_data + (*ppos % LOGFS_BLOCKSIZE), count); > + *ppos +=3D count; > + kfree(block_data); > + return count; err =3D count; and fall trough ? > +fail: > + kfree(block_data); > + return err; > +} > + > +static int logfs_alloc_bytes(struct inode *inode, int bytes) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(inode->i_sb); > + > + if (!bytes) > + return 0; > + > + if (super->s_free_bytes < bytes + super->s_gc_reserve) { > + //TRACE(); Sigh. > + return -ENOSPC; > + } > + > + /* Actual allocation happens later. Make sure we don't drop the > + * lock before then! */ > + > + return 0; > +} > + > + > +/* > + * File is too large for embedded data when called. Move data to fi= rst > + * block and clear embedded area > + */ > +static int logfs_move_embedded(struct inode *inode, be64 **wblocks) > +{ > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + void *buf; > + s64 block; > + int i; > + > + if (! 
(li->li_flags & LOGFS_IF_EMBEDDED)) > + return 0; > + > + if (logfs_alloc_blocks(inode, 1)) { > + //TRACE(); more sigh > + return -ENOSPC; > + } > + > + buf =3D wblocks[0]; > + > + memcpy(buf, li->li_data, LOGFS_EMBEDDED_SIZE); > + block =3D logfs_segment_write(inode, buf, 0, 0, 1); > + if (block < 0) > + return block; > + > + li->li_data[0] =3D block; > + > + li->li_flags &=3D ~LOGFS_IF_EMBEDDED; > + for (i=3D1; i + li->li_data[i] =3D 0; > + > + return logfs_dirty_inode(inode); > +} > + > + > +static int logfs_write_direct(struct inode *inode, pgoff_t index, vo= id *buf) > +{ > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + s64 block; > + > + if (li->li_data[index] =3D=3D 0) { > + if (logfs_alloc_blocks(inode, 1)) { > + //TRACE(); again =09 > + return -ENOSPC; > + } > + } > + block =3D logfs_segment_write(inode, buf, index, 0, 1); > + if (block < 0) > + return block; > + > + if (li->li_data[index]) > + logfs_segment_delete(inode, li->li_data[index], index, 0); > + li->li_data[index] =3D block; > + > + return logfs_dirty_inode(inode); > +} > + > + > +static int logfs_write_loop(struct inode *inode, pgoff_t index, void= *buf, > + be64 **wblocks, int count) > +{ > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + u64 bofs =3D li->li_data[I1_INDEX + count]; > + s64 block; > + int bits =3D LOGFS_BLOCK_BITS; > + int allocs =3D 0; > + int i, ret; > + > + for (i=3Dcount; i>=3D0; i--) { > + if (bofs) { > + ret =3D logfs_segment_read(inode->i_sb, wblocks[i], bofs); > + if (ret) > + return ret; > + } else { > + allocs++; > + memset(wblocks[i], 0, LOGFS_BLOCKSIZE); > + } > + bofs =3D be64_to_cpu(wblocks[i][get_bits(index, i, bits)]); > + } > + > + if (! wblocks[0][get_bits(index, 0, bits)]) > + allocs++; > + if (logfs_alloc_blocks(inode, allocs)) { > + //TRACE(); yet more > + return -ENOSPC; > + } > + > + block =3D logfs_segment_write(inode, buf, index, 0, allocs); > + allocs =3D allocs ? 
allocs-1 : 0; > + if (block < 0) > + return block; > + > + for (i=3D0; i<=3Dcount; i++) { i =3D 0; .... > + wblocks[i][get_bits(index, i, bits)] =3D cpu_to_be64(block); > + block =3D logfs_segment_write(inode, wblocks[i], index, i+1, > + allocs); > + allocs =3D allocs ? allocs-1 : 0; > + if (block < 0) > + return block; > + } > + > + li->li_data[I1_INDEX + count] =3D block; > + > + return logfs_dirty_inode(inode); > +} > + > + > + > + > +int logfs_rewrite_block(struct inode *inode, pgoff_t index, u64 ofs,= int level) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(inode->i_sb); > + be64 **wblocks; > + void *buf; > + int ret; > + > + //printk("(%lx, %lx, %llx, %x)\n", inode->i_ino, index, ofs, level)= ; yay ! > + wblocks =3D super->s_wblock; > + buf =3D wblocks[LOGFS_MAX_INDIRECT]; > + ret =3D __logfs_rewrite_block(inode, index, buf, wblocks, level); > + return ret; > +} > + > + > +/** Please do not use /** here, it is the start sequence for kernel doc comments > + * Three cases exist: > + * size <=3D pos - remove full block > + * size >=3D pos + chunk - do nothing > + * pos < size < pos + chunk - truncate, rewrite > + */ > +static s64 __logfs_truncate_i0(struct inode *inode, u64 size, u64 bo= fs, > + u64 pos, be64 **wblocks) > +{ > + size_t len =3D size - pos; > + void *buf =3D wblocks[LOGFS_MAX_INDIRECT]; > + int err; > + > + if (size <=3D pos) { /* remove whole block */ > + logfs_segment_delete(inode, bofs, > + pos >> inode->i_sb->s_blocksize_bits, 0); > + return 0; > + } > + > + /* truncate this block, rewrite it */ > + err =3D logfs_segment_read(inode->i_sb, buf, bofs); > + if (err) > + return err; > + > + memset(buf + len, 0, LOGFS_BLOCKSIZE - len); > + return logfs_segment_write_pos(inode, buf, pos, 0, 0); > +} > + > + > +/* FIXME: move to super */ Please do so > +static u64 logfs_factor[] =3D { > + LOGFS_BLOCKSIZE, > + LOGFS_I1_SIZE, > + LOGFS_I2_SIZE, > + LOGFS_I3_SIZE > +}; > + > + > +static ssize_t __logfs_inode_write(struct inode *inode, const char *= 
buf, > + size_t count, loff_t *ppos) > +{ > + void *block_data =3D NULL; > + int err =3D -ENOMEM; > + > + pr_debug("write to 0x%llx, count %zd\n", *ppos, count); > + > + BUG_ON(logfs_index(*ppos) !=3D logfs_index(*ppos + count - 1)); > + > + block_data =3D kzalloc(LOGFS_BLOCKSIZE, GFP_KERNEL); > + if (!block_data) > + goto fail; > + > + err =3D logfs_read_block(inode, logfs_index(*ppos), block_data, 1); > + if (err) > + goto fail; > + > + memcpy(block_data + (*ppos % LOGFS_BLOCKSIZE), buf, count); > + > + if (i_size_read(inode) < *ppos + count) > + i_size_write(inode, *ppos + count); > + > + err =3D logfs_write_buf(inode, logfs_index(*ppos), block_data); > + if (err) > + goto fail; > + > + *ppos +=3D count; > + pr_debug("write to %lld, count %zd\n", *ppos, count); Please add some hint, where this comes from > + kfree(block_data); > + return count; err =3D count; fall through ? > +fail: > + kfree(block_data); > + return err; > +} > + > + > +int logfs_inode_read(struct inode *inode, void *buf, size_t n, loff_= t _pos) > +{ > + loff_t pos =3D _pos << inode->i_sb->s_blocksize_bits; > + ssize_t ret; > + > + if (pos >=3D i_size_read(inode)) > + return -EOF; > + ret =3D __logfs_inode_read(inode, buf, n, &pos, 0); > + if (ret < 0) > + return ret; > + ret =3D ret=3D=3Dn ? 0 : -EIO; return ret =3D=3D n ? ..... perhaps ? > + return ret; > +} > + > + > + > +int logfs_init_rw(struct logfs_super *super) > +{ > + int i; > + > + mutex_init(&super->s_r_mutex); > + mutex_init(&super->s_w_mutex); > + super->s_rblock =3D kmalloc(LOGFS_BLOCKSIZE, GFP_KERNEL); > + if (!super->s_wblock) > + return -ENOMEM; > + for (i=3D0; i<=3DLOGFS_MAX_INDIRECT; i++) { i =3D 0; ...
> + super->s_wblock[i] =3D kmalloc(LOGFS_BLOCKSIZE, GFP_KERNEL); > + if (!super->s_wblock) { > + logfs_cleanup_rw(super); > + return -ENOMEM; > + } > + } > + > + return 0; > +} > + > + > +void logfs_cleanup_rw(struct logfs_super *super) > +{ > + int i; > + > + for (i=3D0; i<=3DLOGFS_MAX_INDIRECT; i++) dito > + kfree(super->s_wblock[i]); > + kfree(super->s_rblock); > +} > --- /dev/null 2007-04-18 05:32:26.652341749 +0200 > +++ linux-2.6.21logfs/fs/logfs/super.c 2007-05-07 13:32:12.000000000 = +0200 > @@ -0,0 +1,490 @@ Comment, license please > +#include "logfs.h" > + > + > +#define FAIL_ON(cond) do { if (unlikely((cond))) return -EINVAL; } w= hile(0) Please open code > +int mtdread(struct super_block *sb, loff_t ofs, size_t len, void *bu= f) > +{ > + struct mtd_info *mtd =3D LOGFS_SUPER(sb)->s_mtd; > + size_t retlen; > + int ret; > + > + ret =3D mtd->read(mtd, ofs, len, &retlen, buf); > + if (ret || (retlen !=3D len)) { > + printk("ret: %x\n", ret); > + printk("retlen: %x, len: %x\n", retlen, len); > + printk("ofs: %llx, mtd->size: %x\n", ofs, mtd->size); Sigh > + dump_stack(); > + return -EIO; > + } > + > + return 0; > +} > + > + > +static void check(void *buf, size_t len) > +{ > + char value[8] =3D {0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a}; > + void *poison =3D buf, *end =3D buf + len; > + > + while (poison) { > + poison =3D memchr(poison, value[0], end-poison); > + if (!poison || poison + 8 > end) > + return; > + if (! 
memcmp(poison, value, 8)) { > + printk("%p %p %p\n", buf, poison, end); More sigh > + BUG(); > + } > + poison++; > + } > +} > + > + > +int mtdwrite(struct super_block *sb, loff_t ofs, size_t len, void *b= uf) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + struct mtd_info *mtd =3D super->s_mtd; > + struct inode *inode =3D super->s_dev_inode; > + size_t retlen; > + loff_t page_start, page_end; > + int ret; > + > + if (0) /* FIXME: this should be a debugging option */ > + check(buf, len); > + > + //printk("write ofs=3D%llx, len=3D%x\n", ofs, len); hrmpf > + BUG_ON((ofs >=3D mtd->size) || (len > mtd->size - ofs)); > + BUG_ON(ofs !=3D (ofs >> super->s_writeshift) << super->s_writeshift= ); > + //BUG_ON(len !=3D (len >> super->s_blockshift) << super->s_blockshi= ft); hrmpf > + /* FIXME: fix all callers to write PAGE_CACHE_SIZE'd chunks */ > + BUG_ON(len > PAGE_CACHE_SIZE); > + page_start =3D ofs & PAGE_CACHE_MASK; > + page_end =3D PAGE_CACHE_ALIGN(ofs + len) - 1; > + truncate_inode_pages_range(&inode->i_data, page_start, page_end); > + ret =3D mtd->write(mtd, ofs, len, &retlen, buf); > + if (ret || (retlen !=3D len)) > + return -EIO; > + > + return 0; > +} > + > + > +static DECLARE_COMPLETION(logfs_erase_complete); empty line > +static void logfs_erase_callback(struct erase_info *ei) > +{ > + complete(&logfs_erase_complete); > +} dito > +int mtderase(struct super_block *sb, loff_t ofs, size_t len) > +{ > + struct mtd_info *mtd =3D LOGFS_SUPER(sb)->s_mtd; > + struct inode *inode =3D LOGFS_SUPER(sb)->s_dev_inode; > + struct erase_info ei; > + int ret; > + > + BUG_ON(len % mtd->erasesize); > + > + truncate_inode_pages_range(&inode->i_data, ofs, ofs+len-1); > + if (mtd->block_isbad(mtd, ofs)) > + return -EIO; this actually leads to a double check of block_isbad for blocks which are not bad.=20 > + memset(&ei, 0, sizeof(ei)); > + ei.mtd =3D mtd; > + ei.addr =3D ofs; > + ei.len =3D len; > + ei.callback =3D logfs_erase_callback; > + ret =3D mtd->erase(mtd, &ei); > 
+ if (ret) > + return -EIO; > + > + wait_for_completion(&logfs_erase_complete); > + if (ei.state !=3D MTD_ERASE_DONE) > + return -EIO; > + return 0; > +} > + > + > + > +void *logfs_device_getpage(struct super_block *sb, u64 offset, > + struct page **page) > +{ > + struct inode *inode =3D LOGFS_SUPER(sb)->s_dev_inode; > + > + *page =3D read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SH= IFT, > + logfs_readdevice, NULL); > + BUG_ON(IS_ERR(*page)); /* TODO: use mempool here */ For the BUG ? > + return kmap(*page); > +} > + > + > +static int logfs_get_sb_final(struct super_block *sb, struct vfsmoun= t *mnt) > +{ > + struct inode *rootdir; > + int err; > + > + /* root dir */ > + rootdir =3D iget(sb, LOGFS_INO_ROOT); > + if (!rootdir) > + goto fail; > + > + sb->s_root =3D d_alloc_root(rootdir); > + if (!sb->s_root) > + goto fail; > + > +#if 1 > + err =3D logfs_fsck(sb); > +#else > + err =3D 0; > +#endif Please clean up > + if (err) { > + printk(KERN_ERR "LOGFS: fsck failed, refusing to mount\n"); > + goto fail; > + } > + > + return simple_set_mnt(mnt, sb); > + > +fail: > + iput(LOGFS_SUPER(sb)->s_master_inode); > + return -EIO; > +} > + > + > + > + > + > +static int logfs_get_sb(struct file_system_type *type, int flags, > + const char *devname, void *data, struct vfsmount *mnt) > +{ > + ulong mtdnr; > + struct mtd_info *mtd; > + > +#if 0 > + if (!devname) > + return ERR_PTR(-EINVAL); > + if (strncmp(devname, "mtd", 3)) > + return ERR_PTR(-EINVAL); > + > + { > + char *garbage; > + mtdnr =3D simple_strtoul(devname+3, &garbage, 0); > + if (*garbage) > + return ERR_PTR(-EINVAL); > + } > +#else > + mtdnr =3D 0; > +#endif > + Please clean up > + mtd =3D get_mtd_device(NULL, mtdnr); > + if (!mtd) > + return -EINVAL; > + > + return logfs_get_sb_mtd(type, flags, mtd, mnt); > +} > + > +-- /dev/null 2007-04-18 05:32:26.652341749 +0200 > +++ linux-2.6.21logfs/fs/logfs/progs/mkfs.c 2007-05-07 13:32:12.00000= 0000 +0200 Why does this need to be in a sub directory ? 
And shouldn't this be user space tools - or what am I missing here ? > @@ -0,0 +1,319 @@ Comment, license > +#include "../logfs.h" > + > +#define OFS_SB 0 > +#define OFS_JOURNAL 1 > +#define OFS_ROOTDIR 3 > +#define OFS_IFILE 4 > +#define OFS_COUNT 5 enum ? > +static u64 segment_offset[OFS_COUNT]; > + > +static u64 fssize; > +static u64 no_segs; > +static u64 free_blocks; > + > +static u32 segsize; > +static u32 blocksize; > +static int segshift; > +static int blockshift; > +static int writeshift; > + > +static u32 blocks_per_seg; > +static u16 version; > + > +static be32 bb_array[1024]; > +static int bb_count; > + > + > +#if 0 > +/* rootdir */ > +static int make_rootdir(struct super_block *sb) > +{ > + struct logfs_disk_inode *di; > + int ret; > + > + di =3D kzalloc(blocksize, GFP_KERNEL); > + if (!di) > + return -ENOMEM; > + > + di->di_flags =3D cpu_to_be32(LOGFS_IF_VALID); > + di->di_mode =3D cpu_to_be16(S_IFDIR | 0755); > + di->di_refcount =3D cpu_to_be32(2); > + ret =3D mtdwrite(sb, segment_offset[OFS_ROOTDIR], blocksize, di); > + kfree(di); > + return ret; > +} > + > + > +/* summary */ > +static int make_summary(struct super_block *sb) > +{ > + struct logfs_disk_sum *sum; > + u64 sum_ofs; > + int ret; > + > + sum =3D kzalloc(LOGFS_BLOCKSIZE, GFP_KERNEL); > + if (!sum) > + return -ENOMEM; > + memset(sum, 0xff, LOGFS_BLOCKSIZE); > + > + sum->oids[0].ino =3D cpu_to_be64(LOGFS_INO_MASTER); > + sum->oids[0].pos =3D cpu_to_be64(LOGFS_INO_ROOT); > + sum_ofs =3D segment_offset[OFS_ROOTDIR]; > + sum_ofs +=3D segsize - blocksize; > + sum->level =3D LOGFS_MAX_LEVELS; > + ret =3D mtdwrite(sb, sum_ofs, LOGFS_BLOCKSIZE, sum); > + kfree(sum); > + return ret; > +} > +#endif Please remove > + > +/* journal */ > +static size_t __write_header(struct logfs_journal_header *h, size_t = len, > + size_t datalen, u16 type, u8 compr) > +{ > + h->h_len =3D cpu_to_be16(len); > + h->h_type =3D cpu_to_be16(type); > + h->h_version =3D cpu_to_be16(++version); > + h->h_datalen =3D 
cpu_to_be16(datalen); > + h->h_compr =3D compr; > + h->h_pad[0] =3D 'h'; > + h->h_pad[1] =3D 'a'; > + h->h_pad[2] =3D 't'; > + h->h_crc =3D logfs_crc32(h, len, 4); > + return len; > +} > +static size_t write_header(struct logfs_journal_header *h, size_t da= talen, > + u16 type) > +{ > + size_t len =3D datalen + sizeof(*h); > + return __write_header(h, len, datalen, type, COMPR_NONE); > +} > +static size_t je_badsegments(void *data, u16 *type) > +{ > + memcpy(data, bb_array, blocksize); > + *type =3D JE_BADSEGMENTS; > + return blocksize; > +} > +static size_t je_anchor(void *_da, u16 *type) > +{ > + struct logfs_anchor *da =3D _da; > + > + memset(da, 0, sizeof(*da)); > + da->da_last_ino =3D cpu_to_be64(LOGFS_RESERVED_INOS); > + da->da_size =3D cpu_to_be64((LOGFS_INO_ROOT+1) * blocksize); > +#if 0 > + da->da_used_bytes =3D cpu_to_be64(blocksize); > + da->da_data[LOGFS_INO_ROOT] =3D cpu_to_be64(3*segsize); > +#else > + da->da_data[LOGFS_INO_ROOT] =3D 0; > +#endif Please cleanup > + *type =3D JE_ANCHOR; > + return sizeof(*da); > +} Empty line > +static size_t je_dynsb(void *_dynsb, u16 *type) > +{ > + struct logfs_dynsb *dynsb =3D _dynsb; > + > + memset(dynsb, 0, sizeof(*dynsb)); > + dynsb->ds_used_bytes =3D cpu_to_be64(blocksize); > + *type =3D JE_DYNSB; > + return sizeof(*dynsb); > +} Same > +static size_t je_commit(void *h, u16 *type) > +{ > + *type =3D JE_COMMIT; > + return 0; > +} Same > +static size_t write_je(size_t jpos, void *scratch, void *header, > + size_t (*write)(void *scratch, u16 *type)) > +{ > + void *data; > + ssize_t len, max, compr_len, pad_len, full_len; > + u16 type; > + u8 compr =3D COMPR_ZLIB; > + > + header +=3D jpos; > + data =3D header + sizeof(struct logfs_journal_header); > + > + len =3D write(scratch, &type); > + if (len =3D=3D 0) > + return write_header(header, 0, type); > + > + max =3D blocksize - jpos; > + compr_len =3D logfs_compress(scratch, data, len, max); > + if ((compr_len < 0) || (type =3D=3D JE_ANCHOR)) { > + compr_len =3D 
logfs_memcpy(scratch, data, len, max); > + compr =3D COMPR_NONE; > + } > + BUG_ON(compr_len < 0); > + > + pad_len =3D ALIGN(compr_len, 16); > + memset(data + compr_len, 0, pad_len - compr_len); > + full_len =3D pad_len + sizeof(struct logfs_journal_header); > + > + return __write_header(header, full_len, len, type, compr); > +} Same > +static int make_journal(struct super_block *sb) > +{ > + void *journal, *scratch; > + size_t jpos; > + int ret; > + > + journal =3D kzalloc(2*blocksize, GFP_KERNEL); > + if (!journal) > + return -ENOMEM; > + > + scratch =3D journal + blocksize; > + > + jpos =3D 0; > + /* erasecount is not written - implicitly set to 0 */ > + /* neither are summary, index, wbuf */ > + jpos +=3D write_je(jpos, scratch, journal, je_badsegments); > + jpos +=3D write_je(jpos, scratch, journal, je_anchor); > + jpos +=3D write_je(jpos, scratch, journal, je_dynsb); > + jpos +=3D write_je(jpos, scratch, journal, je_commit); > + ret =3D mtdwrite(sb, segment_offset[OFS_JOURNAL], blocksize, journa= l); > + kfree(journal); > + return ret; > +} > + > + > +/* superblock */ > +static int make_super(struct super_block *sb, struct logfs_disk_supe= r *ds) > +{ > + void *sector; > + int ret; > + > + sector =3D kzalloc(4096, GFP_KERNEL); > + if (!sector) > + return -ENOMEM; > + > + memset(ds, 0, sizeof(*ds)); > + > + ds->ds_magic =3D cpu_to_be64(LOGFS_MAGIC); > +#if 0 /* sane defaults */ > + ds->ds_ifile_levels =3D 3; /* 2+1, 1GiB */ > + ds->ds_iblock_levels =3D 4; /* 3+1, 512GiB */ > + ds->ds_data_levels =3D 3; /* old, young, unknown */ > +#else > + ds->ds_ifile_levels =3D 1; /* 0+1, 80kiB */ > + ds->ds_iblock_levels =3D 4; /* 3+1, 512GiB */ > + ds->ds_data_levels =3D 1; /* unknown */ > +#endif Please cleanup > + ds->ds_feature_incompat =3D 0; > + ds->ds_feature_ro_compat=3D 0; > + > + ds->ds_feature_compat =3D 0; > + ds->ds_flags =3D 0; > + > + ds->ds_filesystem_size =3D cpu_to_be64(fssize); > + ds->ds_segment_shift =3D segshift; > + ds->ds_block_shift =3D blockshift; 
> + ds->ds_write_shift =3D writeshift; > + > + ds->ds_journal_seg[0] =3D cpu_to_be64(1); > + ds->ds_journal_seg[1] =3D cpu_to_be64(2); > + ds->ds_journal_seg[2] =3D 0; > + ds->ds_journal_seg[3] =3D 0; > + > + ds->ds_root_reserve =3D 0; > + > + ds->ds_crc =3D logfs_crc32(ds, sizeof(*ds), 12); > + > + memcpy(sector, ds, sizeof(*ds)); > + ret =3D mtdwrite(sb, segment_offset[OFS_SB], 4096, sector); > + kfree(sector); > + return ret; > +} > + > + > +int logfs_mkfs(struct super_block *sb, struct logfs_disk_super *ds) > +{ > + int ret =3D 0; > + > + segshift =3D 17; > + blockshift =3D 12; > + writeshift =3D 8; > + > + segsize =3D 1 << segshift; > + blocksize =3D 1 << blockshift; > + version =3D 0; > + > + getsize(sb, &fssize, &no_segs); > + > + /* 3 segs for sb and journal, > + * 1 block per seg extra, > + * 1 block for rootdir > + */ > + blocks_per_seg =3D 1 << (segshift - blockshift); > + free_blocks =3D (no_segs - 3) * (blocks_per_seg - 1) - 1; > + > + ret =3D bad_block_scan(sb); > + if (ret) > + return ret; > + > + { > + int i; > + for (i=3D0; i + printk("%x->%llx\n", i, segment_offset[i]); > + } > + > +#if 0 > + ret =3D make_rootdir(sb); > + if (ret) > + return ret; > + > + ret =3D make_summary(sb); > + if (ret) > + return ret; > +#endif Same > + ret =3D make_journal(sb); > + if (ret) > + return ret; > + > + ret =3D make_super(sb, ds); > + if (ret) > + return ret; > + > + return 0; > +} > --- /dev/null 2007-04-18 05:32:26.652341749 +0200 > +++ linux-2.6.21logfs/fs/logfs/progs/fsck.c 2007-05-07 13:32:12.00000= 0000 +0200 > @@ -0,0 +1,323 @@ Comment, license > +#include "../logfs.h" > + > +static u64 used_bytes; > +static u64 free_bytes; > +static u64 last_ino; > +static u64 *inode_bytes; > +static u64 *inode_links; > + > + > +/** > + * Pass 1: blocks > + */ > + > + > +static void safe_read(struct super_block *sb, u32 segno, u32 ofs, > + size_t len, void *buf) > +{ > + BUG_ON(wbuf_read(sb, dev_ofs(sb, segno, ofs), len, buf)); > +} Empty line > +static u32 
logfs_free_bytes(struct super_block *sb, u32 segno) > +{ > +static void logfsck_blocks(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + int i; > + int free; > + > + for (i=3D0; is_no_segs; i++) { > + free =3D logfs_free_bytes(sb, i); > + free_bytes +=3D free; > + printk(" %3x", free); > + if (i % 8 =3D=3D 7) > + printk(" : "); > + if (i % 16 =3D=3D 15) > + printk("\n"); > + } > + printk("\n"); printk with loglevels and identifiable origin please > + > + > +static s64 dir_seek_data(struct inode *inode, s64 pos) > +{ > + s64 new_pos =3D logfs_seek_data(inode, pos); new line > + return max((s64)pos, new_pos - 1); > +} > + > + > +static int __logfsck_dirs(struct inode *dir) > +{ > + struct inode *inode; > + loff_t pos; > + u64 ino; > + u8 type; > + int cookie, err, ret =3D 0; > + > + for (pos=3D0; ; pos++) { > + err =3D read_one_dd(dir, pos, &ino, &type); > + //yield(); great. cond_resched() if you really need to > + if (err =3D=3D -ENODATA) { /* dentry was deleted */ > + pos =3D dir_seek_data(dir, pos); > + continue; > + } > + if (err =3D=3D -EOF) > + break; > + if (err) > + goto error0; > + > + err =3D -EIO; > + if (ino > last_ino) { > + printk("ino %llx > last_ino %llx\n", ino, last_ino); loglevel ..... 
> + goto error0; > + } > + inode =3D logfs_iget(dir->i_sb, ino, &cookie); > + if (!inode) { > + printk("Could not find inode #%llx\n", ino); > + goto error0; > + } > + if (type !=3D logfs_type(inode)) { > + printk("dd type %x !=3D inode type %x\n", type, > + logfs_type(inode)); dito > + goto error1; > + } > + inode_links[ino]++; > + err =3D 0; > + if (type =3D=3D DT_DIR) { > + inode_links[dir->i_ino]++; > + inode_links[ino]++; > + err =3D __logfsck_dirs(inode); > + } > +error1: > + logfs_iput(inode, cookie); > +error0: > + if (!ret) > + ret =3D err; > + continue; > + } > + return 1; > +} > + > + > +/** > + * Pass 3: inodes > + */ > + > + > +static int logfs_check_inode(struct inode *inode) > +{ > + struct logfs_inode *li =3D LOGFS_INODE(inode); > + u64 bytes0 =3D li->li_used_bytes; > + u64 bytes1 =3D inode_bytes[inode->i_ino]; > + u64 links0 =3D inode->i_nlink; > + u64 links1 =3D inode_links[inode->i_ino]; > + > + if (bytes0 || bytes1 || links0 || links1 > + || inode->i_ino =3D=3D LOGFS_SUPER(inode->i_sb)->s_last_ino) > + printk("%lx: %llx(%llx) bytes, %llx(%llx) links\n", > + inode->i_ino, bytes0, bytes1, links0, links1); Sigh > + used_bytes +=3D bytes0; > + return (bytes0 =3D=3D bytes1) && (links0 =3D=3D links1); > +} > + > + > +static int logfs_check_ino(struct super_block *sb, u64 ino) > +{ > + struct inode *inode; > + int ret, cookie; > + > + //yield(); See above instance of //yield(); > + inode =3D logfs_iget(sb, ino, &cookie); > + if (!inode) > + return 1; > + ret =3D logfs_check_inode(inode); > + logfs_iput(inode, cookie); > + return ret; > +} > + > + > + > +static int logfsck_stats(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + u64 ostore_segs, total, expected; > + int i, reserved_segs; > + > + reserved_segs =3D 1; /* super_block */ > + journal_for_each(i) > + if (super->s_journal_seg[i]) > + reserved_segs++; > + reserved_segs +=3D super->s_bad_segments; > + > + ostore_segs =3D super->s_no_segs - reserved_segs; > + 
expected =3D ostore_segs << super->s_segshift; > + total =3D free_bytes + used_bytes; > + > + printk("free:%8llx, used:%8llx, total:%8llx", > + free_bytes, used_bytes, expected); loglevel > + if (total > expected) > + printk(" + %llx\n", total - expected); > + else if (total < expected) > + printk(" - %llx\n", expected - total); > + else > + printk("\n"); > + > + return total =3D=3D expected; > +} > + > + > +static int __logfs_fsck(struct super_block *sb) > +{ > + int ret; > + int err =3D 0; > + > + /* pass 1: check blocks */ > + logfsck_blocks(sb); > + /* pass 2: check directories */ > + ret =3D logfsck_dirs(sb); > + if (!ret) { > + printk("Pass 2: directory check failed\n"); same > + err =3D -EIO; > + } > + /* pass 3: check inodes */ > + ret =3D logfsck_inodes(sb); > + if (!ret) { > + printk("Pass 3: inode check failed\n"); same > + err =3D -EIO; > + } > + /* Pass 4: Total blocks */ > + ret =3D logfsck_stats(sb); > + if (!ret) { > + printk("Pass 4: statistic check failed\n"); same > + err =3D -EIO; > + } > + > + return err; > +} > + > + > +int logfs_fsck(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + int ret =3D -ENOMEM; > + > + used_bytes =3D 0; > + free_bytes =3D 0; > + last_ino =3D super->s_last_ino; > + inode_bytes =3D kzalloc(last_ino * sizeof(be64), GFP_KERNEL); > + if (!inode_bytes) > + goto out0; return ret; > + inode_links =3D kzalloc(last_ino * sizeof(be64), GFP_KERNEL); > + if (!inode_links) > + goto out1; > + > + ret =3D __logfs_fsck(sb); > + > + kfree(inode_links); > + inode_links =3D NULL; > +out1: > + kfree(inode_bytes); > + inode_bytes =3D NULL; > +out0: > + return ret; > +} > --- /dev/null 2007-04-18 05:32:26.652341749 +0200 > +++ linux-2.6.21logfs/fs/logfs/Locking 2007-05-07 13:32:12.000000000 = +0200 > @@ -0,0 +1,45 @@ Can you move this into documentation please > --- /dev/null 2007-04-18 05:32:26.652341749 +0200 > +++ linux-2.6.21logfs/fs/logfs/compr.c 2007-05-07 13:32:12.000000000 = +0200 > @@ -0,0 +1,198 
@@ Comment, license > +#include "logfs.h" > +#include > +#include > + > +#define COMPR_LEVEL 3 > + > +static DEFINE_MUTEX(compr_mutex); > +static struct z_stream_s stream; > + > + > +int logfs_memcpy(void *in, void *out, size_t inlen, size_t outlen) > +{ > + if (outlen < inlen) > + return -EIO; > + memcpy(out, in, inlen); > + return inlen; > +} > + > + > +int logfs_compress_vec(struct kvec *vec, int count, void *out, size_= t outlen) > +{ > + int i, ret; > + > + mutex_lock(&compr_mutex); > + ret =3D zlib_deflateInit(&stream, COMPR_LEVEL); > + if (ret !=3D Z_OK) > + goto error; > + > + stream.total_in =3D 0; > + stream.total_out =3D 0; > + > + for (i=3D0; i + stream.next_in =3D vec[i].iov_base; > + stream.avail_in =3D vec[i].iov_len; > + stream.next_out =3D out + stream.total_out; > + stream.avail_out =3D outlen - stream.total_out; > + > + ret =3D zlib_deflate(&stream, Z_NO_FLUSH); > + if (ret !=3D Z_OK) > + goto error; > + /* if (stream.total_out >=3D outlen) > + goto error; */ ??? > + } > + > + stream.next_in =3D vec[count-1].iov_base; > + stream.avail_in =3D vec[count-1].iov_len; > + stream.next_out =3D out + stream.total_out; > + stream.avail_out =3D outlen - stream.total_out; > + > + ret =3D zlib_deflate(&stream, Z_FINISH); > + if (ret !=3D Z_STREAM_END) > + goto error; > + /* if (stream.total_out >=3D outlen) > + goto error; */ ??? 
> + ret =3D zlib_deflateEnd(&stream); > + if (ret !=3D Z_OK) > + goto error; > + > + if (stream.total_out >=3D stream.total_in) > + goto error; > + > + ret =3D stream.total_out; > + mutex_unlock(&compr_mutex); > + return ret; > +error: > + mutex_unlock(&compr_mutex); > + return -EIO; > +} > + > + > +int logfs_uncompress_vec(void *in, size_t inlen, struct kvec *vec, i= nt count) > +{ > + int i, ret; > + > + mutex_lock(&compr_mutex); > + ret =3D zlib_inflateInit(&stream); > + if (ret !=3D Z_OK) > + goto error; > + > + stream.total_in =3D 0; > + stream.total_out =3D 0; > + > + for (i=3D0; i + stream.next_in =3D in + stream.total_in; > + stream.avail_in =3D inlen - stream.total_in; > + stream.next_out =3D vec[i].iov_base; > + stream.avail_out =3D vec[i].iov_len; > + > + ret =3D zlib_inflate(&stream, Z_NO_FLUSH); > + if (ret !=3D Z_OK) > + goto error; > + } > + stream.next_in =3D in + stream.total_in; > + stream.avail_in =3D inlen - stream.total_in; > + stream.next_out =3D vec[count-1].iov_base; > + stream.avail_out =3D vec[count-1].iov_len; > + > + ret =3D zlib_inflate(&stream, Z_FINISH); > + if (ret !=3D Z_STREAM_END) > + goto error; > + > + ret =3D zlib_inflateEnd(&stream); > + if (ret !=3D Z_OK) > + goto error; > + > + mutex_unlock(&compr_mutex); > + return ret; > +error: > + mutex_unlock(&compr_mutex); > + return -EIO; Sigh. Can you please make this a bit more clever ? 
> +} > + > + > +int logfs_uncompress(void *in, void *out, size_t inlen, size_t outle= n) > +{ > + int ret; > + > + mutex_lock(&compr_mutex); > + ret =3D zlib_inflateInit(&stream); > + if (ret !=3D Z_OK) > + goto error; > + > + stream.next_in =3D in; > + stream.avail_in =3D inlen; > + stream.total_in =3D 0; > + stream.next_out =3D out; > + stream.avail_out =3D outlen; > + stream.total_out =3D 0; > + > + ret =3D zlib_inflate(&stream, Z_FINISH); > + if (ret !=3D Z_STREAM_END) > + goto error; > + > + ret =3D zlib_inflateEnd(&stream); > + if (ret !=3D Z_OK) > + goto error; > + > + mutex_unlock(&compr_mutex); > + return ret; > +error: > + mutex_unlock(&compr_mutex); > + return -EIO; Same here > +} > + > +int __init logfs_compr_init(void) > +{ > + size_t size =3D max(zlib_deflate_workspacesize(), > + zlib_inflate_workspacesize()); > + printk("deflate size: %x\n", zlib_deflate_workspacesize()); > + printk("inflate size: %x\n", zlib_inflate_workspacesize()); loglevel > + stream.workspace =3D vmalloc(size); > + if (!stream.workspace) > + return -ENOMEM; > + return 0; > +} > + > +void __exit logfs_compr_exit(void) > +{ > + vfree(stream.workspace); > +} > --- /dev/null 2007-04-18 05:32:26.652341749 +0200 > +++ linux-2.6.21logfs/fs/logfs/segment.c 2007-05-07 20:41:17.00000000= 0 +0200 > @@ -0,0 +1,533 @@ Comment, license > +#include "logfs.h" > + > + > + > +#define HEADER_SIZE sizeof(struct logfs_object_header) empty line > +s64 __logfs_segment_write(struct inode *inode, void *buf, u64 pos, i= nt level, > + int alloc, int len, int compr) > +{ > + struct logfs_area *area; > + struct super_block *sb =3D inode->i_sb; > + u64 ofs; > + u64 ino =3D inode->i_ino; > + int err; > + struct logfs_object_header h; > + > + h.crc =3D cpu_to_be32(0xcccccccc); > + h.len =3D cpu_to_be16(len); > + h.type =3D OBJ_BLOCK; > + h.compr =3D compr; > + h.ino =3D cpu_to_be64(inode->i_ino); > + h.pos =3D cpu_to_be64(pos); > + > + level =3D adj_level(ino, level); > + area =3D get_area(sb, level); > + ofs 
=3D __logfs_get_free_bytes(area, ino, pos, len + HEADER_SIZE); > + LOGFS_BUG_ON(ofs <=3D 0, sb); > + //printk("alloc: (%llx, %llx, %llx, %x)\n", ino, pos, ret, level); clean up > + err =3D buf_write(area, ofs, &h, sizeof(h)); > + if (!err) > + err =3D buf_write(area, ofs + HEADER_SIZE, buf, len); > + BUG_ON(err); > + if (err) > + return err; > + if (alloc) { > + int acc_len =3D (level=3D=3D0) ? len : sb->s_blocksize; > + logfs_consume_bytes(inode, acc_len + HEADER_SIZE); > + } > + > + logfs_close_area(area); /* FIXME merge with open_area */ > + > + //printk(" (%llx, %llx, %llx)\n", ofs, ino, pos); same > + return ofs; > +} > + > + > + > + > +int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf= ) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + struct logfs_area *area; > + u32 segno =3D ofs >> super->s_segshift; > + int i, err; > + > + err =3D mtdread(sb, ofs, len, buf); > + if (err) > + return err; > + > + for (i=3D0; i + area =3D super->s_area[i]; > + if (area->a_segno =3D=3D segno) { > + fixup_from_wbuf(sb, area, buf, ofs, len); > + break; > + } > + } > + return 0; > +} > + > + > +int logfs_segment_read(struct super_block *sb, void *buf, u64 ofs) > +{ > + struct logfs_object_header *h; > + u16 len; > + int err, bs =3D sb->s_blocksize; > + > + mutex_lock(&compr_mutex); > + err =3D wbuf_read(sb, ofs, bs+24, compressor_buf); > + if (err) > + goto out; > + h =3D (void*)compressor_buf; please use proper typecasts > + len =3D be16_to_cpu(h->len); > + > + switch (h->compr) { > + case COMPR_NONE: > + logfs_memcpy(compressor_buf+24, buf, bs, bs); > + break; > + case COMPR_ZLIB: > + err =3D logfs_uncompress(compressor_buf+24, buf, len, bs); > + BUG_ON(err); > + break; > + default: > + LOGFS_BUG(sb); > + } > +out: > + mutex_unlock(&compr_mutex); > + return err; > +} > + > + > +static u64 logfs_block_mask[] =3D { > + ~0, > + ~(I1_BLOCKS-1), > + ~(I2_BLOCKS-1), > + ~(I3_BLOCKS-1) > +}; Empty line please > +static int check_pos(struct super_block 
*sb, u64 pos1, u64 pos2, int= level) > +{ > + LOGFS_BUG_ON( (pos1 & logfs_block_mask[level]) !=3D > + (pos2 & logfs_block_mask[level]), sb); > +} empty line > +int logfs_segment_delete(struct inode *inode, u64 ofs, u64 pos, int = level) > +{ > + struct super_block *sb =3D inode->i_sb; > + struct logfs_object_header *h; > + u16 len; > + int err; > + > + > + mutex_lock(&compr_mutex); > + err =3D wbuf_read(sb, ofs, 4096+24, compressor_buf); > + LOGFS_BUG_ON(err, sb); > + h =3D (void*)compressor_buf; proper typecast > + len =3D be16_to_cpu(h->len); > + check_pos(sb, pos, be64_to_cpu(h->pos), level); > + mutex_unlock(&compr_mutex); > + > + level =3D adj_level(inode->i_ino, level); > + len =3D (level=3D=3D0) ? len : sb->s_blocksize; > + logfs_remove_bytes(inode, len + sizeof(*h)); > + return 0; > +} > + > + > +int logfs_open_area(struct logfs_area *area) > +{ > + if (area->a_is_open) > + return 0; /* nothing to do */ yeah, another really helpful comment > + area->a_ops->get_free_segment(area); > + area->a_used_objects =3D 0; > + area->a_used_bytes =3D 0; > + area->a_ops->get_erase_count(area); > + > + area->a_ops->clear_blocks(area); > + area->a_is_open =3D 1; > + > + return area->a_ops->erase_segment(area); > +} > + > +static void ostore_get_free_segment(struct logfs_area *area) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(area->a_sb); > + struct logfs_segment *seg; > + > + BUG_ON(list_empty(&super->s_free_list)); > + > + seg =3D list_entry(super->s_free_list.prev, struct logfs_segment, l= ist); > + list_del(&seg->list); > + area->a_segno =3D seg->segno; > + kfree(seg); > + super->s_free_count -=3D 1; get_free_segment actually kfree's a segment ? 
Please use a less misleading function name > +} > + > + > +static void ostore_get_erase_count(struct logfs_area *area) > +{ > + struct logfs_segment_header h; > + > + device_read(area->a_sb, area->a_segno, 0, sizeof(h), &h); error handling > + area->a_erase_count =3D be32_to_cpu(h.ec) + 1; > +} > + > + > + > +static int ostore_erase_segment(struct logfs_area *area) > +{ > + struct logfs_segment_header h; > + u64 ofs; > + int err; > + > + err =3D logfs_erase_segment(area->a_sb, area->a_segno); > + if (err) > + return err; > + > + h.len =3D 0; > + h.type =3D OBJ_OSTORE; > + h.level =3D area->a_level; > + h.segno =3D cpu_to_be32(area->a_segno); > + h.ec =3D cpu_to_be32(area->a_erase_count); > + h.gec =3D cpu_to_be64(LOGFS_SUPER(area->a_sb)->s_gec); > + h.crc =3D logfs_crc32(&h, sizeof(h), 4); > + /* FIXME: write it out */ isn't that what buf_write() does ? > + ofs =3D dev_ofs(area->a_sb, area->a_segno, 0); > + area->a_used_bytes =3D sizeof(h); > + return buf_write(area, ofs, &h, sizeof(h)); > +} > + > + > +static void flush_buf(struct logfs_area *area) > +{ > + struct super_block *sb =3D area->a_sb; > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + u32 used, free; > + u64 ofs; > + u32 writemask =3D super->s_writesize - 1; > + int err; > + > + ofs =3D dev_ofs(sb, area->a_segno, area->a_used_bytes); > + ofs &=3D ~writemask; > + used =3D area->a_used_bytes & writemask; > + free =3D super->s_writesize - area->a_used_bytes; > + free &=3D writemask; > + //printk("flush(%llx, %x, %x)\n", ofs, used, free); sigh > + if (used =3D=3D 0) > + return; > + > + TRACE(); sigh more > + memset(area->a_wbuf + used, 0xff, free); > + err =3D mtdwrite(sb, ofs, super->s_writesize, area->a_wbuf); > + LOGFS_BUG_ON(err, sb); > +} > + > + > + > +int logfs_init_areas(struct super_block *sb) > +{ > + struct logfs_super *super =3D LOGFS_SUPER(sb); > + int i; > + > + super->s_journal_area =3D kzalloc(sizeof(struct logfs_area), GFP_KE= RNEL); > + if (!super->s_journal_area) > + return -ENOMEM; > 
+ super->s_journal_area->a_sb =3D sb; > + > + for (i=3D0; i + super->s_area[i] =3D init_ostore_area(sb, i); > + if (!super->s_area[i]) > + goto err; > + } > + return 0; > + > +err: > + for (i--; i>=3D0; i--) same here > + cleanup_ostore_area(super->s_area[i]); > + kfree(super->s_journal_area); > + return -ENOMEM; > +} > + > + > +void logfs_cleanup_areas(struct logfs_super *super) > +{ > + int i; > + > + for (i=3D0; i + cleanup_ostore_area(super->s_area[i]); > + kfree(super->s_journal_area); > +} > --- /dev/null 2007-04-18 05:32:26.652341749 +0200 > +++ linux-2.6.21logfs/fs/logfs/memtree.c 2007-05-07 13:32:12.00000000= 0 +0200 > @@ -0,0 +1,199 @@ > +/* In-memory B+Tree. */ license and a little bit more description > +#include "logfs.h" > + > +#define BTREE_NODES 16 /* 32bit, 128 byte cacheline */ > +//#define BTREE_NODES 8 /* 32bit, 64 byte cacheline */ Please cleanup > +void *btree_lookup(struct btree_head *head, long val) > +{ > + int i, height =3D head->height; > + struct btree_node *node =3D head->node; > + > + if (val =3D=3D 0) > + return head->null_ptr; > + > + if (height =3D=3D 0) > + return NULL; > + > + for ( ; height > 1; height--) { > + for (i=3D0; i + if (node[i].val <=3D val) > + break; > + node =3D node[i].node; > + } > + > + for (i=3D0; i + if (node[i].val =3D=3D val) > + return node[i].node; > + > + return NULL; > +} > + > + > +static void find_pos(struct btree_node *node, long val, int *pos, in= t *fill) > +{ > + int i; > + > + for (i=3D0; i + if (node[i].val <=3D val) > + break; > + *pos =3D i; > + for (i=3D*pos; i + if (node[i].val =3D=3D 0) > + break; > + *fill =3D i; > +} > + > + > +static struct btree_node *find_level(struct btree_head *head, long v= al, > + int level) > +{ > + struct btree_node *node =3D head->node; > + int i, height =3D head->height; > + > + for ( ; height > level; height--) { > + for (i=3D0; i + if (node[i].val <=3D val) > + break; > + node =3D node[i].node; > + } > + return node; > +} > + > + > + > +static int 
btree_remove_level(struct btree_head *head, long val, int= level) > +{ > + struct btree_node *node; > + int i, pos, fill; > + > + if (val =3D=3D 0) { /* 0 identifies empty slots, so special-case th= is */ > + head->null_ptr =3D NULL; > + return 0; > + } > + > + node =3D find_level(head, val, level); > + find_pos(node, val, &pos, &fill); > + if (level =3D=3D 1) > + BUG_ON(node[pos].val !=3D val); > + > + /* remove and shift */ > + for (i=3Dpos; i + node[i].val =3D node[i+1].val; > + node[i].node =3D node[i+1].node; > + } > + node[fill-1].val =3D 0; > + node[fill-1].node =3D NULL; > + > + if (fill-1 < BTREE_NODES/2) { > + /* XXX */ YYYY perhaps ? > + } > + if (fill-1 =3D=3D 0) { > + btree_remove_level(head, val, level+1); > + kfree(node); > + return 0; > + } > + > + return 0; > +}