Embedded Linux development

Embedded Linux development
 help / color / mirror / Atom feed

* [PATCH 4/7] include/linux/printk.h: Add pr_<level>_once macros
From: Joe Perches @ 2010-12-06  5:44 UTC (permalink / raw)
  To: Andrew Morton, linux-kernel
  Cc: Paul Gortmaker, Matt Mackall, David Woodhouse, linux-embedded
In-Reply-To: <cover.1291611190.git.joe@perches.com>

Move printk_once definitions and add an #ifdef CONFIG_PRINTK
Add pr_<level>_once so printks can use pr_fmt

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/linux/printk.h |   59 +++++++++++++++++++++++++++++++++++------------
 1 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index e3858f2..8a9a2ee 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -111,18 +111,6 @@ extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 extern int printk_delay_msec;
 extern int dmesg_restrict;
 
-/*
- * Print a one-time message (analogous to WARN_ONCE() et al):
- */
-#define printk_once(x...) ({			\
-	static bool __print_once;		\
-						\
-	if (!__print_once) {			\
-		__print_once = true;		\
-		printk(x);			\
-	}					\
-})
-
 void log_buf_kexec_setup(void);
 #else
 static inline __attribute__ ((format (printf, 1, 0)))
@@ -145,9 +133,6 @@ static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 	return false;
 }
 
-/* No effect, but we still get type checking even in the !PRINTK case: */
-#define printk_once(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
-
 static inline void log_buf_kexec_setup(void)
 {
 }
@@ -214,6 +199,50 @@ extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
 #endif
 
 /*
+ * Print a one-time message (analogous to WARN_ONCE() et al):
+ */
+
+#ifdef CONFIG_PRINTK
+#define printk_once(fmt, ...)			\
+({						\
+	static bool __print_once;		\
+						\
+	if (!__print_once) {			\
+		__print_once = true;		\
+		printk(fmt, ##__VA_ARGS__);	\
+	}					\
+})
+#else
+#define printk_once(fmt, ...)			\
+	no_printk(fmt, ##__VA_ARGS__)
+#endif
+
+#define pr_emerg_once(fmt, ...)					\
+	printk_once(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert_once(fmt, ...)					\
+	printk_once(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit_once(fmt, ...)					\
+	printk_once(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err_once(fmt, ...)					\
+	printk_once(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warn_once(fmt, ...)					\
+	printk_once(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_notice_once(fmt, ...)				\
+	printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info_once(fmt, ...)					\
+	printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_cont_once(fmt, ...)					\
+	printk_once(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__)
+/* If you are writing a driver, please use dev_dbg instead */
+#if defined(DEBUG)
+#define pr_debug_once(fmt, ...)					\
+	printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_debug_once(fmt, ...)					\
+	no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
+
+/*
  * ratelimited messages with local ratelimit_state,
  * no local ratelimit_state used in the !PRINTK case
  */
-- 
1.7.3.2.245.g03276.dirty

^ permalink raw reply related

* [PATCH 5/7] include/linux/printk.h lib/hexdump.c: Neatening and add CONFIG_PRINTK guard
From: Joe Perches @ 2010-12-06  5:44 UTC (permalink / raw)
  To: Andrew Morton, linux-kernel
  Cc: Paul Gortmaker, Matt Mackall, David Woodhouse, linux-embedded
In-Reply-To: <cover.1291611190.git.joe@perches.com>

Move prototypes and align arguments.
Add CONFIG_PRINTK guard for print_hex functions

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/linux/printk.h |   42 ++++++++++++++++++++++++++++--------------
 lib/hexdump.c          |    2 ++
 2 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 8a9a2ee..a705a91 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -140,20 +140,6 @@ static inline void log_buf_kexec_setup(void)
 
 extern void dump_stack(void) __cold;
 
-enum {
-	DUMP_PREFIX_NONE,
-	DUMP_PREFIX_ADDRESS,
-	DUMP_PREFIX_OFFSET
-};
-extern void hex_dump_to_buffer(const void *buf, size_t len,
-				int rowsize, int groupsize,
-				char *linebuf, size_t linebuflen, bool ascii);
-extern void print_hex_dump(const char *level, const char *prefix_str,
-				int prefix_type, int rowsize, int groupsize,
-				const void *buf, size_t len, bool ascii);
-extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
-			const void *buf, size_t len);
-
 #ifndef pr_fmt
 #define pr_fmt(fmt) fmt
 #endif
@@ -285,4 +271,32 @@ extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
 	no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
 #endif
 
+
+enum {
+	DUMP_PREFIX_NONE,
+	DUMP_PREFIX_ADDRESS,
+	DUMP_PREFIX_OFFSET
+};
+extern void hex_dump_to_buffer(const void *buf, size_t len,
+			       int rowsize, int groupsize,
+			       char *linebuf, size_t linebuflen, bool ascii);
+#ifdef CONFIG_PRINTK
+extern void print_hex_dump(const char *level, const char *prefix_str,
+			   int prefix_type, int rowsize, int groupsize,
+			   const void *buf, size_t len, bool ascii);
+extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
+				 const void *buf, size_t len);
+#else
+static inline void print_hex_dump(const char *level, const char *prefix_str,
+				  int prefix_type, int rowsize, int groupsize,
+				  const void *buf, size_t len, bool ascii)
+{
+}
+static inline void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
+					const void *buf, size_t len)
+{
+}
+
+#endif
+
 #endif
diff --git a/lib/hexdump.c b/lib/hexdump.c
index b66b2bd..f5fe6ba 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -154,6 +154,7 @@ nil:
 }
 EXPORT_SYMBOL(hex_dump_to_buffer);
 
+#ifdef CONFIG_PRINTK
 /**
  * print_hex_dump - print a text hex dump to syslog for a binary blob of data
  * @level: kernel log level (e.g. KERN_DEBUG)
@@ -238,3 +239,4 @@ void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
 		       buf, len, true);
 }
 EXPORT_SYMBOL(print_hex_dump_bytes);
+#endif
-- 
1.7.3.2.245.g03276.dirty

^ permalink raw reply related

* [PATCH 6/7] include/linux/printk.h: Organize printk_ratelimited macros
From: Joe Perches @ 2010-12-06  5:44 UTC (permalink / raw)
  To: Andrew Morton, linux-kernel
  Cc: Paul Gortmaker, Matt Mackall, David Woodhouse, linux-embedded
In-Reply-To: <cover.1291611190.git.joe@perches.com>

Use no_printk for !CONFIG_PRINTK printk_ratelimited.
Whitespace cleanup.

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/linux/printk.h |   25 ++++++++++++-------------
 1 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index a705a91..b4be1b1 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -233,7 +233,8 @@ extern void dump_stack(void) __cold;
  * no local ratelimit_state used in the !PRINTK case
  */
 #ifdef CONFIG_PRINTK
-#define printk_ratelimited(fmt, ...)  ({				\
+#define printk_ratelimited(fmt, ...)					\
+({									\
 	static DEFINE_RATELIMIT_STATE(_rs,				\
 				      DEFAULT_RATELIMIT_INTERVAL,	\
 				      DEFAULT_RATELIMIT_BURST);		\
@@ -242,36 +243,34 @@ extern void dump_stack(void) __cold;
 		printk(fmt, ##__VA_ARGS__);				\
 })
 #else
-/* No effect, but we still get type checking even in the !PRINTK case: */
-#define printk_ratelimited printk
+#define printk_ratelimited(fmt, ...)					\
+	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
-#define pr_emerg_ratelimited(fmt, ...) \
+#define pr_emerg_ratelimited(fmt, ...)					\
 	printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_alert_ratelimited(fmt, ...) \
+#define pr_alert_ratelimited(fmt, ...)					\
 	printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_crit_ratelimited(fmt, ...) \
+#define pr_crit_ratelimited(fmt, ...)					\
 	printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_err_ratelimited(fmt, ...) \
+#define pr_err_ratelimited(fmt, ...)					\
 	printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warning_ratelimited(fmt, ...) \
+#define pr_warn_ratelimited(fmt, ...)					\
 	printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warn_ratelimited pr_warning_ratelimited
-#define pr_notice_ratelimited(fmt, ...) \
+#define pr_notice_ratelimited(fmt, ...)					\
 	printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_info_ratelimited(fmt, ...) \
+#define pr_info_ratelimited(fmt, ...)					\
 	printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 /* no pr_cont_ratelimited, don't do that... */
 /* If you are writing a driver, please use dev_dbg instead */
 #if defined(DEBUG)
-#define pr_debug_ratelimited(fmt, ...) \
+#define pr_debug_ratelimited(fmt, ...)					\
 	printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
 #else
 #define pr_debug_ratelimited(fmt, ...) \
 	no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
 #endif
 
-
 enum {
 	DUMP_PREFIX_NONE,
 	DUMP_PREFIX_ADDRESS,
-- 
1.7.3.2.245.g03276.dirty

^ permalink raw reply related

* [PATCH 7/7] include/linux/printk.h: Use tab not spaces for indent
From: Joe Perches @ 2010-12-06  5:44 UTC (permalink / raw)
  To: Andrew Morton, linux-kernel
  Cc: Paul Gortmaker, Matt Mackall, David Woodhouse, linux-embedded
In-Reply-To: <cover.1291611190.git.joe@perches.com>

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/linux/printk.h |   14 +++++++-------
 1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index b4be1b1..41388e3 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -145,20 +145,20 @@ extern void dump_stack(void) __cold;
 #endif
 
 #define pr_emerg(fmt, ...) \
-        printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+	printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_alert(fmt, ...) \
-        printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+	printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_crit(fmt, ...) \
-        printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+	printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_err(fmt, ...) \
-        printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+	printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_warning(fmt, ...) \
-        printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+	printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_warn pr_warning
 #define pr_notice(fmt, ...) \
-        printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+	printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_info(fmt, ...) \
-        printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+	printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_cont(fmt, ...) \
 	printk(KERN_CONT fmt, ##__VA_ARGS__)
 
-- 
1.7.3.2.245.g03276.dirty

^ permalink raw reply related

* Re: [PATCH 0/7] printk: add pr_<level>_once, guard print_hex_dump
From: Matt Mackall @ 2010-12-06 15:37 UTC (permalink / raw)
  To: Joe Perches
  Cc: Andrew Morton, Paul Gortmaker, David Woodhouse, linux-embedded,
	linux-kernel
In-Reply-To: <cover.1291611190.git.joe@perches.com>

On Sun, 2010-12-05 at 21:44 -0800, Joe Perches wrote:
> There are many uses of printk_once(KERN_<level>.
> Add pr_<level>_once macros to avoid printk_once(KERN_<level> pr_fmt(fmt).
> Add an #ifdef CONFIG_PRINTK for print_hex_dump and static inline void
> functions for the #else cases to reduce embedded code size.
> Neaten and organize the rest of the code.

Looks fine to me. I'd missed the introduction of the pr_<level> macros
and I'm not sure if I like the idea, but this is a tidy and
well-presented cleanup and extension.

Acked-by: Matt Mackall <mpm@selenic.com>

-- 
Mathematics is the supreme nostalgia of our time.


^ permalink raw reply

* Re: [PATCH 0/7] printk: add pr_<level>_once, guard print_hex_dump
From: Joe Perches @ 2010-12-06 18:12 UTC (permalink / raw)
  To: Matt Mackall
  Cc: Andrew Morton, Paul Gortmaker, David Woodhouse, linux-embedded,
	linux-kernel
In-Reply-To: <1291649869.3065.2095.camel@calx>

On Mon, 2010-12-06 at 09:37 -0600, Matt Mackall wrote:
> On Sun, 2010-12-05 at 21:44 -0800, Joe Perches wrote:
> > There are many uses of printk_once(KERN_<level>.
> > Add pr_<level>_once macros to avoid printk_once(KERN_<level> pr_fmt(fmt).
> > Add an #ifdef CONFIG_PRINTK for print_hex_dump and static inline void
> > functions for the #else cases to reduce embedded code size.
> > Neaten and organize the rest of the code.
> Looks fine to me. I'd missed the introduction of the pr_<level> macros
> and I'm not sure if I like the idea, but this is a tidy and
> well-presented cleanup and extension.
> Acked-by: Matt Mackall <mpm@selenic.com>

The #ifdef CONFIG_PRINTK guard for print_hex_dump saves ~200
bytes in an x86 !CONFIG_PRINTK

There could be ~500 bytes more saved if hex_dump_to_buffer
was compiled out.

It's a more invasive change, so I didn't want to submit it
just now, but it could be something like below.

It requires the modules that use hex_dump_to_buffer, there
aren't many, to Kconfig select HEX_DUMP_TO_BUFFER so it's
not very pretty nor simple.

Thoughts?

---

 drivers/isdn/hardware/mISDN/Kconfig  |    2 +-
 drivers/media/video/hdpvr/Kconfig    |    1 +
 drivers/mfd/Kconfig                  |    1 +
 drivers/net/wireless/iwlwifi/Kconfig |    1 +
 drivers/scsi/osd/Kconfig             |    1 +
 include/linux/printk.h               |   14 ++++++++++++++
 init/Kconfig                         |    7 +++++++
 lib/Kconfig                          |    4 ++++
 lib/Kconfig.debug                    |    1 +
 lib/hexdump.c                        |    2 ++
 10 files changed, 33 insertions(+), 1 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/Kconfig b/drivers/isdn/hardware/mISDN/Kconfig
index eadc1cd..243eadf 100644
--- a/drivers/isdn/hardware/mISDN/Kconfig
+++ b/drivers/isdn/hardware/mISDN/Kconfig
@@ -90,4 +90,4 @@ config MISDN_IPAC
 config MISDN_ISAR
 	tristate
 	depends on MISDN
-
+	select HEX_DUMP_TO_BUFFER
diff --git a/drivers/media/video/hdpvr/Kconfig b/drivers/media/video/hdpvr/Kconfig
index de247f3..851a45f 100644
--- a/drivers/media/video/hdpvr/Kconfig
+++ b/drivers/media/video/hdpvr/Kconfig
@@ -2,6 +2,7 @@
 config VIDEO_HDPVR
 	tristate "Hauppauge HD PVR support"
 	depends on VIDEO_DEV
+	select HEX_DUMP_TO_BUFFER
 	---help---
 	  This is a video4linux driver for Hauppauge's HD PVR USB device.
 
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 3a7b891..982b27a 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -418,6 +418,7 @@ config MFD_WM8994
 config MFD_PCF50633
 	tristate "Support for NXP PCF50633"
 	depends on I2C
+	select HEX_DUMP_TO_BUFFER
 	help
 	  Say yes here if you have NXP PCF50633 chip on your board.
 	  This core driver provides register access and IRQ handling
diff --git a/drivers/net/wireless/iwlwifi/Kconfig b/drivers/net/wireless/iwlwifi/Kconfig
index ed42457..4074fa0 100644
--- a/drivers/net/wireless/iwlwifi/Kconfig
+++ b/drivers/net/wireless/iwlwifi/Kconfig
@@ -2,6 +2,7 @@ config IWLWIFI
 	tristate "Intel Wireless Wifi"
 	depends on PCI && MAC80211
 	select FW_LOADER
+	select HEX_DUMP_TO_BUFFER
 
 menu "Debugging Options"
 	depends on IWLWIFI
diff --git a/drivers/scsi/osd/Kconfig b/drivers/scsi/osd/Kconfig
index 861b5ce..c43df39 100644
--- a/drivers/scsi/osd/Kconfig
+++ b/drivers/scsi/osd/Kconfig
@@ -18,6 +18,7 @@
 config SCSI_OSD_INITIATOR
 	tristate "OSD-Initiator library"
 	depends on SCSI
+	select HEX_DUMP_TO_BUFFER
 	help
 		Enable the OSD-Initiator library (libosd.ko).
 		NOTE: You must also select CRYPTO_SHA1 + CRYPTO_HMAC and their
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 41388e3..38d918d 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -276,9 +276,23 @@ enum {
 	DUMP_PREFIX_ADDRESS,
 	DUMP_PREFIX_OFFSET
 };
+
+#ifdef CONFIG_HEX_DUMP_TO_BUFFER
 extern void hex_dump_to_buffer(const void *buf, size_t len,
 			       int rowsize, int groupsize,
 			       char *linebuf, size_t linebuflen, bool ascii);
+#else
+static inline
+void hex_dump_to_buffer(const void *buf, size_t len,
+			int rowsize, int groupsize,
+			char *linebuf, size_t linebuflen, bool ascii)
+{
+#ifndef CONFIG_EMBEDDED
+#error "Kconfig must select CONFIG_HEX_DUMP_TO_BUFFER"
+#endif
+}
+#endif
+
 #ifdef CONFIG_PRINTK
 extern void print_hex_dump(const char *level, const char *prefix_str,
 			   int prefix_type, int rowsize, int groupsize,
diff --git a/init/Kconfig b/init/Kconfig
index 3eb22ad..5ab5ad8 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -950,6 +950,13 @@ config PRINTK
 	  very difficult to diagnose system problems, saying N here is
 	  strongly discouraged.
 
+config HEX_DUMP_TO_BUFFER
+       default y
+       bool "Enable support for hexdump" if EMBEDDED
+       help
+	  This option enables normal hex_dump support.
+	  Saying N here is strongly discouraged.
+
 config BUG
 	bool "BUG() support" if EMBEDDED
 	default y
diff --git a/lib/Kconfig b/lib/Kconfig
index 3d498b2..2c95d1c 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -5,6 +5,10 @@
 config BINARY_PRINTF
 	def_bool n
 
+config HEX_DUMP_TO_BUFFER
+       bool
+       default n
+
 menu "Library routines"
 
 config RAID6_PQ
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 968d183..e3b0238 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -380,6 +380,7 @@ config DEBUG_KMEMLEAK
 	select STACKTRACE if STACKTRACE_SUPPORT
 	select KALLSYMS
 	select CRC32
+	select HEX_DUMP_TO_BUFFER
 	help
 	  Say Y here if you want to enable the memory leak
 	  detector. The memory allocation/freeing is traced in a way
diff --git a/lib/hexdump.c b/lib/hexdump.c
index f5fe6ba..88d70f4 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -49,6 +49,7 @@ void hex2bin(u8 *dst, const char *src, size_t count)
 }
 EXPORT_SYMBOL(hex2bin);
 
+#ifdef CONFIG_HEX_DUMP_TO_BUFFER
 /**
  * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory
  * @buf: data blob to dump
@@ -153,6 +154,7 @@ nil:
 	linebuf[lx++] = '\0';
 }
 EXPORT_SYMBOL(hex_dump_to_buffer);
+#endif
 
 #ifdef CONFIG_PRINTK
 /**


^ permalink raw reply related

* Re: [PATCH 0/7] printk: add pr_<level>_once, guard print_hex_dump
From: Matt Mackall @ 2010-12-06 18:16 UTC (permalink / raw)
  To: Joe Perches
  Cc: Andrew Morton, Paul Gortmaker, David Woodhouse, linux-embedded,
	linux-kernel
In-Reply-To: <1291659120.17494.179.camel@Joe-Laptop>

On Mon, 2010-12-06 at 10:12 -0800, Joe Perches wrote:
> On Mon, 2010-12-06 at 09:37 -0600, Matt Mackall wrote:
> > On Sun, 2010-12-05 at 21:44 -0800, Joe Perches wrote:
> > > There are many uses of printk_once(KERN_<level>.
> > > Add pr_<level>_once macros to avoid printk_once(KERN_<level> pr_fmt(fmt).
> > > Add an #ifdef CONFIG_PRINTK for print_hex_dump and static inline void
> > > functions for the #else cases to reduce embedded code size.
> > > Neaten and organize the rest of the code.
> > Looks fine to me. I'd missed the introduction of the pr_<level> macros
> > and I'm not sure if I like the idea, but this is a tidy and
> > well-presented cleanup and extension.
> > Acked-by: Matt Mackall <mpm@selenic.com>
> 
> The #ifdef CONFIG_PRINTK guard for print_hex_dump saves ~200
> bytes in an x86 !CONFIG_PRINTK
> 
> There could be ~500 bytes more saved if hex_dump_to_buffer
> was compiled out.

Can't say I'm excited by this approach. .5k is under my threshold for
this level of invasiveness.

-- 
Mathematics is the supreme nostalgia of our time.


^ permalink raw reply

* Re: [PATCH 0/7] printk: add pr_<level>_once, guard print_hex_dump
From: Joe Perches @ 2010-12-06 18:51 UTC (permalink / raw)
  To: Matt Mackall
  Cc: Andrew Morton, Paul Gortmaker, David Woodhouse, linux-embedded,
	linux-kernel
In-Reply-To: <1291659380.3065.2214.camel@calx>

On Mon, 2010-12-06 at 12:16 -0600, Matt Mackall wrote:
> On Mon, 2010-12-06 at 10:12 -0800, Joe Perches wrote:
> > There could be ~500 bytes more saved if hex_dump_to_buffer
> > was compiled out.
> Can't say I'm excited by this approach. .5k is under my threshold for
> this level of invasiveness.

Mine too, but I thought I'd mention it.



^ permalink raw reply

* CELF open project proposals for 2011 - Request for proposals
From: Tim Bird @ 2010-12-07 11:53 UTC (permalink / raw)
  To: celinux-dev@tree.celinuxforum.org, linux-embedded@vger.kernel.org

In addition to its other activities, each year, the CE Linux Forum
performs contract work to enhance embedded Linux.  We would like
to invite you to consider what areas of embedded Linux technology
and embedded Linux development could be improved, and submit
a proposal.

Any ideas or projects that you  think it would be good
for CELF to fund would be appreciated.

Please see more details and full instructions at:
http://elinux.org/CELF_Open_Project_Proposal_2011

Regards,
  -- Tim Bird
Architecture Group Chair
CE Linux Forum

^ permalink raw reply

* [PATCH 0/2] Squashfs: add XZ compression support
From: Phillip Lougher @ 2010-12-09  6:05 UTC (permalink / raw)
  To: Linux Kernel Development, linux-fsdevel, Linux Embedded Maillist
  Cc: Lasse Collin

Hi,

Following the recent posting of patches by Lasse Collin to add XZ (LZMA2)
support to the kernel (http://thread.gmane.org/gmane.linux.kernel/1071297),
I have added support for this to Squashfs.

Advantages of Squashfs XZ over the Squashfs LZMA implementation:

- Significantly better decompressor API supporting multi-call decoding,
   which requires less buffer overhead.
- Greater data robustness due to XZ's CRC32 check.
- BCJ filters which can produce smaller Squashfs images.

The following two patches add Squashfs kernel support.  A git tree with
these patches including Lasse Collin's patches is available here:

http://git.kernel.org/?p=linux/kernel/git/pkl/squashfs-xz.git;a=summary

XZ support has (obviously) also been added to the squashfs tools (Mksquashfs
& Unsquashfs).  These changes are available from the Squashfs CVS repository
(http://sourceforge.net/projects/squashfs/develop).

To build the Squashfs tools, edit the Makefile to enable XZ support
(by default it is disabled).

XZ compression can be specified by using the -comp option, e.g.
% mksquashfs xxx img.sqsh -comp xz

XZ BCJ filters (which can improve the compression of executable code
on certain architectures) are supported by using the -Xbcj option, e.g.

% mksquashfs xxx img.sqsh -comp xz -Xbcj x86

will compress blocks using XZ with no filter, and then XZ with the x86
filter in turn, and choose the best compression.

Multiple filters can be specified which is useful in cases where the source
file system has executable code from a mixture of different architectures,
and again each filter will be tried for each block and the best compression
used, e.g.

% mksquashfs xxx img.sqsh -comp xz -Xbcj x86,arm

will try both the x86 and arm BCJ filters.

Phillip

^ permalink raw reply

* [PATCH 1/2] Squashfs: add XZ compression support
From: Phillip Lougher @ 2010-12-09  6:08 UTC (permalink / raw)
  To: Linux Kernel Development, linux-fsdevel, Linux Embedded Maillist
  Cc: Lasse Collin


Add XZ decompressor wrapper code.

Signed-off-by: Phillip Lougher <phillip@lougher.demon.co.uk>
---
  fs/squashfs/squashfs_fs.h |    1 +
  fs/squashfs/xz_wrapper.c  |  153 +++++++++++++++++++++++++++++++++++++++++++++
  2 files changed, 154 insertions(+), 0 deletions(-)
  create mode 100644 fs/squashfs/xz_wrapper.c

diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index c5137fc..39533fe 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -238,6 +238,7 @@ struct meta_index {
  #define ZLIB_COMPRESSION	1
  #define LZMA_COMPRESSION	2
  #define LZO_COMPRESSION		3
+#define XZ_COMPRESSION		4

  struct squashfs_super_block {
  	__le32			s_magic;
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
new file mode 100644
index 0000000..053fe35
--- /dev/null
+++ b/fs/squashfs/xz_wrapper.c
@@ -0,0 +1,153 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * xz_wrapper.c
+ */
+
+
+#include <linux/mutex.h>
+#include <linux/buffer_head.h>
+#include <linux/slab.h>
+#include <linux/xz.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+#include "decompressor.h"
+
+struct squashfs_xz {
+	struct xz_dec *state;
+	struct xz_buf buf;
+};
+
+static void *squashfs_xz_init(struct squashfs_sb_info *msblk)
+{
+        int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
+
+        struct squashfs_xz *stream = kmalloc(sizeof(*stream), GFP_KERNEL);
+        if (stream == NULL)
+                goto failed;
+	stream->state = xz_dec_init(XZ_PREALLOC, block_size);
+	if (stream->state == NULL)
+		goto failed;
+
+	return stream;
+
+failed:
+	ERROR("Failed to allocate xz workspace\n");
+	kfree(stream);
+	return NULL;
+}
+
+
+static void squashfs_xz_free(void *strm)
+{
+	struct squashfs_xz *stream = strm;
+
+	if (stream) {
+		xz_dec_end(stream->state);
+		kfree(stream);
+	}
+}
+
+
+static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
+	struct buffer_head **bh, int b, int offset, int length, int srclength,
+	int pages)
+{
+	enum xz_ret xz_err;
+	int avail, total = 0, k = 0, page = 0;
+	struct squashfs_xz *stream = msblk->stream;
+
+	mutex_lock(&msblk->read_data_mutex);
+
+	xz_dec_reset(stream->state);
+	stream->buf.in_pos = 0;
+	stream->buf.in_size = 0;
+	stream->buf.out_pos = 0;
+	stream->buf.out_size = PAGE_CACHE_SIZE;
+	stream->buf.out = buffer[page++];
+
+	do {
+		if (stream->buf.in_pos == stream->buf.in_size && k < b) {
+			avail = min(length, msblk->devblksize - offset);
+			length -= avail;
+			wait_on_buffer(bh[k]);
+			if (!buffer_uptodate(bh[k]))
+				goto release_mutex;
+
+			if (avail == 0) {
+				offset = 0;
+				put_bh(bh[k++]);
+				continue;
+			}
+
+			stream->buf.in = bh[k]->b_data + offset;
+			stream->buf.in_size = avail;
+			stream->buf.in_pos = 0;
+			offset = 0;
+		}
+
+		if (stream->buf.out_pos == stream->buf.out_size
+							&& page < pages) {
+			stream->buf.out = buffer[page++];
+			stream->buf.out_pos = 0;
+			total += PAGE_CACHE_SIZE;
+		}
+
+		xz_err = xz_dec_run(stream->state, &stream->buf);
+
+		if (stream->buf.in_pos == stream->buf.in_size && k < b)
+			put_bh(bh[k++]);
+	} while (xz_err == XZ_OK);
+
+	if (xz_err != XZ_STREAM_END) {
+		ERROR("xz_dec_run error, data probably corrupt\n");
+		goto release_mutex;
+	}
+
+	if (k < b) {
+		ERROR("xz_uncompress error, input remaining\n");
+		goto release_mutex;
+	}
+
+	total += stream->buf.out_pos;
+	mutex_unlock(&msblk->read_data_mutex);
+	return total;
+
+release_mutex:
+	mutex_unlock(&msblk->read_data_mutex);
+
+	for (; k < b; k++)
+		put_bh(bh[k]);
+
+	return -EIO;
+}
+
+const struct squashfs_decompressor squashfs_xz_comp_ops = {
+	.init = squashfs_xz_init,
+	.free = squashfs_xz_free,
+	.decompress = squashfs_xz_uncompress,
+	.id = XZ_COMPRESSION,
+	.name = "xz",
+	.supported = 1
+};
+
-- 
1.6.3.3

^ permalink raw reply related

* [PATCH 2/2] Squashfs: Add XZ compression configuration option
From: Phillip Lougher @ 2010-12-09  6:11 UTC (permalink / raw)
  To: Linux Kernel Development, linux-fsdevel, Linux Embedded Maillist
  Cc: Lasse Collin


Signed-off-by: Phillip Lougher <phillip@lougher.demon.co.uk>
---
  fs/squashfs/Kconfig        |   16 ++++++++++++++++
  fs/squashfs/Makefile       |    1 +
  fs/squashfs/decompressor.c |   11 +++++++++++
  fs/squashfs/squashfs.h     |    3 +++
  4 files changed, 31 insertions(+), 0 deletions(-)

diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index e5f63da..e96d99a 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -53,6 +53,22 @@ config SQUASHFS_LZO

  	  If unsure, say N.

+config SQUASHFS_XZ
+	bool "Include support for XZ compressed file systems"
+	depends on SQUASHFS
+	default n
+	select XZ_DEC
+	help
+	  Saying Y here includes support for reading Squashfs file systems
+	  compressed with XZ compresssion.  XZ gives better compression than
+	  the default zlib compression, at the expense of greater CPU and
+	  memory overhead.
+
+	  XZ is not the standard compression used in Squashfs and so most
+	  file systems will be readable without selecting this option.
+
+	  If unsure, say N.
+
  config SQUASHFS_EMBEDDED
  	bool "Additional option for memory-constrained systems"
  	depends on SQUASHFS
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 7672bac..cecf2be 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -7,3 +7,4 @@ squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
  squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o
  squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
  squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
+squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index 24af9ce..ac333b8 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -46,6 +46,12 @@ static const struct squashfs_decompressor squashfs_lzo_unsupported_comp_ops = {
  };
  #endif

+#ifndef CONFIG_SQUASHFS_XZ
+static const struct squashfs_decompressor squashfs_xz_unsupported_comp_ops = {
+	NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
+};
+#endif
+
  static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
  	NULL, NULL, NULL, 0, "unknown", 0
  };
@@ -58,6 +64,11 @@ static const struct squashfs_decompressor *decompressor[] = {
  #else
  	&squashfs_lzo_unsupported_comp_ops,
  #endif
+#ifdef CONFIG_SQUASHFS_XZ
+	&squashfs_xz_comp_ops,
+#else
+	&squashfs_xz_unsupported_comp_ops,
+#endif
  	&squashfs_unknown_comp_ops
  };

diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 5d45569..1096e2e 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -107,3 +107,6 @@ extern const struct squashfs_decompressor squashfs_zlib_comp_ops;

  /* lzo_wrapper.c */
  extern const struct squashfs_decompressor squashfs_lzo_comp_ops;
+
+/* xz_wrapper.c */
+extern const struct squashfs_decompressor squashfs_xz_comp_ops;
-- 
1.6.3.3

^ permalink raw reply related

* Re: [PATCH 2/2] Squashfs: Add XZ compression configuration option
From: Geert Uytterhoeven @ 2010-12-09  7:02 UTC (permalink / raw)
  To: Phillip Lougher
  Cc: Linux Kernel Development, linux-fsdevel, Linux Embedded Maillist,
	Lasse Collin, Andy Whitcroft
In-Reply-To: <4D007315.1090704@lougher.demon.co.uk>

On Thu, Dec 9, 2010 at 07:11, Phillip Lougher
<phillip@lougher.demon.co.uk> wrote:
> --- a/fs/squashfs/Kconfig
> +++ b/fs/squashfs/Kconfig
> @@ -53,6 +53,22 @@ config SQUASHFS_LZO
>
>          If unsure, say N.
>
> +config SQUASHFS_XZ
> +       bool "Include support for XZ compressed file systems"
> +       depends on SQUASHFS
> +       default n

"default n" is the default, no reason to specify it.

Do we need a checkpatch test for this?

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH 2/2] Squashfs: Add XZ compression configuration option
From: Lasse Collin @ 2010-12-09  9:09 UTC (permalink / raw)
  To: Phillip Lougher
  Cc: Linux Kernel Development, linux-fsdevel, Linux Embedded Maillist
In-Reply-To: <4D007315.1090704@lougher.demon.co.uk>

On 2010-12-09 Phillip Lougher wrote:
> +config SQUASHFS_XZ
> +       bool "Include support for XZ compressed file systems"
> +       depends on SQUASHFS
> +       default n
> +       select XZ_DEC

Should "select XZ_DEC" be replaced with "depends on XZ_DEC"? XZ_DEC 
requires CRC32, so if "select XZ_DEC" is used, there needs to be also 
"select CRC32".

XZ_DEC may optionally use other XZ_DEC_* symbols, which the user will 
want to choose when building for an embedded system. With "depends on 
XZ_DEC" the user will see that there's more than a single option that 
affects the details of the XZ support in Squashfs.

-- 
Lasse Collin  |  IRC: Larhzu @ IRCnet & Freenode

^ permalink raw reply

* Re: [PATCH 2/2] Squashfs: Add XZ compression configuration option
From: Phillip Lougher @ 2010-12-10  6:23 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Linux Kernel Development, linux-fsdevel, Linux Embedded Maillist,
	Lasse Collin, Andy Whitcroft
In-Reply-To: <AANLkTimUYrb8EMomgmsW+v7ttbkUH0P57SBD5M4_AGmU@mail.gmail.com>

Geert Uytterhoeven wrote:
> On Thu, Dec 9, 2010 at 07:11, Phillip Lougher
> <phillip@lougher.demon.co.uk> wrote:
>> --- a/fs/squashfs/Kconfig
>> +++ b/fs/squashfs/Kconfig
>> @@ -53,6 +53,22 @@ config SQUASHFS_LZO
>>
>>          If unsure, say N.
>>
>> +config SQUASHFS_XZ
>> +       bool "Include support for XZ compressed file systems"
>> +       depends on SQUASHFS
>> +       default n
> 
> "default n" is the default, no reason to specify it.
> 

Yes, thanks for pointing that out.

It seems there's a lot of "default n"s in the mainline kernel

% find . -name "Kconfig"| xargs grep "default n" | wc
     485    1535   18753

including two in arch/m68k ;-)

Phillip


^ permalink raw reply

* Re: [PATCH 2/2] Squashfs: Add XZ compression configuration option
From: Phillip Lougher @ 2010-12-10  7:30 UTC (permalink / raw)
  To: Lasse Collin
  Cc: Linux Kernel Development, linux-fsdevel, Linux Embedded Maillist
In-Reply-To: <201012091109.32084.lasse.collin@tukaani.org>

Lasse Collin wrote:
> On 2010-12-09 Phillip Lougher wrote:
>> +config SQUASHFS_XZ
>> +       bool "Include support for XZ compressed file systems"
>> +       depends on SQUASHFS
>> +       default n
>> +       select XZ_DEC
> 
> Should "select XZ_DEC" be replaced with "depends on XZ_DEC"? XZ_DEC 
> requires CRC32, so if "select XZ_DEC" is used, there needs to be also 
> "select CRC32".
> 

XZ_DEC selects CRC32, kbuild handles these nested selects quite happily,
so if something selects XZ_DEC it knows it has to also select CRC32.

Depends on has quite different semantics to selects.  If SQUASHFS_XZ
was made to depend on XZ_DEC then the option simply won't appear unless
the user knew to select XZ_DEC first (as it's default n).  This would
prove extremely confusing, and probably lead to most people thinking
Squashfs didn't have XZ support, which is somewhat undesirable.

> XZ_DEC may optionally use other XZ_DEC_* symbols, which the user will 
> want to choose when building for an embedded system. With "depends on 
> XZ_DEC" the user will see that there's more than a single option that 
> affects the details of the XZ support in Squashfs.
>

With depends on XZ_DEC the user will simply not see that Squashfs has
XZ support (as the option won't appear unless XZ_DEC is explicitly
selected).

With selects XZ_DEC users will see that Squashfs has XZ support, if
enabled, they'll simply see that XZ_DEC has been automatically
selected in the "Library routines" sub-menu.  If EMBEDDED is not
selected the XZ_DEC options will be automatically selected (as
they're only user selectable if EMBEDDED is selected).  If
EMBEDDED is selected, then they'll have the choice then to decide
which options they wish to de-select.

I think this is preferable to needing XZ_DEC to be selected before
the SQUASHFS_XZ option even appears.

Phillip

^ permalink raw reply

* Re: [PATCH 2/2] Squashfs: Add XZ compression configuration option
From: Lasse Collin @ 2010-12-10 10:37 UTC (permalink / raw)
  To: Phillip Lougher
  Cc: Linux Kernel Development, linux-fsdevel, Linux Embedded Maillist
In-Reply-To: <4D01D70A.5050508@lougher.demon.co.uk>

On 2010-12-10 Phillip Lougher wrote:
> Lasse Collin wrote:
> > On 2010-12-09 Phillip Lougher wrote:
> >> +config SQUASHFS_XZ
> >> +       bool "Include support for XZ compressed file systems"
> >> +       depends on SQUASHFS
> >> +       default n
> >> +       select XZ_DEC
> > 
> > Should "select XZ_DEC" be replaced with "depends on XZ_DEC"? XZ_DEC
> > requires CRC32, so if "select XZ_DEC" is used, there needs to be
> > also "select CRC32".
> 
> XZ_DEC selects CRC32, kbuild handles these nested selects quite
> happily, so if something selects XZ_DEC it knows it has to also
> select CRC32.

OK, I had misunderstood the notice about "select" and dependencies in 
Documentation/kbuild/kconfig-language.txt.

> Depends on has quite different semantics to selects.  If SQUASHFS_XZ
> was made to depend on XZ_DEC then the option simply won't appear
> unless the user knew to select XZ_DEC first (as it's default n). 
> This would prove extremely confusing, and probably lead to most
> people thinking Squashfs didn't have XZ support, which is somewhat
> undesirable.

Good point. I always keep the inactive options visible in xconfig so I 
missed this. I will need to update my XZ boot-time support patch to use 
"select" instead of "depends on" too. Thanks!

-- 
Lasse Collin  |  IRC: Larhzu @ IRCnet & Freenode

^ permalink raw reply

* [PATCH v3] Decompressors: Add boot-time XZ support
From: Lasse Collin @ 2010-12-10 18:45 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-embedded, H. Peter Anvin, Alain Knaff, Albin Tonnerre,
	Phillip Lougher, Andrew Morton
In-Reply-To: <201012022114.37483.lasse.collin@tukaani.org>

From: Lasse Collin <lasse.collin@tukaani.org>

This implements the API defined in <linux/decompress/generic.h>
which is used for kernel, initramfs, and initrd decompression.
This patch together with the first patch is enough for
XZ-compressed initramfs and initrd; XZ-compressed kernel will
need arch-specific changes.

The buffering requirements described in decompress_unxz.c are
stricter than with gzip, so the relevant changes should be done
to the arch-specific code when adding support for XZ-compressed
kernel. Similarly, the heap size in arch-specific pre-boot code
may need to be increased (30 KiB is enough).

The XZ decompressor needs memmove(), memeq() (memcmp() == 0),
and memzero() (memset(ptr, 0, size)), which aren't available in
all arch-specific pre-boot environments. I'm including simple
versions in decompress_unxz.c, but a cleaner solution would
naturally be nicer.

Signed-off-by: Lasse Collin <lasse.collin@tukaani.org>
---

Compared to the previous version of this patch, this makes the
XZ-compressed initramfs Kconfig option behave the same way as
the other compression methods. Now the option isn't hidden if
the xz_dec module hasn't been enabled separately. Support for
XZ-compressed initramfs is now enabled by default if !EMBEDDED.

 include/linux/decompress/unxz.h |   19 +
 init/Kconfig                    |   20 +-
 lib/Kconfig                     |    4 
 lib/Makefile                    |    1 
 lib/decompress.c                |    5 
 lib/decompress_unxz.c           |  397 ++++++++++++++++++++++++++++++++++++++++
 scripts/gen_initramfs_list.sh   |    2 
 usr/Kconfig                     |   18 +
 usr/Makefile                    |    5 
 9 files changed, 469 insertions(+), 2 deletions(-)

diff -uprN linux-2.6.37-rc4.orig/include/linux/decompress/unxz.h linux-2.6.37-rc4/include/linux/decompress/unxz.h
--- linux-2.6.37-rc4.orig/include/linux/decompress/unxz.h	1970-01-01 02:00:00.000000000 +0200
+++ linux-2.6.37-rc4/include/linux/decompress/unxz.h	2010-12-02 17:09:31.000000000 +0200
@@ -0,0 +1,19 @@
+/*
+ * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+#ifndef DECOMPRESS_UNXZ_H
+#define DECOMPRESS_UNXZ_H
+
+int unxz(unsigned char *in, int in_size,
+	 int (*fill)(void *dest, unsigned int size),
+	 int (*flush)(void *src, unsigned int size),
+	 unsigned char *out, int *in_used,
+	 void (*error)(char *x));
+
+#endif
diff -uprN linux-2.6.37-rc4.orig/init/Kconfig linux-2.6.37-rc4/init/Kconfig
--- linux-2.6.37-rc4.orig/init/Kconfig	2010-11-30 19:49:55.000000000 +0200
+++ linux-2.6.37-rc4/init/Kconfig	2010-11-30 22:01:13.000000000 +0200
@@ -130,13 +130,16 @@ config HAVE_KERNEL_BZIP2
 config HAVE_KERNEL_LZMA
 	bool
 
+config HAVE_KERNEL_XZ
+	bool
+
 config HAVE_KERNEL_LZO
 	bool
 
 choice
 	prompt "Kernel compression mode"
 	default KERNEL_GZIP
-	depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_LZO
+	depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO
 	help
 	  The linux kernel is a kind of self-extracting executable.
 	  Several compression algorithms are available, which differ
@@ -181,6 +184,21 @@ config KERNEL_LZMA
 	  two. Compression is slowest.	The kernel size is about 33%
 	  smaller with LZMA in comparison to gzip.
 
+config KERNEL_XZ
+	bool "XZ"
+	depends on HAVE_KERNEL_XZ
+	help
+	  XZ uses the LZMA2 algorithm and instruction set specific
+	  BCJ filters which can improve compression ratio of executable
+	  code. The size of the kernel is about 30% smaller with XZ in
+	  comparison to gzip. On architectures for which there is a BCJ
+	  filter (i386, x86_64, ARM, IA-64, PowerPC, and SPARC), XZ
+	  will create a few percent smaller kernel than plain LZMA.
+
+	  The speed is about the same as with LZMA: The decompression
+	  speed of XZ is better than that of bzip2 but worse than gzip
+	  and LZO. Compression is slow.
+
 config KERNEL_LZO
 	bool "LZO"
 	depends on HAVE_KERNEL_LZO
diff -uprN linux-2.6.37-rc4.orig/lib/Kconfig linux-2.6.37-rc4/lib/Kconfig
--- linux-2.6.37-rc4.orig/lib/Kconfig	2010-10-20 23:30:22.000000000 +0300
+++ linux-2.6.37-rc4/lib/Kconfig	2010-12-10 15:26:12.000000000 +0200
@@ -120,6 +120,10 @@ config DECOMPRESS_BZIP2
 config DECOMPRESS_LZMA
 	tristate
 
+config DECOMPRESS_XZ
+	select XZ_DEC
+	tristate
+
 config DECOMPRESS_LZO
 	select LZO_DECOMPRESS
 	tristate
diff -uprN linux-2.6.37-rc4.orig/lib/Makefile linux-2.6.37-rc4/lib/Makefile
--- linux-2.6.37-rc4.orig/lib/Makefile	2010-10-20 23:30:22.000000000 +0300
+++ linux-2.6.37-rc4/lib/Makefile	2010-12-02 17:30:12.000000000 +0200
@@ -74,6 +74,7 @@ obj-$(CONFIG_RAID6_PQ) += raid6/
 lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o
 lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o
 lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o
+lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o
 lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o
 
 obj-$(CONFIG_TEXTSEARCH) += textsearch.o
diff -uprN linux-2.6.37-rc4.orig/lib/decompress.c linux-2.6.37-rc4/lib/decompress.c
--- linux-2.6.37-rc4.orig/lib/decompress.c	2010-10-20 23:30:22.000000000 +0300
+++ linux-2.6.37-rc4/lib/decompress.c	2010-11-21 10:42:11.000000000 +0200
@@ -8,6 +8,7 @@
 
 #include <linux/decompress/bunzip2.h>
 #include <linux/decompress/unlzma.h>
+#include <linux/decompress/unxz.h>
 #include <linux/decompress/inflate.h>
 #include <linux/decompress/unlzo.h>
 
@@ -23,6 +24,9 @@
 #ifndef CONFIG_DECOMPRESS_LZMA
 # define unlzma NULL
 #endif
+#ifndef CONFIG_DECOMPRESS_XZ
+# define unxz NULL
+#endif
 #ifndef CONFIG_DECOMPRESS_LZO
 # define unlzo NULL
 #endif
@@ -36,6 +40,7 @@ static const struct compress_format {
 	{ {037, 0236}, "gzip", gunzip },
 	{ {0x42, 0x5a}, "bzip2", bunzip2 },
 	{ {0x5d, 0x00}, "lzma", unlzma },
+	{ {0xfd, 0x37}, "xz", unxz },
 	{ {0x89, 0x4c}, "lzo", unlzo },
 	{ {0, 0}, NULL, NULL }
 };
diff -uprN linux-2.6.37-rc4.orig/lib/decompress_unxz.c linux-2.6.37-rc4/lib/decompress_unxz.c
--- linux-2.6.37-rc4.orig/lib/decompress_unxz.c	1970-01-01 02:00:00.000000000 +0200
+++ linux-2.6.37-rc4/lib/decompress_unxz.c	2010-12-01 14:52:05.000000000 +0200
@@ -0,0 +1,397 @@
+/*
+ * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+/*
+ * Important notes about in-place decompression
+ *
+ * At least on x86, the kernel is decompressed in place: the compressed data
+ * is placed to the end of the output buffer, and the decompressor overwrites
+ * most of the compressed data. There must be enough safety margin to
+ * guarantee that the write position is always behind the read position.
+ *
+ * The safety margin for XZ with LZMA2 or BCJ+LZMA2 is calculated below.
+ * Note that the margin with XZ is bigger than with Deflate (gzip)!
+ *
+ * The worst case for in-place decompression is that the beginning of
+ * the file is compressed extremely well, and the rest of the file is
+ * uncompressible. Thus, we must look for worst-case expansion when the
+ * compressor is encoding uncompressible data.
+ *
+ * The structure of the .xz file in case of a compresed kernel is as follows.
+ * Sizes (as bytes) of the fields are in parenthesis.
+ *
+ *    Stream Header (12)
+ *    Block Header:
+ *      Block Header (8-12)
+ *      Compressed Data (N)
+ *      Block Padding (0-3)
+ *      CRC32 (4)
+ *    Index (8-20)
+ *    Stream Footer (12)
+ *
+ * Normally there is exactly one Block, but let's assume that there are
+ * 2-4 Blocks just in case. Because Stream Header and also Block Header
+ * of the first Block don't make the decompressor produce any uncompressed
+ * data, we can ignore them from our calculations. Block Headers of possible
+ * additional Blocks have to be taken into account still. With these
+ * assumptions, it is safe to assume that the total header overhead is
+ * less than 128 bytes.
+ *
+ * Compressed Data contains LZMA2 or BCJ+LZMA2 encoded data. Since BCJ
+ * doesn't change the size of the data, it is enough to calculate the
+ * safety margin for LZMA2.
+ *
+ * LZMA2 stores the data in chunks. Each chunk has a header whose size is
+ * a maximum of 6 bytes, but to get round 2^n numbers, let's assume that
+ * the maximum chunk header size is 8 bytes. After the chunk header, there
+ * may be up to 64 KiB of actual payload in the chunk. Often the payload is
+ * quite a bit smaller though; to be safe, let's assume that an average
+ * chunk has only 32 KiB of payload.
+ *
+ * The maximum uncompressed size of the payload is 2 MiB. The minimum
+ * uncompressed size of the payload is in practice never less than the
+ * payload size itself. The LZMA2 format would allow uncompressed size
+ * to be less than the payload size, but no sane compressor creates such
+ * files. LZMA2 supports storing uncompressible data in uncompressed form,
+ * so there's never a need to create payloads whose uncompressed size is
+ * smaller than the compressed size.
+ *
+ * The assumption, that the uncompressed size of the payload is never
+ * smaller than the payload itself, is valid only when talking about
+ * the payload as a whole. It is possible that the payload has parts where
+ * the decompressor consumes more input than it produces output. Calculating
+ * the worst case for this would be tricky. Instead of trying to do that,
+ * let's simply make sure that the decompressor never overwrites any bytes
+ * of the payload which it is currently reading.
+ *
+ * Now we have enough information to calculate the safety margin. We need
+ *   - 128 bytes for the .xz file format headers;
+ *   - 8 bytes per every 32 KiB of uncompressed size (one LZMA2 chunk header
+ *     per chunk, each chunk having average payload size of 32 KiB); and
+ *   - 64 KiB (biggest possible LZMA2 chunk payload size) to make sure that
+ *     the decompressor never overwrites anything from the LZMA2 chunk
+ *     payload it is currently reading.
+ *
+ * We get the following formula:
+ *
+ *    safety_margin = 128 + uncompressed_size * 8 / 32768 + 65536
+ *                  = 128 + (uncompressed_size >> 12) + 65536
+ *
+ * For comparision, according to arch/x86/boot/compressed/misc.c, the
+ * equivalent formula for Deflate is this:
+ *
+ *    safety_margin = 18 + (uncompressed_size >> 12) + 32768
+ *
+ * Thus, when updating Deflate-only in-place kernel decompressor to
+ * support XZ, the fixed overhead has to be increased from 18+32768 bytes
+ * to 128+65536 bytes.
+ */
+
+/*
+ * STATIC is defined to "static" if we are being built for kernel
+ * decompression (pre-boot code). <linux/decompress/mm.h> will define
+ * STATIC to empty if it wasn't already defined. Since we will need to
+ * know later if we are being used for kernel decompression, we define
+ * XZ_PREBOOT here.
+ */
+#ifdef STATIC
+#	define XZ_PREBOOT
+#endif
+#ifdef __KERNEL__
+#	include <linux/decompress/mm.h>
+#endif
+#define XZ_EXTERN STATIC
+
+#ifndef XZ_PREBOOT
+#	include <linux/slab.h>
+#	include <linux/xz.h>
+#else
+/*
+ * Use the internal CRC32 code instead of kernel's CRC32 module, which
+ * is not available in early phase of booting.
+ */
+#define XZ_INTERNAL_CRC32 1
+
+/*
+ * For boot time use, we enable only the BCJ filter of the current
+ * architecture or none if no BCJ filter is available for the architecture.
+ */
+#ifdef CONFIG_X86
+#	define XZ_DEC_X86
+#endif
+#ifdef CONFIG_PPC
+#	define XZ_DEC_POWERPC
+#endif
+#ifdef CONFIG_ARM
+#	define XZ_DEC_ARM
+#endif
+#ifdef CONFIG_IA64
+#	define XZ_DEC_IA64
+#endif
+#ifdef CONFIG_SPARC
+#	define XZ_DEC_SPARC
+#endif
+
+/*
+ * This will get the basic headers so that memeq() and others
+ * can be defined.
+ */
+#include "xz/xz_private.h"
+
+/*
+ * Replace the normal allocation functions with the versions from
+ * <linux/decompress/mm.h>. vfree() needs to support vfree(NULL)
+ * when XZ_DYNALLOC is used, but the pre-boot free() doesn't support it.
+ * Workaround it here because the other decompressors don't need it.
+ */
+#undef kmalloc
+#undef kfree
+#undef vmalloc
+#undef vfree
+#define kmalloc(size, flags) malloc(size)
+#define kfree(ptr) free(ptr)
+#define vmalloc(size) malloc(size)
+#define vfree(ptr) do { if (ptr != NULL) free(ptr); } while (0)
+
+/*
+ * FIXME: Not all basic memory functions are provided in architecture-specific
+ * files (yet). We define our own versions here for now, but this should be
+ * only a temporary solution.
+ *
+ * memeq and memzero are not used much and any remotely sane implementation
+ * is fast enough. memcpy/memmove speed matters in multi-call mode, but
+ * the kernel image is decompressed in single-call mode, in which only
+ * memcpy speed can matter and only if there is a lot of uncompressible data
+ * (LZMA2 stores uncompressible chunks in uncompressed form). Thus, the
+ * functions below should just be kept small; it's probably not worth
+ * optimizing for speed.
+ */
+
+#ifndef memeq
+static bool memeq(const void *a, const void *b, size_t size)
+{
+	const uint8_t *x = a;
+	const uint8_t *y = b;
+	size_t i;
+
+	for (i = 0; i < size; ++i)
+		if (x[i] != y[i])
+			return false;
+
+	return true;
+}
+#endif
+
+#ifndef memzero
+static void memzero(void *buf, size_t size)
+{
+	uint8_t *b = buf;
+	uint8_t *e = b + size;
+
+	while (b != e)
+		*b++ = '\0';
+}
+#endif
+
+#ifndef memmove
+/* Not static to avoid a conflict with the prototype in the Linux headers. */
+void *memmove(void *dest, const void *src, size_t size)
+{
+	uint8_t *d = dest;
+	const uint8_t *s = src;
+	size_t i;
+
+	if (d < s) {
+		for (i = 0; i < size; ++i)
+			d[i] = s[i];
+	} else if (d > s) {
+		i = size;
+		while (i-- > 0)
+			d[i] = s[i];
+	}
+
+	return dest;
+}
+#endif
+
+/*
+ * Since we need memmove anyway, would use it as memcpy too.
+ * Commented out for now to avoid breaking things.
+ */
+/*
+#ifndef memcpy
+#	define memcpy memmove
+#endif
+*/
+
+#include "xz/xz_crc32.c"
+#include "xz/xz_dec_stream.c"
+#include "xz/xz_dec_lzma2.c"
+#include "xz/xz_dec_bcj.c"
+
+#endif /* XZ_PREBOOT */
+
+/* Size of the input and output buffers in multi-call mode */
+#define XZ_IOBUF_SIZE 4096
+
+/*
+ * This function implements the API defined in <linux/decompress/generic.h>.
+ *
+ * This wrapper will automatically choose single-call or multi-call mode
+ * of the native XZ decoder API. The single-call mode can be used only when
+ * both input and output buffers are available as a single chunk, i.e. when
+ * fill() and flush() won't be used.
+ */
+STATIC int INIT unxz(unsigned char *in, int in_size,
+		     int (*fill)(void *dest, unsigned int size),
+		     int (*flush)(void *src, unsigned int size),
+		     unsigned char *out, int *in_used,
+		     void (*error)(char *x))
+{
+	struct xz_buf b;
+	struct xz_dec *s;
+	enum xz_ret ret;
+	bool must_free_in = false;
+
+#if XZ_INTERNAL_CRC32
+	xz_crc32_init();
+#endif
+
+	if (in_used != NULL)
+		*in_used = 0;
+
+	if (fill == NULL && flush == NULL)
+		s = xz_dec_init(XZ_SINGLE, 0);
+	else
+		s = xz_dec_init(XZ_DYNALLOC, (uint32_t)-1);
+
+	if (s == NULL)
+		goto error_alloc_state;
+
+	if (flush == NULL) {
+		b.out = out;
+		b.out_size = (size_t)-1;
+	} else {
+		b.out_size = XZ_IOBUF_SIZE;
+		b.out = malloc(XZ_IOBUF_SIZE);
+		if (b.out == NULL)
+			goto error_alloc_out;
+	}
+
+	if (in == NULL) {
+		must_free_in = true;
+		in = malloc(XZ_IOBUF_SIZE);
+		if (in == NULL)
+			goto error_alloc_in;
+	}
+
+	b.in = in;
+	b.in_pos = 0;
+	b.in_size = in_size;
+	b.out_pos = 0;
+
+	if (fill == NULL && flush == NULL) {
+		ret = xz_dec_run(s, &b);
+	} else {
+		do {
+			if (b.in_pos == b.in_size && fill != NULL) {
+				if (in_used != NULL)
+					*in_used += b.in_pos;
+
+				b.in_pos = 0;
+
+				in_size = fill(in, XZ_IOBUF_SIZE);
+				if (in_size < 0) {
+					/*
+					 * This isn't an optimal error code
+					 * but it probably isn't worth making
+					 * a new one either.
+					 */
+					ret = XZ_BUF_ERROR;
+					break;
+				}
+
+				b.in_size = in_size;
+			}
+
+			ret = xz_dec_run(s, &b);
+
+			if (flush != NULL && (b.out_pos == b.out_size
+					|| (ret != XZ_OK && b.out_pos > 0))) {
+				/*
+				 * Setting ret here may hide an error
+				 * returned by xz_dec_run(), but probably
+				 * it's not too bad.
+				 */
+				if (flush(b.out, b.out_pos) != (int)b.out_pos)
+					ret = XZ_BUF_ERROR;
+
+				b.out_pos = 0;
+			}
+		} while (ret == XZ_OK);
+
+		if (must_free_in)
+			free(in);
+
+		if (flush != NULL)
+			free(b.out);
+	}
+
+	if (in_used != NULL)
+		*in_used += b.in_pos;
+
+	xz_dec_end(s);
+
+	switch (ret) {
+	case XZ_STREAM_END:
+		return 0;
+
+	case XZ_MEM_ERROR:
+		/* This can occur only in multi-call mode. */
+		error("XZ decompressor ran out of memory");
+		break;
+
+	case XZ_FORMAT_ERROR:
+		error("Input is not in the XZ format (wrong magic bytes)");
+		break;
+
+	case XZ_OPTIONS_ERROR:
+		error("Input was encoded with settings that are not "
+				"supported by this XZ decoder");
+		break;
+
+	case XZ_DATA_ERROR:
+	case XZ_BUF_ERROR:
+		error("XZ-compressed data is corrupt");
+		break;
+
+	default:
+		error("Bug in the XZ decompressor");
+		break;
+	}
+
+	return -1;
+
+error_alloc_in:
+	if (flush != NULL)
+		free(b.out);
+
+error_alloc_out:
+	xz_dec_end(s);
+
+error_alloc_state:
+	error("XZ decompressor ran out of memory");
+	return -1;
+}
+
+/*
+ * This macro is used by architecture-specific files to decompress
+ * the kernel image.
+ */
+#define decompress unxz
diff -uprN linux-2.6.37-rc4.orig/scripts/gen_initramfs_list.sh linux-2.6.37-rc4/scripts/gen_initramfs_list.sh
--- linux-2.6.37-rc4.orig/scripts/gen_initramfs_list.sh	2010-10-20 23:30:22.000000000 +0300
+++ linux-2.6.37-rc4/scripts/gen_initramfs_list.sh	2010-11-21 10:42:11.000000000 +0200
@@ -243,6 +243,8 @@ case "$arg" in
 		echo "$output_file" | grep -q "\.gz$" && compr="gzip -9 -f"
 		echo "$output_file" | grep -q "\.bz2$" && compr="bzip2 -9 -f"
 		echo "$output_file" | grep -q "\.lzma$" && compr="lzma -9 -f"
+		echo "$output_file" | grep -q "\.xz$" && \
+				compr="xz --check=crc32 --lzma2=dict=1MiB"
 		echo "$output_file" | grep -q "\.lzo$" && compr="lzop -9 -f"
 		echo "$output_file" | grep -q "\.cpio$" && compr="cat"
 		shift
diff -uprN linux-2.6.37-rc4.orig/usr/Kconfig linux-2.6.37-rc4/usr/Kconfig
--- linux-2.6.37-rc4.orig/usr/Kconfig	2010-11-30 19:49:56.000000000 +0200
+++ linux-2.6.37-rc4/usr/Kconfig	2010-12-10 18:52:33.000000000 +0200
@@ -72,6 +72,15 @@ config RD_LZMA
 	  Support loading of a LZMA encoded initial ramdisk or cpio buffer
 	  If unsure, say N.
 
+config RD_XZ
+	bool "Support initial ramdisks compressed using XZ" if EMBEDDED
+	default !EMBEDDED
+	depends on BLK_DEV_INITRD
+	select DECOMPRESS_XZ
+	help
+	  Support loading of a XZ encoded initial ramdisk or cpio buffer.
+	  If unsure, say N.
+
 config RD_LZO
 	bool "Support initial ramdisks compressed using LZO" if EMBEDDED
 	default !EMBEDDED
@@ -139,6 +148,15 @@ config INITRAMFS_COMPRESSION_LZMA
 	  three. Compression is slowest. The initramfs size is about 33%
 	  smaller with LZMA in comparison to gzip.
 
+config INITRAMFS_COMPRESSION_XZ
+	bool "XZ"
+	depends on RD_XZ
+	help
+	  XZ uses the LZMA2 algorithm. The initramfs size is about 30%
+	  smaller with XZ in comparison to gzip. Decompression speed
+	  is better than that of bzip2 but worse than gzip and LZO.
+	  Compression is slow.
+
 config INITRAMFS_COMPRESSION_LZO
 	bool "LZO"
 	depends on RD_LZO
diff -uprN linux-2.6.37-rc4.orig/usr/Makefile linux-2.6.37-rc4/usr/Makefile
--- linux-2.6.37-rc4.orig/usr/Makefile	2010-11-30 19:49:56.000000000 +0200
+++ linux-2.6.37-rc4/usr/Makefile	2010-11-30 19:51:30.000000000 +0200
@@ -15,6 +15,9 @@ suffix_$(CONFIG_INITRAMFS_COMPRESSION_BZ
 # Lzma
 suffix_$(CONFIG_INITRAMFS_COMPRESSION_LZMA)   = .lzma
 
+# XZ
+suffix_$(CONFIG_INITRAMFS_COMPRESSION_XZ)     = .xz
+
 # Lzo
 suffix_$(CONFIG_INITRAMFS_COMPRESSION_LZO)   = .lzo
 
@@ -50,7 +53,7 @@ endif
 quiet_cmd_initfs = GEN     $@
       cmd_initfs = $(initramfs) -o $@ $(ramfs-args) $(ramfs-input)
 
-targets := initramfs_data.cpio.gz initramfs_data.cpio.bz2 initramfs_data.cpio.lzma initramfs_data.cpio.lzo initramfs_data.cpio
+targets := initramfs_data.cpio.gz initramfs_data.cpio.bz2 initramfs_data.cpio.lzma initramfs_data.cpio.xz initramfs_data.cpio.lzo initramfs_data.cpio
 # do not try to update files included in initramfs
 $(deps_initramfs): ;
 

^ permalink raw reply

* Re: [PATCH 1/2] Squashfs: add XZ compression support
From: Lasse Collin @ 2010-12-10 22:49 UTC (permalink / raw)
  To: Phillip Lougher
  Cc: Linux Kernel Development, linux-fsdevel, Linux Embedded Maillist
In-Reply-To: <4D00726D.3080503@lougher.demon.co.uk>

On 2010-12-09 Phillip Lougher wrote:
> +	stream->state = xz_dec_init(XZ_PREALLOC, block_size);

This sets the preallocated LZMA2 dictionary size to Squashfs block size, 
which allows the best compression ratio for the given block size (bigger 
values would be waste of memory). XZ decompressor needs about 30 KiB of 
memory plus the dictionary. So with 1 MiB Squashfs block size a little 
over 1 MiB of memory is needed by the XZ decompressor.

Would it be useful to support smaller dictionaries without reducing the 
Squashfs block size? It would save RAM while increasing the Squashfs 
image size less than if also the block size was reduced. Reasonable 
dictionary sizes could be e.g. 75 %, 50 %, or 25 % of the Squashfs block 
size. The size increase of the Squashfs image will vary a lot depending 
on the files in it, but e.g. dict_size = block_size / 2 won't 
necessarily increase the image size more than 1 %.

I don't know if this kind of RAM savings matter on embedded systems. I 
hope someone else can comment. My point in this email is only to let 
others know that reducing the RAM usage is possible without reducing the 
Squashfs block size.

-- 
Lasse Collin  |  IRC: Larhzu @ IRCnet & Freenode

^ permalink raw reply

* Re: [PATCH] base: add sysfs socs info
From: Ryan Mallon @ 2010-12-15 20:17 UTC (permalink / raw)
  To: Jean-Christophe PLAGNIOL-VILLARD
  Cc: linux-kernel, Greg Kroah-Hartman, Nicolas Ferre, Patrice VILCHEZ,
	arm kernel, Linux Embedded
In-Reply-To: <1292330417-13703-1-git-send-email-plagnioj@jcrosoft.com>

On 12/15/2010 01:40 AM, Jean-Christophe PLAGNIOL-VILLARD wrote:
> this provide an easy way to register soc information

This idea has been kicked around a few times in various forms, both as a
proc file and as sysfs files. Cc'ed the arm-linux and embedded-linux
lists since this patch mainly affects them.

> arch, family, model, id, revision

Some SoCs want to expose additional information also. This patch doesn't
appear to provide any standard way of extending the information
available in sysfs.

> as this for at91sam9g20
> 
> $ cat /sys/devices/system/soc/soc0/arch
> current

What does this mean? Shouldn't it be ARM? Also, we already have ways to
determine the architecture/cpu type.

> $ cat /sys/devices/system/soc/soc0/family
> at91
> $ cat /sys/devices/system/soc/soc0/id
> at91sam9g20
> $ cat /sys/devices/system/soc/soc0/model
> 0x00000000019905a0
> $ cat /sys/devices/system/soc/soc0/revision
> 1.1

What is the difference between model and revision? Do these fields make
sense for all SoCs?

What userspace tools actually need this information? Some of the
extended information for various SoCs may be useful, but I can't think
of many good reasons for a userspace application to care about the SoC
family or revision.

> Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
> Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
> Cc: Patrice VILCHEZ <patrice.vilchez@atmel.com>
> ---
>  drivers/base/Makefile |    3 +-
>  drivers/base/base.h   |    1 +
>  drivers/base/init.c   |    1 +
>  drivers/base/soc.c    |  124 +++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/soc.h   |   27 +++++++++++
>  5 files changed, 155 insertions(+), 1 deletions(-)
>  create mode 100644 drivers/base/soc.c
>  create mode 100644 include/linux/soc.h
> 
> diff --git a/drivers/base/Makefile b/drivers/base/Makefile
> index 5f51c3b..cf3e59f 100644
> --- a/drivers/base/Makefile
> +++ b/drivers/base/Makefile
> @@ -3,7 +3,8 @@
>  obj-y			:= core.o sys.o bus.o dd.o \
>  			   driver.o class.o platform.o \
>  			   cpu.o firmware.o init.o map.o devres.o \
> -			   attribute_container.o transport_class.o
> +			   attribute_container.o transport_class.o \
> +			   soc.o
>  obj-$(CONFIG_DEVTMPFS)	+= devtmpfs.o
>  obj-y			+= power/
>  obj-$(CONFIG_HAS_DMA)	+= dma-mapping.o
> diff --git a/drivers/base/base.h b/drivers/base/base.h
> index 2ca7f5b..e2daaf6 100644
> --- a/drivers/base/base.h
> +++ b/drivers/base/base.h
> @@ -107,6 +107,7 @@ static inline int hypervisor_init(void) { return 0; }
>  extern int platform_bus_init(void);
>  extern int system_bus_init(void);
>  extern int cpu_dev_init(void);
> +extern int soc_dev_init(void);
>  
>  extern int bus_add_device(struct device *dev);
>  extern void bus_probe_device(struct device *dev);
> diff --git a/drivers/base/init.c b/drivers/base/init.c
> index c8a934e..f908faa 100644
> --- a/drivers/base/init.c
> +++ b/drivers/base/init.c
> @@ -33,5 +33,6 @@ void __init driver_init(void)
>  	platform_bus_init();
>  	system_bus_init();
>  	cpu_dev_init();
> +	soc_dev_init();
>  	memory_dev_init();
>  }
> diff --git a/drivers/base/soc.c b/drivers/base/soc.c
> new file mode 100644
> index 0000000..c24bb41
> --- /dev/null
> +++ b/drivers/base/soc.c
> @@ -0,0 +1,124 @@
> +/*
> + * drivers/base/soc.c - basic SOC class support
> + *
> + * Copyright (C) 2010 Jean-Chrisotphe PLAGNIOL-VILLARD * <plagnioj@jcrosoft.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/sysdev.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/soc.h>
> +#include <linux/device.h>
> +
> +#include "base.h"
> +
> +static int nb_socs;
> +
> +struct sysdev_class soc_sysdev_class = {
> +	.name = "soc",
> +};
> +EXPORT_SYMBOL_GPL(soc_sysdev_class);
> +
> +#define print_u64_attr(field)							\
> +static ssize_t print_socs_##field(struct sys_device *dev,			\
> +				  struct sysdev_attribute *attr, char *buf)	\
> +{										\
> +	struct soc *soc = container_of(dev, struct soc, sysdev);		\
> +										\
> +	return sprintf(buf, "0x%016Lx\n", soc->field);				\
> +}										\
> +static SYSDEV_ATTR(field, 0444, print_socs_##field, NULL);			\
> +
> +#define print_str_attr(field)							\
> +static ssize_t print_socs_##field(struct sys_device *dev,			\
> +				  struct sysdev_attribute *attr, char *buf)	\
> +{										\
> +	struct soc *soc = container_of(dev, struct soc, sysdev);		\
> +										\
> +	return sprintf(buf, "%s\n", soc->field);				\
> +}										\
> +static SYSDEV_ATTR(field, 0444, print_socs_##field, NULL);			\

At first glance this looks like two functions with the same name because
of the identical print_socs##field bit. I intuitively expect field to
just be the name of the sysfs file.

> +print_u64_attr(id)
> +print_str_attr(arch)
> +print_str_attr(family)
> +print_str_attr(model)
> +print_str_attr(revision)

These should have semicolons at the end (drop the final one from the
macro name). Also I think the names should be in caps and should be
renamed to better reflect what the do, i.e. SOC_SYSFS_U64_ATTR and
SOC_SYSFS_STRING_ATTR.

> +static char *arch_current = "current";

Should be const.

> +/*
> + * register_soc - Setup a sysfs device for a SOC.
> + *
> + * Initialize and register the SOC device.
> + */
> +int register_soc(struct soc *soc)
> +{

This name implies that it does much more than just adding some sysfs
files :-).

> +	int err;
> +
> +	if (!soc)
> +		return -EINVAL;

Wouldn't bother with this check. Just crash so that we can catch buggy code.

> +	soc->sysdev.id = nb_socs;
> +	soc->sysdev.cls = &soc_sysdev_class;
> +
> +	if (!soc->arch)
> +		soc->arch = arch_current;
> +
> +	err = sysdev_register(&soc->sysdev);
> +
> +	if (err)
> +		return err;

Why all the additional whitespace?
> +
> +	err = sysdev_create_file(&soc->sysdev, &attr_arch);
> +
> +	if (err)
> +		goto end;

You can use sysfs_create_group to register a bunch of files which will
greatly simply the code here.

> +	err = sysdev_create_file(&soc->sysdev, &attr_family);
> +
> +	if (err)
> +		goto end0;
> +
> +	err = sysdev_create_file(&soc->sysdev, &attr_model);
> +
> +	if (err)
> +		goto end1;
> +
> +	err = sysdev_create_file(&soc->sysdev, &attr_id);
> +
> +	if (err)
> +		goto end2;
> +
> +	err = sysdev_create_file(&soc->sysdev, &attr_revision);
> +
> +	if (err)
> +		goto end3;
> +
> +	nb_socs++;

If there is more than one SoC (SMP machine?) then how do you guarantee
the order of registration? Should the registration function take id as a
parameter?

> +	return 0;
> +
> +end3:
> +	sysdev_remove_file(&soc->sysdev, &attr_id);
> +end2:
> +	sysdev_remove_file(&soc->sysdev, &attr_model);
> +end1:
> +	sysdev_remove_file(&soc->sysdev, &attr_family);
> +end0:
> +	sysdev_remove_file(&soc->sysdev, &attr_arch);
> +end:
> +	sysdev_unregister(&soc->sysdev);
> +
> +	return err;
> +}

EXPORT_SYMBOL(register_soc)?

> +
> +int __init soc_dev_init(void)
> +{
> +	nb_socs = 0;
> +
> +	return sysdev_class_register(&soc_sysdev_class);
> +}

EXPORT_SYMBOL(soc_dev_init)?

> diff --git a/include/linux/soc.h b/include/linux/soc.h
> new file mode 100644
> index 0000000..55e6ea2
> --- /dev/null
> +++ b/include/linux/soc.h
> @@ -0,0 +1,27 @@
> +/*
> + * include/linux/soc.h - generic soc definition
> + *
> + * Copyright (C) 2010 Jean-Chrisotphe PLAGNIOL-VILLARD * <plagnioj@jcrosoft.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +#ifndef _LINUX_SOC_H_
> +#define _LINUX_OSC_H_
> +
> +#include <linux/sysdev.h>
> +
> +struct soc {
> +	u64 id;
> +	char *arch;
> +	char *family;
> +	char *model;
> +	char *revision;
> +	struct sys_device sysdev;
> +};
> +
> +extern int register_soc(struct soc *soc);
> +extern struct sysdev_class soc_sysdev_class;
> +
> +#endif /* _LINUX_SOC_H_ */

~Ryan

-- 
Bluewater Systems Ltd - ARM Technology Solution Centre

Ryan Mallon         		5 Amuri Park, 404 Barbadoes St
ryan@bluewatersys.com         	PO Box 13 889, Christchurch 8013
http://www.bluewatersys.com	New Zealand
Phone: +64 3 3779127		Freecall: Australia 1800 148 751
Fax:   +64 3 3779135			  USA 1800 261 2934

^ permalink raw reply

* Re: [PATCH] base: add sysfs socs info
From: Ryan Mallon @ 2010-12-15 20:24 UTC (permalink / raw)
  To: Jean-Christophe PLAGNIOL-VILLARD
  Cc: linux-kernel, Greg Kroah-Hartman, Nicolas Ferre, Patrice VILCHEZ,
	linux-arm-kernel, Linux Embedded
In-Reply-To: <4D092256.4090605@bluewatersys.com>

On 12/16/2010 09:17 AM, Ryan Mallon wrote:
> On 12/15/2010 01:40 AM, Jean-Christophe PLAGNIOL-VILLARD wrote:
>> this provide an easy way to register soc information
> 
> This idea has been kicked around a few times in various forms, both as a
> proc file and as sysfs files. Cc'ed the arm-linux and embedded-linux
> lists since this patch mainly affects them.

Gah, my email client still has the old arm-linux address cached. Cc'ed
the new one.

~Ryan

>> arch, family, model, id, revision
> 
> Some SoCs want to expose additional information also. This patch doesn't
> appear to provide any standard way of extending the information
> available in sysfs.
> 
>> as this for at91sam9g20
>>
>> $ cat /sys/devices/system/soc/soc0/arch
>> current
> 
> What does this mean? Shouldn't it be ARM? Also, we already have ways to
> determine the architecture/cpu type.
> 
>> $ cat /sys/devices/system/soc/soc0/family
>> at91
>> $ cat /sys/devices/system/soc/soc0/id
>> at91sam9g20
>> $ cat /sys/devices/system/soc/soc0/model
>> 0x00000000019905a0
>> $ cat /sys/devices/system/soc/soc0/revision
>> 1.1
> 
> What is the difference between model and revision? Do these fields make
> sense for all SoCs?
> 
> What userspace tools actually need this information? Some of the
> extended information for various SoCs may be useful, but I can't think
> of many good reasons for a userspace application to care about the SoC
> family or revision.
> 
>> Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
>> Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
>> Cc: Patrice VILCHEZ <patrice.vilchez@atmel.com>
>> ---
>>  drivers/base/Makefile |    3 +-
>>  drivers/base/base.h   |    1 +
>>  drivers/base/init.c   |    1 +
>>  drivers/base/soc.c    |  124 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/linux/soc.h   |   27 +++++++++++
>>  5 files changed, 155 insertions(+), 1 deletions(-)
>>  create mode 100644 drivers/base/soc.c
>>  create mode 100644 include/linux/soc.h
>>
>> diff --git a/drivers/base/Makefile b/drivers/base/Makefile
>> index 5f51c3b..cf3e59f 100644
>> --- a/drivers/base/Makefile
>> +++ b/drivers/base/Makefile
>> @@ -3,7 +3,8 @@
>>  obj-y			:= core.o sys.o bus.o dd.o \
>>  			   driver.o class.o platform.o \
>>  			   cpu.o firmware.o init.o map.o devres.o \
>> -			   attribute_container.o transport_class.o
>> +			   attribute_container.o transport_class.o \
>> +			   soc.o
>>  obj-$(CONFIG_DEVTMPFS)	+= devtmpfs.o
>>  obj-y			+= power/
>>  obj-$(CONFIG_HAS_DMA)	+= dma-mapping.o
>> diff --git a/drivers/base/base.h b/drivers/base/base.h
>> index 2ca7f5b..e2daaf6 100644
>> --- a/drivers/base/base.h
>> +++ b/drivers/base/base.h
>> @@ -107,6 +107,7 @@ static inline int hypervisor_init(void) { return 0; }
>>  extern int platform_bus_init(void);
>>  extern int system_bus_init(void);
>>  extern int cpu_dev_init(void);
>> +extern int soc_dev_init(void);
>>  
>>  extern int bus_add_device(struct device *dev);
>>  extern void bus_probe_device(struct device *dev);
>> diff --git a/drivers/base/init.c b/drivers/base/init.c
>> index c8a934e..f908faa 100644
>> --- a/drivers/base/init.c
>> +++ b/drivers/base/init.c
>> @@ -33,5 +33,6 @@ void __init driver_init(void)
>>  	platform_bus_init();
>>  	system_bus_init();
>>  	cpu_dev_init();
>> +	soc_dev_init();
>>  	memory_dev_init();
>>  }
>> diff --git a/drivers/base/soc.c b/drivers/base/soc.c
>> new file mode 100644
>> index 0000000..c24bb41
>> --- /dev/null
>> +++ b/drivers/base/soc.c
>> @@ -0,0 +1,124 @@
>> +/*
>> + * drivers/base/soc.c - basic SOC class support
>> + *
>> + * Copyright (C) 2010 Jean-Chrisotphe PLAGNIOL-VILLARD * <plagnioj@jcrosoft.com>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#include <linux/sysdev.h>
>> +#include <linux/module.h>
>> +#include <linux/init.h>
>> +#include <linux/soc.h>
>> +#include <linux/device.h>
>> +
>> +#include "base.h"
>> +
>> +static int nb_socs;
>> +
>> +struct sysdev_class soc_sysdev_class = {
>> +	.name = "soc",
>> +};
>> +EXPORT_SYMBOL_GPL(soc_sysdev_class);
>> +
>> +#define print_u64_attr(field)							\
>> +static ssize_t print_socs_##field(struct sys_device *dev,			\
>> +				  struct sysdev_attribute *attr, char *buf)	\
>> +{										\
>> +	struct soc *soc = container_of(dev, struct soc, sysdev);		\
>> +										\
>> +	return sprintf(buf, "0x%016Lx\n", soc->field);				\
>> +}										\
>> +static SYSDEV_ATTR(field, 0444, print_socs_##field, NULL);			\
>> +
>> +#define print_str_attr(field)							\
>> +static ssize_t print_socs_##field(struct sys_device *dev,			\
>> +				  struct sysdev_attribute *attr, char *buf)	\
>> +{										\
>> +	struct soc *soc = container_of(dev, struct soc, sysdev);		\
>> +										\
>> +	return sprintf(buf, "%s\n", soc->field);				\
>> +}										\
>> +static SYSDEV_ATTR(field, 0444, print_socs_##field, NULL);			\
> 
> At first glance this looks like two functions with the same name because
> of the identical print_socs##field bit. I intuitively expect field to
> just be the name of the sysfs file.
> 
>> +print_u64_attr(id)
>> +print_str_attr(arch)
>> +print_str_attr(family)
>> +print_str_attr(model)
>> +print_str_attr(revision)
> 
> These should have semicolons at the end (drop the final one from the
> macro name). Also I think the names should be in caps and should be
> renamed to better reflect what the do, i.e. SOC_SYSFS_U64_ATTR and
> SOC_SYSFS_STRING_ATTR.
> 
>> +static char *arch_current = "current";
> 
> Should be const.
> 
>> +/*
>> + * register_soc - Setup a sysfs device for a SOC.
>> + *
>> + * Initialize and register the SOC device.
>> + */
>> +int register_soc(struct soc *soc)
>> +{
> 
> This name implies that it does much more than just adding some sysfs
> files :-).
> 
>> +	int err;
>> +
>> +	if (!soc)
>> +		return -EINVAL;
> 
> Wouldn't bother with this check. Just crash so that we can catch buggy code.
> 
>> +	soc->sysdev.id = nb_socs;
>> +	soc->sysdev.cls = &soc_sysdev_class;
>> +
>> +	if (!soc->arch)
>> +		soc->arch = arch_current;
>> +
>> +	err = sysdev_register(&soc->sysdev);
>> +
>> +	if (err)
>> +		return err;
> 
> Why all the additional whitespace?
>> +
>> +	err = sysdev_create_file(&soc->sysdev, &attr_arch);
>> +
>> +	if (err)
>> +		goto end;
> 
> You can use sysfs_create_group to register a bunch of files which will
> greatly simply the code here.
> 
>> +	err = sysdev_create_file(&soc->sysdev, &attr_family);
>> +
>> +	if (err)
>> +		goto end0;
>> +
>> +	err = sysdev_create_file(&soc->sysdev, &attr_model);
>> +
>> +	if (err)
>> +		goto end1;
>> +
>> +	err = sysdev_create_file(&soc->sysdev, &attr_id);
>> +
>> +	if (err)
>> +		goto end2;
>> +
>> +	err = sysdev_create_file(&soc->sysdev, &attr_revision);
>> +
>> +	if (err)
>> +		goto end3;
>> +
>> +	nb_socs++;
> 
> If there is more than one SoC (SMP machine?) then how do you guarantee
> the order of registration? Should the registration function take id as a
> parameter?
> 
>> +	return 0;
>> +
>> +end3:
>> +	sysdev_remove_file(&soc->sysdev, &attr_id);
>> +end2:
>> +	sysdev_remove_file(&soc->sysdev, &attr_model);
>> +end1:
>> +	sysdev_remove_file(&soc->sysdev, &attr_family);
>> +end0:
>> +	sysdev_remove_file(&soc->sysdev, &attr_arch);
>> +end:
>> +	sysdev_unregister(&soc->sysdev);
>> +
>> +	return err;
>> +}
> 
> EXPORT_SYMBOL(register_soc)?
> 
>> +
>> +int __init soc_dev_init(void)
>> +{
>> +	nb_socs = 0;
>> +
>> +	return sysdev_class_register(&soc_sysdev_class);
>> +}
> 
> EXPORT_SYMBOL(soc_dev_init)?
> 
>> diff --git a/include/linux/soc.h b/include/linux/soc.h
>> new file mode 100644
>> index 0000000..55e6ea2
>> --- /dev/null
>> +++ b/include/linux/soc.h
>> @@ -0,0 +1,27 @@
>> +/*
>> + * include/linux/soc.h - generic soc definition
>> + *
>> + * Copyright (C) 2010 Jean-Chrisotphe PLAGNIOL-VILLARD * <plagnioj@jcrosoft.com>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +#ifndef _LINUX_SOC_H_
>> +#define _LINUX_OSC_H_
>> +
>> +#include <linux/sysdev.h>
>> +
>> +struct soc {
>> +	u64 id;
>> +	char *arch;
>> +	char *family;
>> +	char *model;
>> +	char *revision;
>> +	struct sys_device sysdev;
>> +};
>> +
>> +extern int register_soc(struct soc *soc);
>> +extern struct sysdev_class soc_sysdev_class;
>> +
>> +#endif /* _LINUX_SOC_H_ */
> 
> ~Ryan
> 


-- 
Bluewater Systems Ltd - ARM Technology Solution Centre

Ryan Mallon         		5 Amuri Park, 404 Barbadoes St
ryan@bluewatersys.com         	PO Box 13 889, Christchurch 8013
http://www.bluewatersys.com	New Zealand
Phone: +64 3 3779127		Freecall: Australia 1800 148 751
Fax:   +64 3 3779135			  USA 1800 261 2934

^ permalink raw reply

* [PATCH 00/16 v5] pramfs: persistent and protected RAM filesystem
From: Marco Stornelli @ 2010-12-16 17:59 UTC (permalink / raw)
  To: Linux Kernel; +Cc: Linux Embedded, Linux FS Devel, Tim Bird, Andrew Morton

Hi all,

fifth round for the patch series. I summarize here the changes to
improve the review:

v5:
- removed the changelog from documentation file
- added the function pram_check_flags in the file operations
- added a check when the user uses XIP and the blocksize is different
from page size
- added i_meta_mutex to avoid race conditions in the inode update
path
- changed the lock policy during mem{lock|unlock} operations
- replaced bitmap_set with bitmap_fill
- added mount options xip, acl, noacl, noprotect, user_xattr and
nouser_xattr as required by Paul Maundt
- inserted pr_fmt in pram.h
- inserted macros IF2DT and DT2IF
- removed file name description from each file
- used min_t instead of using own check in pram_add_link()

v4:
- in bitmap init used already present bitmap_set function
- fix a possible memory leak in an error path reported by yidong zhang

v3:
- fix a possible memory leak in an error path reported by yidong zhang
- fix a warning when using XIP about not used __pram_mmap symbol
- fix test module header description and replaced TEST_MODULE with
PRAMFS_TEST_MODULE in the Kconfig and Makefile according to the comments
done by Randy Dunlap
- fix a compilation warning in super.c reported by James Hogan
- fix a compilation error when XIP was enabled
- removed not used symbol PRAM_XATTR_INDEX_LUSTRE
- fix some comment style issue

v2:
- fix documentation errors reported by Randy Dunlap and Kieran Bingham
- reworked memory write protection functions with the suggestions of
Andi Kleen

v1:
- first draft

Marco

^ permalink raw reply

* [PATCH 01/16 v5] pramfs: documentation
From: Marco Stornelli @ 2010-12-16 17:59 UTC (permalink / raw)
  To: Linux Kernel; +Cc: Linux Embedded, Linux FS Devel, Tim Bird, Andrew Morton

From: Marco Stornelli <marco.stornelli@gmail.com>

Documentation for PRAMFS.

Signed-off-by: Marco Stornelli <marco.stornelli@gmail.com>
---
diff -Nurp linux-2.6.36-orig/Documentation/filesystems/pramfs.txt linux-2.6.36/Documentation/filesystems/pramfs.txt
--- linux-2.6.36-orig/Documentation/filesystems/pramfs.txt	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.36/Documentation/filesystems/pramfs.txt	2010-12-15 19:41:58.000000000 +0100
@@ -0,0 +1,179 @@
+
+PRAMFS Overview
+===============
+
+Many embedded systems have a block of non-volatile RAM separate from
+normal system memory, i.e. of which the kernel maintains no memory page
+descriptors. For such systems it would be beneficial to mount a
+fast read/write filesystem over this "I/O memory", for storing frequently
+accessed data that must survive system reboots and power cycles. An
+example usage might be system logs under /var/log, or a user address
+book in a cell phone or PDA.
+
+Linux traditionally had no support for a persistent, non-volatile RAM-based
+filesystem, persistent meaning the filesystem survives a system reboot
+or power cycle intact. The RAM-based filesystems such as tmpfs and ramfs
+have no actual backing store but exist entirely in the page and buffer
+caches, hence the filesystem disappears after a system reboot or
+power cycle.
+
+A relatively straightforward solution is to write a simple block driver
+for the non-volatile RAM, and mount over it any disk-based filesystem such
+as ext2, ext3, ext4, etc.
+
+But the disk-based fs over non-volatile RAM block driver approach has
+some drawbacks:
+
+1. Complexity of disk-based fs: disk-based filesystems such as ext2/ext3/ext4
+   were designed for optimum performance on spinning disk media, so they
+   implement features such as block groups, which attempts to group inode data
+   into a contiguous set of data blocks to minimize disk seeking when accessing
+   files. For RAM there is no such concern; a file's data blocks can be
+   scattered throughout the media with no access speed penalty at all. So block
+   groups in a filesystem mounted over RAM just adds unnecessary
+   complexity. A better approach is to use a filesystem specifically
+   tailored to RAM media which does away with these disk-based features.
+   This increases the efficient use of space on the media, i.e. more
+   space is dedicated to actual file data storage and less to meta-data
+   needed to maintain that file data.
+
+2. Different problems between disks and RAM: Because PRAMFS attempts to avoid
+   filesystem corruption caused by kernel bugs, dirty pages in the page cache
+   are not allowed to be written back to the backing-store RAM. This way, an
+   errant write into the page cache will not get written back to the filesystem.
+   However, if the backing-store RAM is comparable in access speed to system
+   memory, the penalty of not using caching is minimal. With this consideration
+   it's better to move file data directly between the user buffers and the backing
+   store RAM, i.e. use direct I/O. This prevents the unnecessary populating of
+   the page cache with dirty pages. However direct I/O has to be enabled at
+   every file open. To enable direct I/O at all times for all regular files
+   requires either that applications be modified to include the O_DIRECT flag on
+   all file opens, or that the filesystem used performs direct I/O by default.
+
+The Persistent/Protected RAM Special Filesystem (PRAMFS) is a read/write
+filesystem that has been designed to address these issues. PRAMFS is targeted
+to fast I/O memory, and if the memory is non-volatile, the filesystem will be
+persistent.
+
+In PRAMFS, direct I/O is enabled across all files in the filesystem, in other
+words the O_DIRECT flag is forced on every open of a PRAMFS file. Also, file
+I/O in the PRAMFS is always synchronous. There is no need to block the current
+process while the transfer to/from the PRAMFS is in progress, since one of
+the requirements of the PRAMFS is that the filesystem exists in fast RAM. So
+file I/O in PRAMFS is always direct, synchronous, and never blocks.
+
+The data organization in PRAMFS can be thought of as an extremely simplified
+version of ext2, such that the ratio of data to meta-data is very high.
+
+PRAMFS supports the execute-in-place. With XIP, instead of keeping data in the
+page cache, the need to have a page cache copy is eliminated completely.
+Read&write type operations are performed directly from/to the memory. For file
+mappings, the RAM itself is mapped directly into userspace. XIP, in addition,
+speed up the applications start-up time because it removes the needs of any
+copies.
+
+PRAMFS is write protected. The page table entries that map the backing-store
+RAM are normally marked read-only. Write operations into the filesystem
+temporarily mark the affected pages as writeable, the write operation is
+carried out with locks held, and then the page table entries is
+marked read-only again.
+This feature provides protection against filesystem corruption caused by errant
+writes into the RAM due to kernel bugs for instance. In case there are systems
+where the write protection is not possible (for instance the RAM cannot be
+mapped with page tables), this feature can be disabled via the
+CONFIG_PRAMFS_WRITE_PROTECT config option.
+
+PRAMFS supports extended attributes, ACLs and security labels.
+
+In summary, PRAMFS is a light-weight, space-efficient special filesystem that
+is ideal for systems with a block of fast non-volatile RAM that need to access
+data on it using a standard filesytem interface.
+
+Supported mount options
+=======================
+
+The PRAMFS currently requires one mount option, and there are several
+optional mount options:
+
+physaddr=	Required. It tells PRAMFS the physical address of the
+		start of the RAM that makes up the filesystem. The
+		physical address must be located on a page boundary.
+
+init=		Optional. It is used to initialize the memory to an
+		empty filesystem. Any data in an existing filesystem
+		will be lost if this option is given. The parameter to
+		"init=" is the RAM in kilo/mega/giga bytes.
+
+bs=		Optional. It is used to specify a block size. It is
+		ignored if the "init=" option is not specified, since
+		otherwise the block size is read from the PRAMFS
+		super-block. The default blocksize is 2048 bytes,
+		and the allowed block sizes are 512, 1024, 2048, and
+		4096.
+
+bpi=		Optional. It is used to specify the bytes per inode
+		ratio, i.e. for every N bytes in the filesystem, an
+		inode will be created. This behaves the same as the "-i"
+		option to mke2fs. It is ignored if the "init=" option is
+		not specified.
+
+N=		Optional. It is used to specify the number of inodes to
+		allocate in the inode table. If the option is not
+		specified, the bytes-per-inode ratio is used to
+		calculate the number of inodes. If neither the "N=" or
+		"bpi=" options are specified, the default behavior is to
+		reserve 5% of the total space in the filesystem for the
+		inode table. This option behaves the same as the "-N"
+		option to mke2fs. It is ignored if the "init=" option is
+		not specified.
+
+errors=		Optional. It can be "cont", "remount-ro" and "panic". With the
+		first value no action is done in case of error. With the second
+		one the fs is mounted read-only. with the third one a kernel
+		panic happens. Default action is to continue on error.
+
+acl,noacl	Optional. Enable/disable the support for access control lists
+		(disabled by default).
+
+user_xattr,	Optional. Enable/disable the support for the user extended
+user_noxattr	attributes (disabled by default).
+
+noprotect	Optional. Disable the memory protection (enabled by default).
+
+xip		Optional. Enable the execute-in-place (disabled by default).
+
+Examples:
+
+mount -t pramfs -o physaddr=0x20000000,init=1M,bs=1k none /mnt/pram
+
+This example locates the filesystem at physical address 0x20000000, and
+also requests an empty filesystem be initialized, of total size of one
+megabyte and blocksize of one kilobyte. The mount point is /mnt/pram.
+
+mount -t pramfs -o physaddr=0x20000000 none /mnt/pram
+
+This example locates the filesystem at physical address 0x20000000 as in
+the first example, but uses the intact filesystem that already exists.
+
+Current Limitations
+===================
+
+- The RAM used for PRAMFS must be directly addressable.
+
+- PRAMFS does not support hard links.
+
+- PRAMFS supports only private memory mappings. This allows most
+  executables to run, but programs that attempt shared memory
+  mappings, such as X apps that use X shared memory, will fail.
+
+- PRAMFS does not support quota settings.
+
+Further Documentation
+=====================
+
+If you are interested in the internal design of PRAMFS, there is
+documentation available at the Sourceforge PRAMFS home page at
+http://pramfs.sourceforge.net/.
+
+Please send bug reports/comments/feedback to the pramfs development
+list at sourceforge: pramfs-devel@lists.sourceforge.net.
diff -Nurp linux-2.6.36-orig/Documentation/filesystems/xip.txt linux-2.6.36/Documentation/filesystems/xip.txt
--- linux-2.6.36-orig/Documentation/filesystems/xip.txt	2010-10-20 22:30:22.000000000 +0200
+++ linux-2.6.36/Documentation/filesystems/xip.txt	2010-10-31 09:25:05.000000000 +0100
@@ -49,6 +49,8 @@ This address space operation is mutually
 do page cache read/write operations.
 The following filesystems support it as of today:
 - ext2: the second extended filesystem, see Documentation/filesystems/ext2.txt
+- pramfs: persistent and protected RAM filesystem, see
+  Documentation/filesystems/pramfs.txt
 
 A set of file operations that do utilize get_xip_page can be found in
 mm/filemap_xip.c . The following file operation implementations are provided:

^ permalink raw reply

* [PATCH 02/16 v6] pramfs: super operations
From: Marco Stornelli @ 2010-12-16 17:59 UTC (permalink / raw)
  To: Linux Kernel; +Cc: Linux FS Devel, Andrew Morton, Tim Bird, Linux Embedded

From: Marco Stornelli <marco.stornelli@gmail.com>

Super block operations.

Signed-off-by: Marco Stornelli <marco.stornelli@gmail.com>
---
diff -Nurp linux-2.6.36-orig/fs/pramfs/super.c linux-2.6.36/fs/pramfs/super.c
--- linux-2.6.36-orig/fs/pramfs/super.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.36/fs/pramfs/super.c	2010-12-12 18:30:50.000000000 +0100
@@ -0,0 +1,932 @@
+/*
+ * BRIEF DESCRIPTION
+ *
+ * Super block operations.
+ *
+ * Copyright 2009-2010 Marco Stornelli <marco.stornelli@gmail.com>
+ * Copyright 2003 Sony Corporation
+ * Copyright 2003 Matsushita Electric Industrial Co., Ltd.
+ * 2003-2004 (c) MontaVista Software, Inc. , Steve Longerbeam
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/parser.h>
+#include <linux/vfs.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/mm.h>
+#include <linux/ctype.h>
+#include <linux/bitops.h>
+#include <linux/magic.h>
+#include <linux/exportfs.h>
+#include <linux/random.h>
+#include "xattr.h"
+#include "pram.h"
+
+static struct super_operations pram_sops;
+static const struct export_operations pram_export_ops;
+static struct kmem_cache *pram_inode_cachep;
+
+#ifdef CONFIG_PRAMFS_TEST
+static void *first_pram_super;
+
+struct pram_super_block *get_pram_super(void)
+{
+	return (struct pram_super_block *)first_pram_super;
+}
+EXPORT_SYMBOL(get_pram_super);
+#endif
+
+void pram_error_mng(struct super_block * sb, const char * fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	printk(KERN_ERR "pramfs error: ");
+	vprintk(fmt, args);
+	printk("\n");
+	va_end(args);
+
+	if (test_opt(sb, ERRORS_PANIC))
+		panic("pramfs: panic from previous error\n");
+	if (test_opt(sb, ERRORS_RO)) {
+		printk(KERN_CRIT "pramfs error: remounting filesystem read-only");
+		sb->s_flags |= MS_RDONLY;
+	}
+}
+
+static void pram_set_blocksize(struct super_block *sb, unsigned long size)
+{
+	int bits;
+
+	/*
+	 * We've already validated the user input and the value here must be
+	 * between PRAM_MAX_BLOCK_SIZE and PRAM_MIN_BLOCK_SIZE
+	 * and it must be a power of 2.
+	 */
+	bits = fls(size) - 1;
+	sb->s_blocksize_bits = bits;
+	sb->s_blocksize = (1<<bits);
+}
+
+static inline void *pram_ioremap(phys_addr_t phys_addr, ssize_t size, bool protect)
+{
+	void *retval;
+
+	/*
+	 * NOTE: Userland may not map this resource, we will mark the region so
+	 * /dev/mem and the sysfs MMIO access will not be allowed. This
+	 * restriction depends on STRICT_DEVMEM option. If this option is
+	 * disabled or not available we mark the region only as busy.
+	 */
+	retval = request_mem_region_exclusive(phys_addr, size, "pramfs");
+	if (!retval)
+		goto fail;
+
+	retval = ioremap_nocache(phys_addr, size);
+
+	if (retval && protect)
+		pram_writeable(retval, size, 0);
+fail:
+	return retval;
+}
+
+static loff_t pram_max_size(int bits)
+{
+	loff_t res;
+	res = (1ULL << (3*bits - 6)) - 1;
+
+	if (res > MAX_LFS_FILESIZE)
+		res = MAX_LFS_FILESIZE;
+
+	pram_info("max file size %llu bytes", res);
+	return res;
+}
+
+enum {
+	Opt_addr, Opt_bpi, Opt_size,
+	Opt_num_inodes, Opt_mode, Opt_uid,
+	Opt_gid, Opt_blocksize, Opt_user_xattr,
+	Opt_nouser_xattr, Opt_noprotect,
+	Opt_acl, Opt_noacl, Opt_xip,
+	Opt_err_cont, Opt_err_panic, Opt_err_ro,
+	Opt_err
+};
+
+static const match_table_t tokens = {
+	{Opt_bpi,		"physaddr=%x"},
+	{Opt_bpi,		"bpi=%u"},
+	{Opt_size,		"init=%s"},
+	{Opt_num_inodes,	"N=%u"},
+	{Opt_mode,		"mode=%o"},
+	{Opt_uid,		"uid=%u"},
+	{Opt_gid,		"gid=%u"},
+	{Opt_blocksize,		"bs=%s"},
+	{Opt_user_xattr,	"user_xattr"},
+	{Opt_user_xattr,	"nouser_xattr"},
+	{Opt_noprotect,		"noprotect"},
+	{Opt_acl,		"acl"},
+	{Opt_acl,		"noacl"},
+	{Opt_xip,		"xip"},
+	{Opt_err_cont,		"errors=continue"},
+	{Opt_err_panic,		"errors=panic"},
+	{Opt_err_ro,		"errors=remount-ro"},
+	{Opt_err,		NULL},
+};
+
+static phys_addr_t get_phys_addr(void **data)
+{
+	phys_addr_t phys_addr;
+	char *options = (char *) *data;
+
+	if (!options || strncmp(options, "physaddr=", 9) != 0)
+		return (phys_addr_t)ULLONG_MAX;
+	options += 9;
+	phys_addr = (phys_addr_t)simple_strtoull(options, &options, 0);
+	if (*options && *options != ',') {
+		printk(KERN_ERR "Invalid phys addr specification: %s\n",
+		       (char *) *data);
+		return (phys_addr_t)ULLONG_MAX;
+	}
+	if (phys_addr & (PAGE_SIZE - 1)) {
+		printk(KERN_ERR "physical address 0x%16llx for pramfs isn't "
+			  "aligned to a page boundary\n",
+			  (u64)phys_addr);
+		return (phys_addr_t)ULLONG_MAX;
+	}
+	if (*options == ',')
+		options++;
+	*data = (void *) options;
+	return phys_addr;
+}
+
+static int pram_parse_options(char *options, struct pram_sb_info *sbi, bool remount)
+{
+	char *p, *rest;
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+
+	if (!options)
+		return 0;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+		if (!*p)
+			continue;
+
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_addr:
+			if (remount)
+				goto bad_opt;
+			/* physaddr managed in get_phys_addr() */
+			break;
+		case Opt_bpi:
+			if (remount)
+				goto bad_opt;
+			if (match_int(&args[0], &option))
+				goto bad_val;
+			sbi->bpi = option;
+			break;
+		case Opt_uid:
+			if (remount)
+				goto bad_opt;
+			if (match_int(&args[0], &option))
+				goto bad_val;
+			sbi->uid = option;
+			break;
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+				goto bad_val;
+			sbi->gid = option;
+			break;
+		case Opt_mode:
+			if (match_octal(&args[0], &option))
+				goto bad_val;
+			sbi->mode = option & 01777U;
+			break;
+		case Opt_size:
+			if (remount)
+				goto bad_opt;
+			/* memparse() will accept a K/M/G without a digit */
+			if (!isdigit(*args[0].from))
+				goto bad_val;
+			sbi->initsize = memparse(args[0].from, &rest);
+			break;
+		case Opt_num_inodes:
+			if (remount)
+				goto bad_opt;
+			if (match_int(&args[0], &option))
+				goto bad_val;
+				sbi->num_inodes = option;
+				break;
+		case Opt_blocksize:
+			if (remount)
+				goto bad_opt;
+			/* memparse() will accept a K/M/G without a digit */
+			if (!isdigit(*args[0].from))
+				goto bad_val;
+			sbi->blocksize = memparse(args[0].from, &rest);
+			if (sbi->blocksize < PRAM_MIN_BLOCK_SIZE ||
+				sbi->blocksize > PRAM_MAX_BLOCK_SIZE ||
+				!is_power_of_2(sbi->blocksize))
+				goto bad_val;
+			break;
+		case Opt_err_panic:
+			clear_opt(sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt(sbi->s_mount_opt, ERRORS_RO);
+			set_opt(sbi->s_mount_opt, ERRORS_PANIC);
+			break;
+		case Opt_err_ro:
+			clear_opt(sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
+			set_opt(sbi->s_mount_opt, ERRORS_RO);
+			break;
+		case Opt_err_cont:
+			clear_opt(sbi->s_mount_opt, ERRORS_RO);
+			clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
+			set_opt(sbi->s_mount_opt, ERRORS_CONT);
+			break;
+#ifdef CONFIG_PRAMFS_WRITE_PROTECT
+		case Opt_noprotect:
+			if (remount)
+				goto bad_opt;
+			clear_opt(sbi->s_mount_opt, PROTECT);
+			break;
+#else
+		case Opt_noprotect:
+			pram_info("noprotect options not supported");
+			break;
+#endif
+#ifdef CONFIG_PRAMFS_XATTR
+		case Opt_user_xattr:
+			set_opt(sbi->s_mount_opt, XATTR_USER);
+			break;
+		case Opt_nouser_xattr:
+			clear_opt(sbi->s_mount_opt, XATTR_USER);
+			break;
+#else
+		case Opt_user_xattr:
+		case Opt_nouser_xattr:
+			pram_info("(no)user_xattr options not supported");
+			break;
+#endif
+#ifdef CONFIG_PRAMFS_POSIX_ACL
+		case Opt_acl:
+			set_opt(sbi->s_mount_opt, POSIX_ACL);
+			break;
+		case Opt_noacl:
+			clear_opt(sbi->s_mount_opt, POSIX_ACL);
+			break;
+#else
+		case Opt_acl:
+		case Opt_noacl:
+			pram_info("(no)acl options not supported");
+			break;
+#endif
+		case Opt_xip:
+#ifdef CONFIG_PRAMFS_XIP
+			if (remount)
+				goto bad_opt;
+			set_opt(sbi->s_mount_opt, XIP);
+			break;
+#else
+			pram_info("xip option not supported");
+			break;
+#endif
+		default: {
+			goto bad_opt;
+		}
+		}
+	}
+
+	return 0;
+
+bad_val:
+	printk(KERN_ERR "Bad value '%s' for mount option '%s'\n", args[0].from, p);
+	return -EINVAL;
+bad_opt:
+	printk(KERN_ERR "Bad mount option: \"%s\"\n", p);
+	return -EINVAL;
+}
+
+static struct pram_inode *pram_init(struct super_block *sb, unsigned long size)
+{
+	unsigned long bpi, num_inodes, bitmap_size, blocksize, num_blocks;
+	u64 bitmap_start;
+	struct pram_inode *root_i;
+	struct pram_super_block *super;
+	struct pram_sb_info *sbi = (struct pram_sb_info *)sb->s_fs_info;
+
+	pram_info("creating an empty pramfs of size %lu\n", size);
+	if (pram_is_protected(sb))
+		sbi->virt_addr = pram_ioremap(sbi->phys_addr, size, 1);
+	else
+		sbi->virt_addr = pram_ioremap(sbi->phys_addr, size, 0);
+
+	if (!sbi->virt_addr) {
+		printk(KERN_ERR "ioremap of the pramfs image failed\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+#ifdef CONFIG_PRAMFS_TEST
+	if (!first_pram_super)
+		first_pram_super = sbi->virt_addr;
+#endif
+
+	if (!sbi->blocksize)
+		blocksize = PRAM_DEF_BLOCK_SIZE;
+	else
+		blocksize = sbi->blocksize;
+
+	pram_set_blocksize(sb, blocksize);
+	blocksize = sb->s_blocksize;
+
+	if (sbi->blocksize && sbi->blocksize != blocksize)
+		sbi->blocksize = blocksize;
+
+	if (size < blocksize) {
+		printk(KERN_ERR "size smaller then block size\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!sbi->bpi)
+		/*
+		 * default is that 5% of the filesystem is
+		 * devoted to the inode table
+		 */
+		bpi = 20 * PRAM_INODE_SIZE;
+	else
+		bpi = sbi->bpi;
+
+	if (!sbi->num_inodes)
+		num_inodes = size / bpi;
+	else
+		num_inodes = sbi->num_inodes;
+
+	/*
+	 * up num_inodes such that the end of the inode table
+	 * (and start of bitmap) is on a block boundary
+	 */
+	bitmap_start = (PRAM_SB_SIZE*2) + (num_inodes<<PRAM_INODE_BITS);
+	if (bitmap_start & (blocksize - 1))
+		bitmap_start = (bitmap_start + blocksize) &
+			~(blocksize-1);
+	num_inodes = (bitmap_start - (PRAM_SB_SIZE*2)) >> PRAM_INODE_BITS;
+
+	if (sbi->num_inodes && num_inodes != sbi->num_inodes)
+		sbi->num_inodes = num_inodes;
+
+	num_blocks = (size - bitmap_start) >> sb->s_blocksize_bits;
+
+	if (!num_blocks) {
+		printk(KERN_ERR "num blocks equals to zero\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* calc the data blocks in-use bitmap size in bytes */
+	if (num_blocks & 7)
+		bitmap_size = ((num_blocks + 8) & ~7) >> 3;
+	else
+		bitmap_size = num_blocks >> 3;
+	/* round it up to the nearest blocksize boundary */
+	if (bitmap_size & (blocksize - 1))
+		bitmap_size = (bitmap_size + blocksize) & ~(blocksize-1);
+
+	pram_info("blocksize %lu, num inodes %lu, num blocks %lu\n",
+		  blocksize, num_inodes, num_blocks);
+	pram_dbg("bitmap start 0x%08x, bitmap size %lu\n",
+		 (unsigned int)bitmap_start, bitmap_size);
+	pram_dbg("max name length %d\n", (unsigned int)PRAM_NAME_LEN);
+
+	super = pram_get_super(sb);
+	pram_memunlock_range(sb, super, bitmap_start + bitmap_size);
+
+	/* clear out super-block and inode table */
+	memset(super, 0, bitmap_start);
+	super->s_size = cpu_to_be64(size);
+	super->s_blocksize = cpu_to_be32(blocksize);
+	super->s_inodes_count = cpu_to_be32(num_inodes);
+	super->s_blocks_count = cpu_to_be32(num_blocks);
+	super->s_free_inodes_count = cpu_to_be32(num_inodes - 1);
+	super->s_bitmap_blocks = cpu_to_be32(bitmap_size >> sb->s_blocksize_bits);
+	super->s_free_blocks_count = cpu_to_be32(num_blocks - be32_to_cpu(super->s_bitmap_blocks));
+	super->s_free_inode_hint = cpu_to_be32(1);
+	super->s_bitmap_start = cpu_to_be64(bitmap_start);
+	super->s_magic = cpu_to_be16(PRAM_SUPER_MAGIC);
+	pram_sync_super(super);
+
+	root_i = pram_get_inode(sb, PRAM_ROOT_INO);
+
+	root_i->i_mode = cpu_to_be32(sbi->mode);
+	root_i->i_mode = cpu_to_be16(root_i->i_mode | S_IFDIR);
+	root_i->i_uid = cpu_to_be32(sbi->uid);
+	root_i->i_gid = cpu_to_be32(sbi->gid);
+	root_i->i_links_count = cpu_to_be16(2);
+	root_i->i_d.d_parent = cpu_to_be64(PRAM_ROOT_INO);
+	pram_sync_inode(root_i);
+
+	pram_init_bitmap(sb);
+
+	pram_memlock_range(sb, super, bitmap_start + bitmap_size);
+
+	return root_i;
+}
+
+static inline void set_default_opts(struct pram_sb_info *sbi)
+{
+	set_opt(sbi->s_mount_opt, PROTECT);
+	set_opt(sbi->s_mount_opt, ERRORS_CONT);
+}
+
+static int pram_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct pram_super_block *super, *super_redund;
+	struct pram_inode *root_i;
+	struct pram_sb_info *sbi = NULL;
+	u64 root_offset;
+	unsigned long blocksize, initsize = 0;
+	u32 random = 0;
+	int retval = -EINVAL;
+
+	BUILD_BUG_ON(sizeof(struct pram_super_block) > PRAM_SB_SIZE);
+	BUILD_BUG_ON(sizeof(struct pram_inode) > PRAM_INODE_SIZE);
+
+	sbi = kzalloc(sizeof(struct pram_sb_info), GFP_KERNEL);
+	if (!sbi)
+		return -ENOMEM;
+	sb->s_fs_info = sbi;
+
+	set_default_opts(sbi);
+
+#ifdef CONFIG_PRAMFS_XATTR
+	spin_lock_init(&sbi->desc_tree_lock);
+	sbi->desc_tree.rb_node = NULL;
+#endif
+
+	sbi->phys_addr = get_phys_addr(&data);
+	if (sbi->phys_addr == (phys_addr_t)ULLONG_MAX)
+		goto out;
+
+	get_random_bytes(&random, sizeof(u32));
+	atomic_set(&sbi->next_generation, random);
+
+	/* Init with default values */
+	sbi->mode = (S_IRWXUGO | S_ISVTX);
+	sbi->uid = current_fsuid();
+	sbi->gid = current_fsgid();
+
+	if (pram_parse_options(data, sbi, 0))
+		goto out;
+
+	if (test_opt(sb, XIP) && test_opt(sb, PROTECT)) {
+		printk(KERN_ERR "xip and protect options both enabled\n");
+		goto out;
+	}
+
+	if (test_opt(sb, XIP) && sbi->blocksize != PAGE_SIZE) {
+		printk(KERN_ERR "blocksize not equal to page size and xip enabled\n");
+		goto out;
+	}
+
+	initsize = sbi->initsize;
+
+	/* Init a new pramfs instance */
+	if (initsize) {
+		root_i = pram_init(sb, initsize);
+
+		if (IS_ERR(root_i))
+			goto out;
+
+		super = pram_get_super(sb);
+
+		goto setup_sb;
+	}
+
+	pram_dbg("checking physical address 0x%016llx for pramfs image\n",
+		   (u64)sbi->phys_addr);
+
+	/* Map only one page for now. Will remap it when fs size is known. */
+	initsize = PAGE_SIZE;
+	if (pram_is_protected(sb))
+		sbi->virt_addr = pram_ioremap(sbi->phys_addr, initsize, 1);
+	else
+		sbi->virt_addr = pram_ioremap(sbi->phys_addr, initsize, 0);
+	if (!sbi->virt_addr) {
+		printk(KERN_ERR "ioremap of the pramfs image failed\n");
+		goto out;
+	}
+
+	super = pram_get_super(sb);
+	super_redund = pram_get_redund_super(sb);
+
+	/* Do sanity checks on the superblock */
+	if (be16_to_cpu(super->s_magic) != PRAM_SUPER_MAGIC) {
+		if (be16_to_cpu(super_redund->s_magic) != PRAM_SUPER_MAGIC) {
+			if (!silent)
+				printk(KERN_ERR "Can't find a valid pramfs "
+								"partition\n");
+			goto out;
+		} else {
+			pram_warn("Error in super block: try to repair it with "
+							  "the redundant copy");
+			/* Try to auto-recover the super block */
+			memcpy(super, super_redund, PRAM_SB_SIZE);
+		}
+	}
+
+	/* Read the superblock */
+	if (pram_calc_checksum((u8 *)super, PRAM_SB_SIZE)) {
+		if (pram_calc_checksum((u8 *)super_redund, PRAM_SB_SIZE)) {
+			printk(KERN_ERR "checksum error in super block\n");
+			goto out;
+		} else {
+			pram_warn("Error in super block: try to repair it with "
+							  "the redundant copy");
+			/* Try to auto-recover the super block */
+			memcpy(super, super_redund, PRAM_SB_SIZE);
+		}
+	}
+
+	blocksize = be32_to_cpu(super->s_blocksize);
+	pram_set_blocksize(sb, blocksize);
+
+	initsize = be64_to_cpu(super->s_size);
+	pram_info("pramfs image appears to be %lu KB in size\n", initsize>>10);
+	pram_info("blocksize %lu\n", blocksize);
+
+	/* Read the root inode */
+	root_i = pram_get_inode(sb, PRAM_ROOT_INO);
+
+	/* Check that the root inode is in a sane state */
+	if (pram_calc_checksum((u8 *)root_i, PRAM_INODE_SIZE)) {
+		printk(KERN_ERR "checksum error in root inode!\n");
+		goto out;
+	}
+
+	if (be64_to_cpu(root_i->i_d.d_next)) {
+		printk(KERN_ERR "root->next not NULL??!!\n");
+		goto out;
+	}
+
+	if (!S_ISDIR(be16_to_cpu(root_i->i_mode))) {
+		printk(KERN_ERR "root is not a directory!\n");
+		goto out;
+	}
+
+	root_offset = be64_to_cpu(root_i->i_type.dir.head);
+	if (root_offset == 0)
+		pram_dbg("empty filesystem\n");
+
+	/* Remap the whole filesystem now */
+	if (pram_is_protected(sb))
+		pram_writeable(sbi->virt_addr, PAGE_SIZE, 1);
+	iounmap(sbi->virt_addr);
+	release_mem_region(sbi->phys_addr, PAGE_SIZE);
+	if (pram_is_protected(sb))
+		sbi->virt_addr = pram_ioremap(sbi->phys_addr, initsize, 1);
+	else
+		sbi->virt_addr = pram_ioremap(sbi->phys_addr, initsize, 0);
+	if (!sbi->virt_addr) {
+		printk(KERN_ERR "ioremap of the pramfs image failed\n");
+		goto out;
+	}
+	super = pram_get_super(sb);
+	root_i = pram_get_inode(sb, PRAM_ROOT_INO);
+
+#ifdef CONFIG_PRAMFS_TEST
+	if (!first_pram_super)
+		first_pram_super = sbi->virt_addr;
+#endif
+
+	/* Set it all up.. */
+ setup_sb:
+	sb->s_magic = be16_to_cpu(super->s_magic);
+	sb->s_op = &pram_sops;
+	sb->s_maxbytes = pram_max_size(sb->s_blocksize_bits);
+	sb->s_time_gran = 1;
+	sb->s_export_op = &pram_export_ops;
+	sb->s_xattr = pram_xattr_handlers;
+#ifdef	CONFIG_PRAMFS_POSIX_ACL
+	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+		(sbi->s_mount_opt & PRAM_MOUNT_POSIX_ACL) ?
+		 MS_POSIXACL : 0;
+#endif
+	sb->s_root = d_alloc_root(pram_iget(sb, PRAM_ROOT_INO));
+
+	retval = 0;
+	return retval;
+ out:
+	if (sbi->virt_addr) {
+		if (pram_is_protected(sb))
+			pram_writeable(sbi->virt_addr, initsize, 1);
+		iounmap(sbi->virt_addr);
+		release_mem_region(sbi->phys_addr, initsize);
+	}
+
+	kfree(sbi);
+	return retval;
+}
+
+int pram_statfs(struct dentry *d, struct kstatfs *buf)
+{
+	struct super_block *sb = d->d_sb;
+	struct pram_super_block *ps = pram_get_super(sb);
+
+	buf->f_type = PRAM_SUPER_MAGIC;
+	buf->f_bsize = sb->s_blocksize;
+	buf->f_blocks = be32_to_cpu(ps->s_blocks_count);
+	buf->f_bfree = buf->f_bavail = pram_count_free_blocks(sb);
+	buf->f_files = be32_to_cpu(ps->s_inodes_count);
+	buf->f_ffree = be32_to_cpu(ps->s_free_inodes_count);
+	buf->f_namelen = PRAM_NAME_LEN;
+	return 0;
+}
+
+static int pram_show_options(struct seq_file *seq, struct vfsmount *vfs)
+{
+	struct pram_sb_info *sbi = (struct pram_sb_info *)vfs->mnt_sb->s_fs_info;
+
+	seq_printf(seq, ",physaddr=0x%016llx", (u64)sbi->phys_addr);
+	if (sbi->initsize)
+		seq_printf(seq, ",init=%luk", sbi->initsize >> 10);
+	if (sbi->blocksize)
+		seq_printf(seq, ",bs=%lu", sbi->blocksize);
+	if (sbi->bpi)
+		seq_printf(seq, ",bpi=%lu", sbi->bpi);
+	if (sbi->num_inodes)
+		seq_printf(seq, ",N=%lu", sbi->num_inodes);
+	if (sbi->mode != (S_IRWXUGO | S_ISVTX))
+		seq_printf(seq, ",mode=%03o", sbi->mode);
+	if (sbi->uid != 0)
+		seq_printf(seq, ",uid=%u", sbi->uid);
+	if (sbi->gid != 0)
+		seq_printf(seq, ",gid=%u", sbi->gid);
+	if (test_opt(vfs->mnt_sb, ERRORS_RO))
+		seq_puts(seq, ",errors=remount-ro");
+	if (test_opt(vfs->mnt_sb, ERRORS_PANIC))
+		seq_puts(seq, ",errors=panic");
+#ifdef CONFIG_PRAMFS_WRITE_PROTECT
+	/* memory protection enabled by default */
+	if (!test_opt(vfs->mnt_sb, PROTECT))
+		seq_puts(seq, ",noprotect");
+#else
+	/*
+	 * If it's not compiled say to the user that there
+	 * isn't the protection.
+	 */
+	seq_puts(seq, ",noprotect");
+#endif
+
+#ifdef CONFIG_PRAMFS_XATTR
+	/* user xattr not enabled by default */
+	if (test_opt(vfs->mnt_sb, XATTR_USER))
+		seq_puts(seq, ",user_xattr");
+#endif
+
+#ifdef CONFIG_PRAMFS_POSIX_ACL
+	/* acl not enabled by default */
+	if (test_opt(vfs->mnt_sb, POSIX_ACL))
+		seq_puts(seq, ",acl");
+#endif
+
+#ifdef CONFIG_PRAMFS_XIP
+	/* xip not enabled by default */
+	if (test_opt(vfs->mnt_sb, XIP))
+		seq_puts(seq, ",xip");
+#endif
+
+	return 0;
+}
+
+int pram_remount(struct super_block *sb, int *mntflags, char *data)
+{
+	unsigned long old_sb_flags;
+	unsigned long old_mount_opt;
+	struct pram_super_block *ps;
+	struct pram_sb_info *sbi = (struct pram_sb_info *)sb->s_fs_info;
+	int ret = -EINVAL;
+
+	/* Store the old options */
+	old_sb_flags = sb->s_flags;
+	old_mount_opt = sbi->s_mount_opt;
+
+	if (pram_parse_options(data, sbi, 1))
+		goto restore_opt;
+
+	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+		((sbi->s_mount_opt & PRAM_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+
+	if ((*mntflags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
+		ps = pram_get_super(sb);
+		pram_memunlock_super(sb, ps);
+		ps->s_mtime = cpu_to_be32(get_seconds()); /* update mount time */
+		pram_memlock_super(sb, ps);
+	}
+
+	ret = 0;
+	return ret;
+
+ restore_opt:
+	sb->s_flags = old_sb_flags;
+	sbi->s_mount_opt = old_mount_opt;
+	return ret;
+}
+
+void pram_put_super(struct super_block *sb)
+{
+	struct pram_sb_info *sbi = (struct pram_sb_info *)sb->s_fs_info;
+	struct pram_super_block *ps = pram_get_super(sb);
+	u64 size = be64_to_cpu(ps->s_size);
+
+#ifdef CONFIG_PRAMFS_TEST
+	if (first_pram_super == sbi->virt_addr)
+		first_pram_super = NULL;
+#endif
+
+	pram_xattr_put_super(sb);
+	/* It's unmount time, so unmap the pramfs memory */
+	if (sbi->virt_addr) {
+		if (pram_is_protected(sb))
+			pram_writeable(sbi->virt_addr, size, 1);
+		iounmap(sbi->virt_addr);
+		sbi->virt_addr = NULL;
+		release_mem_region(sbi->phys_addr, size);
+	}
+
+	sb->s_fs_info = NULL;
+	kfree(sbi);
+}
+
+static struct inode *pram_alloc_inode(struct super_block *sb)
+{
+	struct pram_inode_vfs *vi = (struct pram_inode_vfs *)
+				kmem_cache_alloc(pram_inode_cachep, GFP_KERNEL);
+	if (!vi)
+		return NULL;
+	vi->vfs_inode.i_version = 1;
+	return &vi->vfs_inode;
+}
+
+static void pram_destroy_inode(struct inode *inode)
+{
+	kmem_cache_free(pram_inode_cachep, PRAM_I(inode));
+}
+
+static void init_once(void *foo)
+{
+	struct pram_inode_vfs *vi = (struct pram_inode_vfs *) foo;
+
+#ifdef CONFIG_PRAMFS_XATTR
+	init_rwsem(&vi->xattr_sem);
+#endif
+	mutex_init(&vi->truncate_mutex);
+	mutex_init(&vi->i_meta_mutex);
+	inode_init_once(&vi->vfs_inode);
+}
+
+static int __init init_inodecache(void)
+{
+	pram_inode_cachep = kmem_cache_create("pram_inode_cache",
+					     sizeof(struct pram_inode_vfs),
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
+					     init_once);
+	if (pram_inode_cachep == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+static void destroy_inodecache(void)
+{
+	kmem_cache_destroy(pram_inode_cachep);
+}
+
+/*
+ * the super block writes are all done "on the fly", so the
+ * super block is never in a "dirty" state, so there's no need
+ * for write_super.
+ */
+static struct super_operations pram_sops = {
+	.alloc_inode	= pram_alloc_inode,
+	.destroy_inode	= pram_destroy_inode,
+	.write_inode	= pram_write_inode,
+	.dirty_inode	= pram_dirty_inode,
+	.evict_inode	= pram_evict_inode,
+	.put_super	= pram_put_super,
+	.statfs		= pram_statfs,
+	.remount_fs	= pram_remount,
+	.show_options	= pram_show_options,
+};
+
+static int pram_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	return get_sb_nodev(fs_type, flags, data, pram_fill_super, mnt);
+}
+
+static struct file_system_type pram_fs_type = {
+	.owner          = THIS_MODULE,
+	.name           = "pramfs",
+	.get_sb         = pram_get_sb,
+	.kill_sb        = kill_anon_super,
+};
+
+static struct inode *pram_nfs_get_inode(struct super_block *sb,
+		u64 ino, u32 generation)
+{
+	struct pram_super_block *ps = pram_get_super(sb);
+	struct inode *inode;
+
+	if (ino < PRAM_ROOT_INO)
+		return ERR_PTR(-ESTALE);
+	if (((ino - PRAM_ROOT_INO) >> PRAM_INODE_BITS) > be32_to_cpu(ps->s_inodes_count))
+		return ERR_PTR(-ESTALE);
+
+	inode = pram_iget(sb, ino);
+	if (!inode)
+		return ERR_PTR(-ESTALE);
+	if (generation && inode->i_generation != generation) {
+		/* we didn't find the right inode.. */
+		iput(inode);
+		return ERR_PTR(-ESTALE);
+	}
+	return inode;
+}
+
+static struct dentry *
+pram_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
+		   int fh_type)
+{
+	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+				    pram_nfs_get_inode);
+}
+
+static struct dentry *
+pram_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len,
+		   int fh_type)
+{
+	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+				    pram_nfs_get_inode);
+}
+
+static const struct export_operations pram_export_ops = {
+	.fh_to_dentry = pram_fh_to_dentry,
+	.fh_to_parent = pram_fh_to_parent,
+	.get_parent = pram_get_parent,
+};
+
+static int __init init_pram_fs(void)
+{
+	int rc = 0;
+
+	rc = init_pram_xattr();
+	if (rc)
+		return rc;
+
+	rc = init_inodecache();
+	if (rc)
+		goto out1;
+
+	rc = bdi_init(&pram_backing_dev_info);
+	if (rc)
+		goto out2;
+
+	rc = register_filesystem(&pram_fs_type);
+	if (rc)
+		goto out3;
+
+	return 0;
+
+out3:
+	bdi_destroy(&pram_backing_dev_info);
+out2:
+	destroy_inodecache();
+out1:
+	exit_pram_xattr();
+	return rc;
+}
+
+static void __exit exit_pram_fs(void)
+{
+	unregister_filesystem(&pram_fs_type);
+	bdi_destroy(&pram_backing_dev_info);
+	destroy_inodecache();
+	exit_pram_xattr();
+}
+
+MODULE_AUTHOR("Marco Stornelli <marco.stornelli@gmail.com>");
+MODULE_DESCRIPTION("Protected/Persistent RAM Filesystem");
+MODULE_LICENSE("GPL");
+
+module_init(init_pram_fs)
+module_exit(exit_pram_fs)

^ permalink raw reply

* [PATCH 03/16 v5] pramfs: inode operations
From: Marco Stornelli @ 2010-12-16 17:59 UTC (permalink / raw)
  To: Linux Kernel; +Cc: Linux Embedded, Linux FS Devel, Tim Bird, Andrew Morton

From: Marco Stornelli <marco.stornelli@gmail.com>

Inode methods (allocate/free/read/write).

Signed-off-by: Marco Stornelli <marco.stornelli@gmail.com>
---
diff -Nurp linux-2.6.36-orig/fs/pramfs/inode.c linux-2.6.36/fs/pramfs/inode.c
--- linux-2.6.36-orig/fs/pramfs/inode.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.36/fs/pramfs/inode.c	2010-12-15 19:24:14.000000000 +0100
@@ -0,0 +1,729 @@
+/*
+ * BRIEF DESCRIPTION
+ *
+ * Inode methods (allocate/free/read/write).
+ *
+ * Copyright 2009-2010 Marco Stornelli <marco.stornelli@gmail.com>
+ * Copyright 2003 Sony Corporation
+ * Copyright 2003 Matsushita Electric Industrial Co., Ltd.
+ * 2003-2004 (c) MontaVista Software, Inc. , Steve Longerbeam
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/fs.h>
+#include <linux/smp_lock.h>
+#include <linux/sched.h>
+#include <linux/highuid.h>
+#include <linux/quotaops.h>
+#include <linux/module.h>
+#include <linux/mpage.h>
+#include <linux/backing-dev.h>
+#include "pram.h"
+#include "xattr.h"
+#include "xip.h"
+#include "acl.h"
+
+struct backing_dev_info pram_backing_dev_info __read_mostly = {
+	.ra_pages       = 0,    /* No readahead */
+	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
+};
+
+/*
+ * allocate a data block for inode and return it's absolute blocknr.
+ * Zeroes out the block if zero set. Increments inode->i_blocks.
+ */
+static int pram_new_data_block(struct inode *inode, unsigned long *blocknr, int zero)
+{
+	int errval = pram_new_block(inode->i_sb, blocknr, zero);
+
+	if (!errval) {
+		struct pram_inode *pi = pram_get_inode(inode->i_sb,
+							inode->i_ino);
+		inode->i_blocks++;
+		pram_memunlock_inode(inode->i_sb, pi);
+		pi->i_blocks = cpu_to_be32(inode->i_blocks);
+		pram_memlock_inode(inode->i_sb, pi);
+	}
+
+	return errval;
+}
+
+/*
+ * find the offset to the block represented by the given inode's file
+ * relative block number.
+ */
+u64 pram_find_data_block(struct inode *inode, unsigned long file_blocknr)
+{
+	struct super_block *sb = inode->i_sb;
+	struct pram_inode *pi;
+	u64 *row; /* ptr to row block */
+	u64 *col; /* ptr to column blocks */
+	u64 bp = 0;
+	unsigned int i_row, i_col;
+	unsigned int N = sb->s_blocksize >> 3; /* num block ptrs per block */
+	unsigned int Nbits = sb->s_blocksize_bits - 3;
+
+	pi = pram_get_inode(sb, inode->i_ino);
+
+	i_row = file_blocknr >> Nbits;
+	i_col  = file_blocknr & (N-1);
+
+	row = pram_get_block(sb, be64_to_cpu(pi->i_type.reg.row_block));
+	if (row) {
+		col = pram_get_block(sb, be64_to_cpu(row[i_row]));
+		if (col)
+			bp = be64_to_cpu(col[i_col]);
+	}
+
+	return bp;
+}
+
+/*
+ * Free data blocks from inode in the range start <=> end
+ */
+static void __pram_truncate_blocks(struct inode *inode, loff_t start, loff_t end)
+{
+	struct super_block *sb = inode->i_sb;
+	struct pram_inode *pi = pram_get_inode(sb, inode->i_ino);
+	int N = sb->s_blocksize >> 3; /* num block ptrs per block */
+	int Nbits = sb->s_blocksize_bits - 3;
+	int first_row_index, last_row_index, i, j;
+	unsigned long blocknr, first_blocknr, last_blocknr;
+	unsigned int freed = 0;
+	u64 *row; /* ptr to row block */
+	u64 *col; /* ptr to column blocks */
+
+	if (start > end || !inode->i_blocks || !pi->i_type.reg.row_block)
+		return;
+
+	mutex_lock(&PRAM_I(inode)->truncate_mutex);
+
+	first_blocknr = (start + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+	last_blocknr = (end + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+	first_row_index = first_blocknr >> Nbits;
+	last_row_index  = last_blocknr >> Nbits;
+
+	row = pram_get_block(sb, be64_to_cpu(pi->i_type.reg.row_block));
+
+	for (i = first_row_index; i <= last_row_index; i++) {
+		int first_col_index = (i == first_row_index) ?
+			first_blocknr & (N-1) : 0;
+		int last_col_index = (i == last_row_index) ?
+			last_blocknr & (N-1) : N-1;
+
+		if (unlikely(!row[i]))
+			continue;
+
+		col = pram_get_block(sb, be64_to_cpu(row[i]));
+
+		for (j = first_col_index; j <= last_col_index; j++) {
+
+			if (unlikely(!col[j]))
+				continue;
+
+			blocknr = pram_get_blocknr(sb, be64_to_cpu(col[j]));
+			pram_free_block(sb, blocknr);
+			freed++;
+			pram_memunlock_block(sb, col);
+			col[j] = 0;
+			pram_memlock_block(sb, col);
+		}
+
+		if (first_col_index == 0) {
+			blocknr = pram_get_blocknr(sb, be64_to_cpu(row[i]));
+			pram_free_block(sb, blocknr);
+			pram_memunlock_block(sb, row);
+			row[i] = 0;
+			pram_memlock_block(sb, row);
+		}
+	}
+
+	inode->i_blocks -= freed;
+
+	if (start == 0) {
+		blocknr = pram_get_blocknr(sb, be64_to_cpu(pi->i_type.reg.row_block));
+		pram_free_block(sb, blocknr);
+		pram_memunlock_inode(sb, pi);
+		pi->i_type.reg.row_block = 0;
+		pram_memlock_inode(sb, pi);
+	}
+	pram_memunlock_inode(sb, pi);
+	pi->i_blocks = cpu_to_be32(inode->i_blocks);
+	pram_memlock_inode(sb, pi);
+
+	mutex_unlock(&PRAM_I(inode)->truncate_mutex);
+}
+
+static void pram_truncate_blocks(struct inode *inode, loff_t start, loff_t end)
+{
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+	      S_ISLNK(inode->i_mode)))
+		return;
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return;
+
+	__pram_truncate_blocks(inode, start, end);
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+	pram_update_inode(inode);
+}
+
+/*
+ * Allocate num data blocks for inode, starting at given file-relative
+ * block number. All blocks except the last are zeroed out.
+ */
+int pram_alloc_blocks(struct inode *inode, int file_blocknr, int num)
+{
+	struct super_block *sb = inode->i_sb;
+	struct pram_inode *pi = pram_get_inode(sb, inode->i_ino);
+	int N = sb->s_blocksize >> 3; /* num block ptrs per block */
+	int Nbits = sb->s_blocksize_bits - 3;
+	int first_file_blocknr;
+	int last_file_blocknr;
+	int first_row_index, last_row_index;
+	int i, j, errval;
+	unsigned long blocknr;
+	u64 *row;
+	u64 *col;
+
+	if (!pi->i_type.reg.row_block) {
+		/* alloc the 2nd order array block */
+		errval = pram_new_block(sb, &blocknr, 1);
+		if (errval) {
+			pram_dbg("failed to alloc 2nd order array block\n");
+			goto fail;
+		}
+		pram_memunlock_inode(sb, pi);
+		pi->i_type.reg.row_block = cpu_to_be64(pram_get_block_off(sb, blocknr));
+		pram_memlock_inode(sb, pi);
+	}
+
+	row = pram_get_block(sb, be64_to_cpu(pi->i_type.reg.row_block));
+
+	first_file_blocknr = file_blocknr;
+	last_file_blocknr = file_blocknr + num - 1;
+
+	first_row_index = first_file_blocknr >> Nbits;
+	last_row_index  = last_file_blocknr >> Nbits;
+
+	for (i = first_row_index; i <= last_row_index; i++) {
+		int first_col_index, last_col_index;
+
+		/*
+		 * we are starting a new row, so make sure
+		 * there is a block allocated for the row.
+		 */
+		if (!row[i]) {
+			/* allocate the row block */
+			errval = pram_new_block(sb, &blocknr, 1);
+			if (errval) {
+				pram_dbg("failed to alloc row block\n");
+				goto fail;
+			}
+			pram_memunlock_block(sb, row);
+			row[i] = cpu_to_be64(pram_get_block_off(sb, blocknr));
+			pram_memlock_block(sb, row);
+		}
+		col = pram_get_block(sb, be64_to_cpu(row[i]));
+
+		first_col_index = (i == first_row_index) ?
+			first_file_blocknr & (N-1) : 0;
+
+		last_col_index = (i == last_row_index) ?
+			last_file_blocknr & (N-1) : N-1;
+
+		for (j = first_col_index; j <= last_col_index; j++) {
+			int last_block =
+				(i == last_row_index) && (j == last_col_index);
+			if (!col[j]) {
+				errval = pram_new_data_block(inode,
+							      &blocknr,
+							      !last_block);
+				if (errval) {
+					pram_dbg("failed to alloc "
+						  "data block\n");
+					goto fail;
+				}
+				pram_memunlock_block(sb, col);
+				col[j] = cpu_to_be64(pram_get_block_off(sb, blocknr));
+				pram_memlock_block(sb, col);
+			}
+		}
+	}
+
+	errval = 0;
+ fail:
+	return errval;
+}
+
+static int pram_read_inode(struct inode *inode, struct pram_inode *pi)
+{
+	int ret = -EIO;
+
+	mutex_lock(&PRAM_I(inode)->i_meta_mutex);
+
+	if (pram_calc_checksum((u8 *)pi, PRAM_INODE_SIZE)) {
+		pram_err(inode->i_sb, "checksum error in inode %08x\n",
+			  (u32)inode->i_ino);
+		goto bad_inode;
+	}
+
+	inode->i_mode = be16_to_cpu(pi->i_mode);
+	inode->i_uid = be32_to_cpu(pi->i_uid);
+	inode->i_gid = be32_to_cpu(pi->i_gid);
+	inode->i_nlink = be16_to_cpu(pi->i_links_count);
+	inode->i_size = be32_to_cpu(pi->i_size);
+	inode->i_atime.tv_sec = be32_to_cpu(pi->i_atime);
+	inode->i_ctime.tv_sec = be32_to_cpu(pi->i_ctime);
+	inode->i_mtime.tv_sec = be32_to_cpu(pi->i_mtime);
+	inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec =
+		inode->i_ctime.tv_nsec = 0;
+	inode->i_generation = be32_to_cpu(pi->i_generation);
+
+	/* check if the inode is active. */
+	if (inode->i_nlink == 0 && (inode->i_mode == 0 || be32_to_cpu(pi->i_dtime))) {
+		/* this inode is deleted */
+		pram_dbg("read inode: inode %lu not active", inode->i_ino);
+		ret = -EINVAL;
+		goto bad_inode;
+	}
+
+	inode->i_blocks = be32_to_cpu(pi->i_blocks);
+	inode->i_ino = pram_get_inodenr(inode->i_sb, pi);
+	inode->i_mapping->a_ops = &pram_aops;
+	inode->i_mapping->backing_dev_info = &pram_backing_dev_info;
+
+	insert_inode_hash(inode);
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFREG:
+		if (pram_use_xip(inode->i_sb)) {
+			inode->i_mapping->a_ops = &pram_aops_xip;
+			inode->i_fop = &pram_xip_file_operations;
+		} else {
+			inode->i_op = &pram_file_inode_operations;
+			inode->i_fop = &pram_file_operations;
+		}
+		break;
+	case S_IFDIR:
+		inode->i_op = &pram_dir_inode_operations;
+		inode->i_fop = &pram_dir_operations;
+		break;
+	case S_IFLNK:
+		inode->i_op = &pram_symlink_inode_operations;
+		break;
+	default:
+		inode->i_size = 0;
+		init_special_inode(inode, inode->i_mode,
+				   be32_to_cpu(pi->i_type.dev.rdev));
+		break;
+	}
+
+	mutex_unlock(&PRAM_I(inode)->i_meta_mutex);
+	return 0;
+
+ bad_inode:
+	make_bad_inode(inode);
+	mutex_unlock(&PRAM_I(inode)->i_meta_mutex);
+	return ret;
+}
+
+int pram_update_inode(struct inode *inode)
+{
+	struct pram_inode *pi;
+	int retval = 0;
+
+	pi = pram_get_inode(inode->i_sb, inode->i_ino);
+	if (!pi)
+		return -EACCES;
+
+	mutex_lock(&PRAM_I(inode)->i_meta_mutex);
+
+	pram_memunlock_inode(inode->i_sb, pi);
+	pi->i_mode = cpu_to_be16(inode->i_mode);
+	pi->i_uid = cpu_to_be32(inode->i_uid);
+	pi->i_gid = cpu_to_be32(inode->i_gid);
+	pi->i_links_count = cpu_to_be16(inode->i_nlink);
+	pi->i_size = cpu_to_be32(inode->i_size);
+	pi->i_blocks = cpu_to_be32(inode->i_blocks);
+	pi->i_atime = cpu_to_be32(inode->i_atime.tv_sec);
+	pi->i_ctime = cpu_to_be32(inode->i_ctime.tv_sec);
+	pi->i_mtime = cpu_to_be32(inode->i_mtime.tv_sec);
+	pi->i_generation = cpu_to_be32(inode->i_generation);
+
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+		pi->i_type.dev.rdev = cpu_to_be32(inode->i_rdev);
+	
+	pram_memlock_inode(inode->i_sb, pi);
+
+	mutex_unlock(&PRAM_I(inode)->i_meta_mutex);
+	return retval;
+}
+
+/*
+ * NOTE! When we get the inode, we're the only people
+ * that have access to it, and as such there are no
+ * race conditions we have to worry about. The inode
+ * is not on the hash-lists, and it cannot be reached
+ * through the filesystem because the directory entry
+ * has been deleted earlier.
+ */
+static void pram_free_inode(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct pram_super_block *ps;
+	struct pram_inode *pi;
+	unsigned long inode_nr;
+
+	pram_xattr_delete_inode(inode);
+
+	lock_super(sb);
+
+	inode_nr = (inode->i_ino - PRAM_ROOT_INO) >> PRAM_INODE_BITS;
+
+	pi = pram_get_inode(sb, inode->i_ino);
+	pram_memunlock_inode(sb, pi);
+	pi->i_dtime = cpu_to_be32(get_seconds());
+	pi->i_type.reg.row_block = 0;
+	pi->i_xattr = 0;
+	pram_memlock_inode(sb, pi);
+
+	/* increment s_free_inodes_count */
+	ps = pram_get_super(sb);
+	pram_memunlock_super(sb, ps);
+	if (inode_nr < be32_to_cpu(ps->s_free_inode_hint))
+		ps->s_free_inode_hint = cpu_to_be32(inode_nr);
+	be32_add_cpu(&ps->s_free_inodes_count, 1);
+	if (be32_to_cpu(ps->s_free_inodes_count) == be32_to_cpu(ps->s_inodes_count) - 1) {
+		/* filesystem is empty */
+		pram_dbg("fs is empty!\n");
+		ps->s_free_inode_hint = cpu_to_be32(1);
+	}
+	pram_memlock_super(sb, ps);
+
+	unlock_super(sb);
+}
+
+struct inode *pram_iget(struct super_block *sb, unsigned long ino)
+{
+	struct inode *inode;
+	struct pram_inode *pi;
+	int err;
+
+	inode = iget_locked(sb, ino);
+	if (unlikely(!inode))
+		return ERR_PTR(-ENOMEM);
+	if (!(inode->i_state & I_NEW))
+		return inode;
+
+	pi = pram_get_inode(sb, ino);
+	if (!pi) {
+		err = -EACCES;
+		goto fail;
+	}
+	err = pram_read_inode(inode, pi);
+	if (unlikely(err))
+		goto fail;
+
+	unlock_new_inode(inode);
+	return inode;
+fail:
+	iget_failed(inode);
+	return ERR_PTR(err);
+}
+
+void pram_evict_inode(struct inode *inode)
+{
+	int want_delete = 0;
+
+	if (!inode->i_nlink && !is_bad_inode(inode))
+		want_delete = 1;
+
+	truncate_inode_pages(&inode->i_data, 0);
+
+	if (want_delete) {
+		/* unlink from chain in the inode's directory */
+		pram_remove_link(inode);
+		if (inode->i_blocks)
+			pram_truncate_blocks(inode, 0, inode->i_size);
+		inode->i_size = 0;
+	}
+
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
+
+	if (want_delete)
+		pram_free_inode(inode);
+}
+
+
+struct inode *pram_new_inode(struct inode *dir, int mode)
+{
+	struct super_block *sb;
+	struct pram_sb_info *sbi;
+	struct pram_super_block *ps;
+	struct inode *inode;
+	struct pram_inode *pi = NULL;
+	int i, errval;
+	ino_t ino = 0;
+
+	sb = dir->i_sb;
+	sbi = (struct pram_sb_info *)sb->s_fs_info;
+	inode = new_inode(sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	lock_super(sb);
+	ps = pram_get_super(sb);
+
+	if (ps->s_free_inodes_count) {
+		/* find the oldest unused pram inode */
+		for (i = be32_to_cpu(ps->s_free_inode_hint); i < be32_to_cpu(ps->s_inodes_count); i++) {
+			ino = PRAM_ROOT_INO + (i << PRAM_INODE_BITS);
+			pi = pram_get_inode(sb, ino);
+			/* check if the inode is active. */
+			if (be16_to_cpu(pi->i_links_count) == 0 &&
+			   (be16_to_cpu(pi->i_mode) == 0 ||
+			   be32_to_cpu(pi->i_dtime))) {
+				/* this inode is deleted */
+				break;
+			}
+		}
+
+		if (i >= be32_to_cpu(ps->s_inodes_count)) {
+			pram_err(sb, "s_free_inodes_count!=0 but none free!?\n");
+			errval = -ENOSPC;
+			goto fail1;
+		}
+
+		pram_dbg("allocating inode %lu\n", ino);
+		pram_memunlock_super(sb, ps);
+		be32_add_cpu(&ps->s_free_inodes_count, -1);
+		if (i < be32_to_cpu(ps->s_inodes_count)-1)
+			ps->s_free_inode_hint = cpu_to_be32(i+1);
+		else
+			ps->s_free_inode_hint = 0;
+		pram_memlock_super(sb, ps);
+	} else {
+		pram_dbg("no space left to create new inode!\n");
+		errval = -ENOSPC;
+		goto fail1;
+	}
+
+	/* chosen inode is in ino */
+
+	inode->i_ino = ino;
+	inode_init_owner(inode, dir, mode);
+	inode->i_blocks = inode->i_size = 0;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+
+	inode->i_generation = atomic_add_return(1, &sbi->next_generation);
+
+	pram_memunlock_inode(sb, pi);
+	pi->i_d.d_next = 0;
+	pi->i_d.d_prev = 0;
+	pram_memlock_inode(sb, pi);
+
+	if (insert_inode_locked(inode) < 0) {
+		errval = -EINVAL;
+		goto fail2;
+	}
+	pram_write_inode(inode, 0);
+
+	errval = pram_init_acl(inode, dir);
+	if (errval)
+		goto fail2;
+
+	errval = pram_init_security(inode, dir);
+	if (errval)
+		goto fail2;
+
+	unlock_super(sb);
+
+	return inode;
+fail2:
+	unlock_super(sb);
+	unlock_new_inode(inode);
+	iput(inode);
+	return ERR_PTR(errval);
+fail1:
+	unlock_super(sb);
+	make_bad_inode(inode);
+	iput(inode);
+	return ERR_PTR(errval);
+}
+
+int pram_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	return pram_update_inode(inode);
+}
+
+/*
+ * dirty_inode() is called from __mark_inode_dirty()
+ */
+void pram_dirty_inode(struct inode *inode)
+{
+	pram_update_inode(inode);
+}
+
+/* pram_get_and_update_block()
+ *
+ * Look for a block. If not found it can create a new one if create is
+ * different from zero.
+ *
+ * It returns zero if plain lookup failed or blocks mapped or allocated
+ * (plain lookup failed is not an error, e.g. for holes). Minor than zero
+ * otherwise.
+ */
+int pram_get_and_update_block(struct inode *inode, sector_t iblock,
+				     struct buffer_head *bh, int create)
+{
+	struct super_block *sb = inode->i_sb;
+	unsigned int blocksize = 1 << inode->i_blkbits;
+	int err = 0;
+	u64 block;
+	void *bp;
+
+	mutex_lock(&PRAM_I(inode)->truncate_mutex);
+
+	block = pram_find_data_block(inode, iblock);
+
+	if (!block) {
+		if (!create)
+			goto out;
+
+		err = pram_alloc_blocks(inode, iblock, 1);
+		if (err)
+			goto out;
+		block = pram_find_data_block(inode, iblock);
+		if (!block) {
+			err = -EIO;
+			goto out;
+		}
+		set_buffer_new(bh);
+	}
+
+	bh->b_blocknr = block;
+	set_buffer_mapped(bh);
+
+	/* now update the buffer synchronously */
+	bp = pram_get_block(sb, block);
+	if (buffer_new(bh)) {
+		pram_memunlock_block(sb, bp);
+		memset(bp, 0, blocksize);
+		pram_memlock_block(sb, bp);
+		memset(bh->b_data, 0, blocksize);
+	} else {
+		memcpy(bh->b_data, bp, blocksize);
+	}
+
+	set_buffer_uptodate(bh);
+
+ out:
+	mutex_unlock(&PRAM_I(inode)->truncate_mutex);
+	return err;
+}
+
+/*
+ * Called to zeros out a single block. It's used in the "resize"
+ * to avoid to keep data in case the file grow up again.
+ */
+static int pram_clear_block(struct inode *inode, loff_t newsize)
+{
+	pgoff_t index = newsize >> PAGE_CACHE_SHIFT;
+	unsigned long offset = newsize & (PAGE_CACHE_SIZE - 1);
+	unsigned long blocksize, length;
+	sector_t iblock;
+	u64 blockoff;
+	char *bp;
+	int ret = 0;
+
+	blocksize = 1 << inode->i_blkbits;
+	length = offset & (blocksize - 1);
+
+	/* Block boundary ? */
+	if (!length)
+		goto out;
+
+	length = blocksize - length;
+	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+	mutex_lock(&PRAM_I(inode)->truncate_mutex);
+	blockoff = pram_find_data_block(inode, iblock);
+
+	/* Hole ? */
+	if (!blockoff)
+		goto out_unlock;
+
+	bp = pram_get_block(inode->i_sb, blockoff);
+	if (!bp) {
+		ret = -EACCES;
+		goto out_unlock;
+	}
+	pram_memunlock_block(inode->i_sb, bp);
+	memset(bp + offset, 0, length);
+	pram_memlock_block(inode->i_sb, bp);
+
+out_unlock:
+	mutex_unlock(&PRAM_I(inode)->truncate_mutex);
+out:
+	return ret;
+}
+
+static int pram_setsize(struct inode *inode, loff_t newsize)
+{
+	int ret = 0;
+	loff_t oldsize;
+
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+	    S_ISLNK(inode->i_mode)))
+		return -EINVAL;
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return -EPERM;
+
+	if (mapping_is_xip(inode->i_mapping))
+		ret = xip_truncate_page(inode->i_mapping, newsize);
+	else
+		ret = pram_clear_block(inode, newsize);
+	if (ret)
+		return ret;
+
+	oldsize = inode->i_size;
+	i_size_write(inode, newsize);
+	__pram_truncate_blocks(inode, newsize, oldsize);
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+	pram_update_inode(inode);
+
+	return ret;
+}
+
+int pram_notify_change(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+
+	if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
+		error = pram_setsize(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+	setattr_copy(inode, attr);
+	if (attr->ia_valid & ATTR_MODE)
+		error = pram_acl_chmod(inode);
+	error = pram_update_inode(inode);
+
+	return error;
+}
+
+struct address_space_operations pram_aops = {
+	.readpage	= pram_readpage,
+	.direct_IO	= pram_direct_IO,
+};
+
+struct address_space_operations pram_aops_xip = {
+	.get_xip_mem	= pram_get_xip_mem,
+};

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox