[2/2] The sparse byte code writer.

linux-sparse.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [2/2] The sparse byte code writer.
@ 2008-09-04  5:34 Christopher Li
  0 siblings, 0 replies; only message in thread
From: Christopher Li @ 2008-09-04  5:34 UTC (permalink / raw)
  To: Linux-Sparse, Alexey Zaytsev, Josh Triplett

[-- Attachment #1: Type: text/plain, Size: 2219 bytes --]

To test the writer:

$ ./test-write linearize.c
export syms 29 internal 72 import 47
struct instruction * size 349524
struct entrypoint * size 2280
struct basic_block * size 39248
pseudo_t size 118632
struct pseudo_user * size 0
struct asm_constraint * size 0
struct symbol * size 358608
struct asm_rules * size 0
struct multijmp * size 1344
struct expression * size 4064
struct ident * size 12596
struct string * size 0
char * size 0
char * (stream) size 191
struct ptr_list * size 114948
toalsize 995359

It generate linearize.b file.

Some internal detail:

traverse.c is responsible for traverse the pointer in an object.
It don't actually convert the pointer. It delegates the actual convertion
to the caller. The reader and writer can share the same traverse
code to convert the pointer to index, index back to pointer.

The writer use a two pass stage to write the object file.
The first pass is finding out which object need to write out to
the file. It register them into the hash table for later look up.
It add a shadow copy of the object with all the pointer inside the
object converted into index.

This is a recursive process until all the object under dependency
has been registered.

The second stage is the actual dumping of the registered file.

Compare to the one step convert and dump approach (been there
done that). The two step write back generate very nice sequential
disk IO. It is also much easier to work with. We can optionally
apply some compression method before we write it to the disk.
It can save the object file size. Currently it is the raw C structure.

Known limits:
- static string is not saved yet. The linearizer skip the initialization
  part of the storage type symbol. We should consider generate
  some initialization node for lay out the static C structure initializer.

- I think the linearized instruction does not have the full C type information
  yet.

- A few more I can't remember right now.
- Reader is half baked. Not included in this patch series. Too ugly to show
  on the list. Drop me a line and I can send you what I got. I will be very
  glad if some one want to adopt this...

Comments are welcome.

Chris

Signed-Off-By: Christopher Li <sparse@chrisli.org>

[-- Attachment #2: writer-5 --]
[-- Type: application/octet-stream, Size: 31868 bytes --]



Index: sparse/traverse.c
===================================================================
--- sparse.orig/traverse.c
+++ sparse/traverse.c
@@ -0,0 +1,307 @@
+/*
+ * traverse.c
+ *
+ * Keep track of pointers inside the objects. It can be
+ * shared by reader and writer.
+ *
+ * Copyright (C) 2008 Christopher Li.
+ *
+ * Licensed under the Open Software License version 1.1
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "allocate.h"
+#include "symbol.h"
+#include "expression.h"
+#include "linearize.h"
+#include "traverse.h"
+
+int traverse_level = 0;
+
+void inline traverse_enter(void)
+{
+	traverse_level ++;
+}
+
+void inline traverse_exit(void)
+{
+	traverse_level --;
+}
+
+const char* object_typename(int type)
+{
+	static const char *object_name_table[PTR_LAST] = {
+		[PTR_INSN] = "struct instruction *",
+		[PTR_ENTRYPOINT] = "struct entrypoint *",
+		[PTR_BASICBLOCK] = "struct basic_block *",
+		[PTR_PSEUDO] = "pseudo_t",
+		[PTR_PSEUDO_USER] = "struct pseudo_user *",
+		[PTR_ASM_CONSTRAINT] = "struct asm_constraint *",
+		[PTR_SYMBOL] = "struct symbol *",
+		[PTR_ASM_RULES] = "struct asm_rules *",
+		[PTR_EXPR] = "struct expression *",
+		[PTR_MULTIJMP] = "struct multijmp *",
+
+		[PTR_IDENT] = "struct ident *",
+		[PTR_STRING] = "struct string *",
+		[PTR_CHARSTRING] = "char *",
+		[PTR_STREAM_NAME] = "char * (stream)",
+		[PTR_LIST] = "struct ptr_list *",
+	};
+	return object_name_table[type];
+}
+
+
+void traverse_entrypoint(struct convert_op *op, void *ptr, void *data)
+{
+	struct entrypoint *ep = (struct entrypoint*) ptr;
+	traverse_enter();
+	convert_symbol(op, &ep->name, data);
+	convert_symbol_list(op, &ep->syms, data);
+	convert_pseudo_list(op, &ep->accesses, data);
+	convert_basic_block_list(op, &ep->bbs, data);
+	convert_instruction(op, &ep->entry, data);
+	traverse_exit();
+}
+
+void traverse_basic_block(struct convert_op *op, void *ptr, void *data)
+{
+	struct basic_block *bb = (struct basic_block*) ptr;
+	traverse_enter();
+	op->convert_position(&bb->pos, data);
+	convert_entrypoint(op, &bb->ep, data);
+	convert_basic_block_list(op, &bb->parents, data);
+	convert_basic_block_list(op, &bb->children, data);
+	convert_instruction_list(op, &bb->insns, data);
+	convert_pseudo_list(op, &bb->needs, data);
+	convert_pseudo_list(op, &bb->defines, data);
+	traverse_exit();
+}
+
+void traverse_pseudo(struct convert_op *op, void *ptr, void *data)
+{
+	pseudo_t pseudo = (pseudo_t) ptr;
+	traverse_enter();
+	switch(pseudo->type) {
+	case PSEUDO_SYM:
+		convert_symbol(op, &pseudo->sym, data);
+		break;
+	case PSEUDO_REG:
+	case PSEUDO_PHI:
+		op->convert_ident(&pseudo->ident, data);
+		break;
+	default:
+		break;
+	}
+	traverse_exit();
+}
+
+void traverse_symbol(struct convert_op *op, void *ptr, void *data)
+{
+	struct symbol *sym = (struct symbol*) ptr;
+	traverse_enter();
+	op->convert_position(&sym->pos, data);
+	op->convert_ident(&sym->ident, data);
+	switch(sym->type) {
+	case SYM_FN:
+		convert_symbol_list(op, &sym->arguments, data);
+		break;
+	case SYM_STRUCT:
+	case SYM_UNION:
+		convert_symbol_list(op, &sym->symbol_list, data);
+		break;
+	case SYM_ARRAY:
+		convert_expression(op, &sym->array_size, data);
+		break;
+	case SYM_BASETYPE:
+		goto done;
+	case SYM_NODE:
+	case SYM_PTR:
+	case SYM_BITFIELD:
+	case SYM_ENUM:
+		break;
+	default:
+		die("unimplement symbol %d\n", sym->type);
+		break;
+	}
+	convert_symbol(op, &sym->ctype.base_type, data);
+
+	/* sym->initializer has already been taken care by the linearize code. */
+
+	// convert_expression(op, &sym->initializer, data);
+done:
+	traverse_exit();
+}
+
+void traverse_expression(struct convert_op *op, void *ptr, void *data)
+{
+	struct expression *expr = (struct expression*) ptr;
+	traverse_enter();
+	switch(expr->type) {
+	case EXPR_SYMBOL:
+		convert_symbol(op, &expr->symbol, data);
+		op->convert_ident(&expr->symbol_name, data);
+		break;
+	case EXPR_STRING:
+		op->convert_string(&expr->string, data);
+		break;
+	case EXPR_VALUE:
+		break;
+	default:
+		die("unimplement expression %d\n", expr->type);
+		break;
+	}
+	traverse_exit();
+}
+
+
+void traverse_multijmp(struct convert_op *op, void *ptr, void *data)
+{
+	struct multijmp *mj = (struct multijmp*) ptr;
+	traverse_enter();
+	convert_basic_block(op, &mj->target, data);
+	traverse_exit();
+}
+
+void traverse_asm_constraint(struct convert_op *op, void *ptr, void *data)
+{
+	struct asm_constraint *ct = (struct asm_constraint*) ptr;
+	traverse_enter();
+	convert_pseudo(op, &ct->pseudo, data);
+	op->convert_charstring((char**)&ct->constraint, data);
+	op->convert_ident((struct ident**)&ct->ident, data);
+	traverse_exit();
+}
+
+void traverse_asm_rules(struct convert_op *op, void *ptr, void *data)
+{
+	struct asm_rules *rules = (struct asm_rules*) ptr;
+	traverse_enter();
+	convert_asm_constraint_list(op, &rules->inputs, data);
+	convert_asm_constraint_list(op, &rules->outputs, data);
+	convert_asm_constraint_list(op, &rules->clobbers, data);
+	traverse_exit();
+}
+
+void traverse_instruction(struct convert_op *op, void *ptr, void *data)
+{
+	struct instruction *insn = (struct instruction*) ptr;
+	traverse_enter();
+	op->convert_position(&insn->pos, data);
+	if (!insn->bb)
+		goto exit;
+	convert_basic_block(op, &insn->bb, data);
+	switch (insn->opcode) {
+	case OP_ENTRY:
+		convert_pseudo_list(op, &insn->arg_list, data);
+		break;
+	case OP_RET:
+		convert_pseudo(op, &insn->src, data);
+		break;
+	case OP_BR:
+		convert_pseudo(op, &insn->cond, data);
+		convert_basic_block(op, &insn->bb_true, data);
+		convert_basic_block(op, &insn->bb_false, data);
+		break;
+
+	case OP_SWITCH:
+	case OP_COMPUTEDGOTO:
+		convert_pseudo(op, &insn->cond, data);
+		convert_multijmp_list(op, &insn->multijmp_list, data);
+		break;
+
+	case OP_PHISOURCE:
+		convert_pseudo(op, &insn->target, data);
+		convert_pseudo(op, &insn->phi_src, data);
+		convert_instruction_list(op, &insn->phi_users, data);
+		break;
+
+	case OP_PHI:
+		convert_pseudo(op, &insn->target, data);
+		convert_pseudo_list(op, &insn->phi_list, data);
+		break;
+
+	case OP_LNOP:
+	case OP_SNOP:
+		convert_pseudo(op, &insn->target, data);
+		break;
+	case OP_LOAD:
+	case OP_STORE:
+		convert_pseudo(op, &insn->target, data);
+		convert_pseudo(op, &insn->src, data);
+		break;
+	case OP_CALL:
+	case OP_INLINED_CALL:
+		convert_pseudo(op, &insn->target, data);
+		convert_pseudo(op, &insn->func, data);
+		convert_pseudo_list(op, &insn->arguments, data);
+		break;
+	case OP_CAST:
+	case OP_SCAST:
+	case OP_FPCAST:
+	case OP_PTRCAST:
+		convert_pseudo(op, &insn->target, data);
+		convert_symbol(op, &insn->orig_type, data);
+		convert_pseudo(op, &insn->src, data);
+		break;
+
+	case OP_BINARY ... OP_BINARY_END:
+	case OP_BINCMP ... OP_BINCMP_END:
+		convert_pseudo(op, &insn->target, data);
+		convert_pseudo(op, &insn->src1, data);
+		convert_pseudo(op, &insn->src2, data);
+		break;
+
+	case OP_SEL:
+		convert_pseudo(op, &insn->target, data);
+		convert_pseudo(op, &insn->src1, data);
+		convert_pseudo(op, &insn->src2, data);
+		convert_pseudo(op, &insn->src3, data);
+		break;
+
+	case OP_SLICE:
+		convert_pseudo(op, &insn->target, data);
+		convert_pseudo(op, &insn->base, data);
+		break;
+
+	case OP_NOP:
+		convert_pseudo(op, &insn->target, data);
+		break;
+	case OP_NOT: case OP_NEG:
+		convert_pseudo(op, &insn->target, data);
+		convert_pseudo(op, &insn->src1, data);
+		break;
+	case OP_CONTEXT:
+		convert_expression(op, &insn->context_expr, data);
+		break;
+	case OP_RANGE:
+		convert_pseudo(op, &insn->src1, data);
+		convert_pseudo(op, &insn->src2, data);
+		convert_pseudo(op, &insn->src3, data);
+		break;
+	case OP_DEATHNOTE:
+		convert_pseudo(op, &insn->target, data);
+		break;
+	case OP_ASM:
+		op->convert_charstring((char **)&insn->string, data);
+		convert_asm_rules(op, &insn->asm_rules, data);
+		break;
+	case OP_COPY:
+		convert_pseudo(op, &insn->target, data);
+		convert_pseudo(op, &insn->src, data);
+		break;
+	case OP_SYMADDR:
+		convert_pseudo(op, &insn->target, data);
+		convert_pseudo(op, &insn->symbol, data);
+		break;
+	default:
+		die("unimplement %s\n", show_instruction(insn));
+	}
+exit:
+	traverse_exit();
+}
+
+
Index: sparse/tokenize.c
===================================================================
--- sparse.orig/tokenize.c
+++ sparse/tokenize.c
@@ -23,6 +23,7 @@
 #define EOF (-1)
 
 int input_stream_nr = 0;
+int free_stream_head = 0;	/* input_stream can relocate, using index */
 struct stream *input_streams;
 static int input_streams_allocated;
 
@@ -167,11 +168,23 @@ const char *show_token(const struct toke
 	}
 }
 
-int init_stream(const char *name, int fd, const char **next_path)
+void free_stream(int stream)
 {
-	int stream = input_stream_nr;
-	struct stream *current;
+	if (stream) {
+		input_streams[stream].fd = free_stream_head;
+		free_stream_head = stream;
+	}
+}
 
+int allocate_stream(void)
+{
+	int stream;
+	if (free_stream_head) {
+		stream = free_stream_head;
+		free_stream_head = input_streams[stream].fd;
+		return stream;
+	}
+	stream = input_stream_nr;
 	if (stream >= input_streams_allocated) {
 		int newalloc = stream * 4 / 3 + 10;
 		input_streams = realloc(input_streams, newalloc * sizeof(struct stream));
@@ -179,6 +192,15 @@ int init_stream(const char *name, int fd
 			die("Unable to allocate more streams space");
 		input_streams_allocated = newalloc;
 	}
+	input_stream_nr = stream+1;
+	return stream;
+}
+
+int init_stream(const char *name, int fd, const char **next_path)
+{
+	int stream = allocate_stream();
+	struct stream *current;
+	
 	current = input_streams + stream;
 	memset(current, 0, sizeof(*current));
 	current->name = name;
@@ -186,7 +208,6 @@ int init_stream(const char *name, int fd
 	current->next_path = next_path;
 	current->path = NULL;
 	current->constant = CONSTANT_FILE_MAYBE;
-	input_stream_nr = stream+1;
 	return stream;
 }
 
Index: sparse/symbol.h
===================================================================
--- sparse.orig/symbol.h
+++ sparse/symbol.h
@@ -107,6 +107,8 @@ extern int expand_constant_p(struct expr
 #define SYM_ATTR_NORMAL		1
 #define SYM_ATTR_STRONG		2
 
+struct entrypoint;
+
 struct symbol {
 	enum type type:8;
 	enum namespace namespace:9;
@@ -159,6 +161,7 @@ struct symbol {
 	};
 	union /* backend */ {
 		struct basic_block *bb_target;	/* label */
+		struct entrypoint *ep;		/* function */
 		void *aux;			/* Auxiliary info, e.g. backend information */
 		struct {			/* sparse ctags */
 			char kind;
Index: sparse/test-linearize.c
===================================================================
Index: sparse/Makefile
===================================================================
--- sparse.orig/Makefile
+++ sparse/Makefile
@@ -25,7 +25,7 @@ INCLUDEDIR=$(PREFIX)/include
 PKGCONFIGDIR=$(LIBDIR)/pkgconfig
 
 PROGRAMS=test-lexing test-parsing obfuscate compile graph sparse test-linearize example \
-	 test-unssa test-dissect ctags
+	 test-unssa test-dissect ctags test-write 
 
 
 INST_PROGRAMS=sparse cgcc
@@ -38,12 +38,13 @@ endif
 
 LIB_H=    token.h parse.h lib.h symbol.h scope.h expression.h target.h \
 	  linearize.h bitmap.h ident-list.h compat.h flow.h allocate.h \
-	  storage.h ptrlist.h dissect.h
+	  storage.h ptrlist.h dissect.h traverse.h bytecode.h
 
 LIB_OBJS= target.o parse.o tokenize.o pre-process.o symbol.o lib.o scope.o \
 	  expression.o show-parse.o evaluate.o expand.o inline.o linearize.o \
 	  sort.o allocate.o compat-$(OS).o ptrlist.o \
-	  flow.o cse.o simplify.o memops.o liveness.o storage.o unssa.o dissect.o
+	  flow.o cse.o simplify.o memops.o liveness.o storage.o unssa.o dissect.o \
+	  writer.o traverse.o
 
 LIB_FILE= libsparse.a
 SLIB_FILE= libsparse.so
@@ -133,6 +134,9 @@ ctags: ctags.o $(LIBS)
 c2xml: c2xml.o $(LIBS)
 	$(QUIET_LINK)$(CC) $(LDFLAGS)  -o $@ $< $(LIBS) `pkg-config --libs libxml-2.0`
 
+test-write: test-write.o $(LIBS)
+	$(CC) $(LDFLAGS) -o $@ $< $(LIBS)
+
 $(LIB_FILE): $(LIB_OBJS)
 	$(QUIET_AR)$(AR) rcs $@ $(LIB_OBJS)
 
@@ -177,6 +181,10 @@ graph.o: $(LIB_H)
 
 c2xml.o: c2xml.c $(LIB_H)
 	$(QUIET_CC)$(CC) `pkg-config --cflags libxml-2.0` -o $@ -c $(CFLAGS) $<
+traverse.o: $(LIB_H)
+writer.o: $(LIB_H)
+traverse.o: $(LIB_H)
+test-write.o: $(LIB_H)
 
 compat-linux.o: compat/strtold.c compat/mmap-blob.c \
 	$(LIB_H)
Index: sparse/token.h
===================================================================
--- sparse.orig/token.h
+++ sparse/token.h
@@ -40,7 +40,8 @@ struct stream {
 
 	/* Use these to check for "already parsed" */
 	enum constantfile constant;
-	int dirty;
+	unsigned dirty:1;
+	unsigned used:1;
 	struct ident *protect;
 	struct token *ifndef;
 	struct token *top_if;
@@ -210,4 +211,7 @@ static inline int match_ident(struct tok
 	return token->pos.type == TOKEN_IDENT && token->ident == id;
 }
 
+extern void free_stream(int stream);
+extern int allocate_stream(void);
+
 #endif
Index: sparse/lib.h
===================================================================
Index: sparse/bytecode.h
===================================================================
--- sparse.orig/bytecode.h
+++ sparse/bytecode.h
@@ -0,0 +1,44 @@
+/*
+ * bytecode.h
+ *
+ * Copyright (C) 2008 Christopher Li.
+ *
+ * Licensed under the Open Software License version 1.1
+ */
+
+#include "linux/types.h"
+#include "ptrlist.h"
+
+DECLARE_PTR_LIST(stream_list, struct stream);
+DECLARE_PTR_LIST(ident_list, struct ident);
+
+struct object_header {
+	__u32 type;
+	__u32 offset;
+	__u32 count;
+	__u32 size;
+};
+
+struct bytecode_hdr {
+	__u32 magic;
+	__u16 major_version;
+	__u16 minor_version;
+	__u32 export_symbols;		// first group # symbols in ident objects
+	__u32 internal_symbols;		// second group # symbol in ident objects
+	__u32 import_symbols;		// follow up # symbols in ident objects
+	__u32 symbol_list;
+	__u32 function_list;
+	__u16 object_headers;
+	struct object_header headers[];
+};
+
+void write_bytecode(char *src, struct symbol_list *syms);
+struct bytecode_hdr *read_bytecode(char *src);
+
+static inline void **add_ident(struct ident_list **list, struct ident *ident)
+{
+	void *p = (void*) ident;
+	if (!p)
+		return NULL;
+	return add_ptr_list((struct ptr_list**) list, p);
+}
Index: sparse/linearize.h
===================================================================
--- sparse.orig/linearize.h
+++ sparse/linearize.h
@@ -8,7 +8,10 @@
 #include "symbol.h"
 
 struct instruction;
+struct entrypoint;
+
 DECLARE_PTR_LIST(pseudo_ptr_list, pseudo_t);
+DECLARE_PTR_LIST(entrypoint_list, struct entrypoint);
 
 struct pseudo_user {
 	struct instruction *insn;
@@ -71,18 +74,17 @@ struct instruction {
 		 size:24;
 	struct basic_block *bb;
 	struct position pos;
-	union {
-		pseudo_t target;
-		pseudo_t cond;		/* for branch and switch */
-	};
+	pseudo_t target;
 	union {
 		struct /* entrypoint */ {
 			struct pseudo_list *arg_list;
 		};
 		struct /* branch */ {
+			pseudo_t cond;
 			struct basic_block *bb_true, *bb_false;
 		};
 		struct /* switch */ {
+			pseudo_t cond;
 			struct multijmp_list *multijmp_list;
 		};
 		struct /* phi_node */ {
Index: sparse/writer.c
===================================================================
--- sparse.orig/writer.c
+++ sparse/writer.c
@@ -0,0 +1,456 @@
+/*
+ * writer.c
+ *
+ * Walk the sparse symbol tree and linearized instruction. Save
+ * them into file.
+ *
+ * Copyright (C) 2008 Christopher Li.
+ *
+ * Licensed under the Open Software License version 1.1
+ */
+
+#include <unistd.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "allocate.h"
+#include "linearize.h"
+#include "traverse.h"
+#include "token.h"
+#include "lib.h"
+#include "expression.h"
+#include "bytecode.h"
+
+struct shadow_ptr {
+	struct shadow_ptr *next;
+	void *orig;
+	int nr;
+	int size;
+};
+
+struct object {};
+
+ALLOCATOR(shadow_ptr, "shadow ptr");
+ALLOCATOR(object, "converted object");
+
+struct object_table {
+	int count;
+	int size;
+	int totalsize;
+	struct ptr_list *object_list;
+} object_table[PTR_LAST] = {
+	[PTR_INSN].size 	= sizeof(struct instruction),
+	[PTR_ENTRYPOINT].size 	= sizeof(struct entrypoint),
+	[PTR_BASICBLOCK].size	= sizeof(struct basic_block),
+	[PTR_PSEUDO].size	= sizeof(struct pseudo),
+	[PTR_PSEUDO_USER].size 	= sizeof(struct pseudo_user),
+	[PTR_ASM_CONSTRAINT].size = sizeof(struct asm_constraint),
+	[PTR_SYMBOL].size	= sizeof (struct symbol),
+	[PTR_ASM_RULES].size	= sizeof (struct asm_rules),
+	[PTR_MULTIJMP].size	= sizeof (struct multijmp),
+	[PTR_EXPR].size		= sizeof (struct expression),
+};
+
+struct program {
+	struct symbol_list *export_syms;
+	struct symbol_list *import_syms;
+	struct symbol_list *symbols;
+	struct entrypoint_list *function_list;
+};
+
+#define PTR_HASH_SIZE (38333)
+
+void *file_buffer = NULL;
+
+static struct shadow_ptr *ptr_hash_table[PTR_HASH_SIZE];
+static struct ptr_list *ptr_offset_list = NULL;
+static struct ptr_list *used_stream_name = NULL;
+
+unsigned int total_ptr = 0;
+
+// #define DEBUG_WRITE
+#ifdef DEBUG_WRITE
+static const char *indent_string = "                                                    ";
+#define dbg(fmt, ...) printf("%.*s" fmt, traverse_level*2, indent_string, ## __VA_ARGS__)
+#else
+#define dbg(fmt, ...)
+#endif
+
+static struct convert_op writer_op;
+
+static unsigned long inline ptr_hash(void *ptr)
+{
+	return ((unsigned long) ptr >> 2) % PTR_HASH_SIZE;
+}
+
+
+static inline void register_object(int type, void *object, int size, struct shadow_ptr *sp, void **pptr)
+{
+	struct object_table *entry = object_table + type;
+	if (sp && pptr)
+		*(unsigned int*) pptr = sp->nr = entry->count;
+	entry->count ++;
+	entry->totalsize += size;
+	add_ptr_list_notag(&entry->object_list, object);
+}
+
+static void writer_init_object_table(void)
+{
+	int i;
+	for (i = 0; i < PTR_LAST; i++) {
+		struct object_table *entry = object_table + i;
+		entry->count = 1;
+		entry->totalsize = 0;
+		entry->object_list = NULL;
+	}
+}
+
+static void writer_init(void)
+{
+	total_ptr = 0;
+	used_stream_name = NULL;
+	ptr_offset_list = NULL;
+	writer_init_object_table();
+}
+
+static inline struct shadow_ptr *create_shadow_pointer(void *pptr)
+{
+	void *ptr = *(void**)pptr;
+	unsigned int hash =  ptr_hash(ptr);
+	struct shadow_ptr *sp;
+
+	if (!ptr) {
+		*(unsigned int*) pptr = 0;
+		return NULL;
+	}
+	sp = ptr_hash_table[hash];
+	while (sp) {
+		if (sp->orig == ptr) {
+			*(unsigned int*) pptr = sp->nr;
+			return NULL;
+		}
+		sp = sp->next;
+	}
+
+	sp = __alloc_shadow_ptr(0);
+	sp->next = ptr_hash_table[hash];
+	ptr_hash_table[hash] = sp;
+	sp->orig = ptr;
+	return sp;
+
+}
+
+static void writer_convert_object(int type, void *pptr, int size, traverse_t traverse, void *data)
+{
+	void *ptr = *(void**)pptr;
+	struct shadow_ptr *sp = create_shadow_pointer(pptr);
+	if (sp) {
+		void *object = __alloc_object(size);
+		memcpy(object, ptr, size);
+		register_object(type, object, size, sp, (void**) pptr);
+		traverse(&writer_op, object, data);
+	}
+}
+
+static void writer_convert_ptr_list(int type, void *pptr, int size, traverse_t traverse, void *data)
+{
+	struct ptr_list *list = *(struct ptr_list**)pptr;
+	struct shadow_ptr *sp = create_shadow_pointer(pptr);
+	if (sp) {
+		int n = ptr_list_size(list);
+		int listsize = (n + 1) * sizeof (unsigned int);
+		unsigned int *cur, *list_object = (unsigned int*) __alloc_object(listsize);
+		*list_object = n;
+		cur = list_object + 1;
+		register_object(PTR_LIST, list_object, listsize, sp, (void **) pptr);
+		void *p;
+
+		FOR_EACH_PTR(list, p) {
+			if (size)
+				writer_convert_object(type, &p, size, traverse, data);
+			*cur++ = (unsigned int)p;
+		} END_FOR_EACH_PTR(p);
+	}
+}
+
+static void writer_convert_string(struct string **pptr, void *data)
+{
+	struct string *string = *pptr;
+	struct shadow_ptr *sp = create_shadow_pointer(pptr);
+	if (sp) {
+		int size = sizeof *string + string->length;
+		register_object(PTR_STRING, string, size, sp, (void**) pptr);
+	}
+}
+
+static void writer_convert_charstring(char **pptr, void *data)
+{
+	char *string = *pptr;
+	struct shadow_ptr *sp = create_shadow_pointer(pptr);
+	if (sp) {
+		int size = strlen(string) + 1;
+		register_object(PTR_CHARSTRING, string, size, sp, (void**) pptr);
+	}
+}
+
+static void writer_convert_posistion(struct position *pos, void *data)
+{
+	struct stream *stream = input_streams + pos->stream;
+	char *name = (char*) stream->name;
+	struct shadow_ptr *sp = create_shadow_pointer(&stream);
+	if (sp) {
+		register_object(PTR_STREAM_NAME, name, strlen(name) + 1, sp, (void**) &stream);
+	}
+	pos->stream = (unsigned int) stream;
+}
+
+static void writer_convert_ident(struct ident **pident, void *data)
+{
+	struct ident *ident = *pident;
+	struct shadow_ptr *sp = create_shadow_pointer(pident);
+	if (sp) {
+		int size = ident->len + sizeof *ident;
+		register_object(PTR_IDENT, ident, size, sp, (void**) pident);
+	}
+}
+
+static struct convert_op writer_op = {
+	.convert_ptr = writer_convert_object,
+	.convert_list = writer_convert_ptr_list,
+	.convert_position = writer_convert_posistion,
+	.convert_string = writer_convert_string,
+	.convert_charstring = writer_convert_charstring,
+	.convert_ident = writer_convert_ident,
+};
+
+static size_t cacluate_size(void)
+{
+	int i;
+	size_t total = 0;
+	for (i = 0; i < PTR_LAST; i++) {
+		total += object_table[i].totalsize;
+		printf("%s size %d\n", object_typename(i), object_table[i].totalsize);
+	}
+	return total;
+}
+
+static int register_symbol_idents(struct symbol_list *syms)
+{
+	struct symbol *sym;
+	int orig = object_table[PTR_IDENT].count;
+
+	FOR_EACH_PTR(syms, sym) {
+		struct ident *ident = sym->ident;
+		writer_convert_ident(&ident, NULL);
+	} END_FOR_EACH_PTR(sym);
+
+	return object_table[PTR_IDENT].count - orig;
+}
+
+static size_t write_fixsize_object(struct bytecode_hdr *hdr, int type, size_t offset)
+{
+	struct object_table *entry = object_table + type;
+	struct object_header *header = hdr->headers + type;
+	int size = entry->size;
+	void *ptr;
+	char *buffer = ((char*) hdr) +  offset;
+	
+	header->type = type;
+	header->offset = offset;
+	header->count = entry->count;
+	header->size = entry->totalsize;
+
+	offset = 0;
+	FOR_EACH_PTR(entry->object_list, ptr) {
+		memcpy(buffer + offset, ptr, size);
+		offset += size;
+	} END_FOR_EACH_PTR(ptr);
+	return offset;
+}
+
+static size_t write_ident(struct bytecode_hdr *hdr, size_t offset)
+{
+	struct object_table *entry = object_table + PTR_IDENT;
+	struct object_header *header = hdr->headers + PTR_IDENT;
+	struct ident *ident;
+	struct ident_list *list = (struct ident_list*) entry->object_list;
+	char *buffer = ((char*) hdr) +  offset;
+
+	header->type = PTR_IDENT;
+	header->offset = offset;
+	header->count = entry->count;
+	header->size = entry->totalsize;
+
+	offset = 0;
+	FOR_EACH_PTR(list, ident) {
+		memcpy(buffer + offset, ident->name, ident->len);
+		buffer[offset + ident->len] = '\0';
+		offset += ident->len + 1;
+	} END_FOR_EACH_PTR(ident);
+
+	header->size = offset;
+	return offset;
+}
+
+static size_t write_string(struct bytecode_hdr *hdr, size_t offset)
+{
+	struct object_table *entry = object_table + PTR_STRING;
+	struct object_header *header = hdr->headers + PTR_STRING;
+	void *ptr;
+	char *buffer = ((char*) hdr) +  offset;
+
+	header->type = PTR_STRING;
+	header->offset = offset;
+	header->count = entry->count;
+	header->size = entry->totalsize;
+
+	offset = 0;
+	FOR_EACH_PTR(entry->object_list, ptr) {
+		struct string *string = (struct string *) ptr;
+		int size = sizeof *string + string->length;
+		memcpy(buffer + offset, ptr, size);
+		offset += size;
+	} END_FOR_EACH_PTR(ptr);
+	return offset;
+}
+
+static size_t write_charstring(struct bytecode_hdr *hdr, int type, size_t offset)
+{
+	struct object_table *entry = object_table + type;
+	struct object_header *header = hdr->headers + type;
+	void *ptr;
+	char *buffer = ((char*) hdr) +  offset;
+
+	header->type = type;
+	header->offset = offset;
+	header->count = entry->count;
+	header->size = entry->totalsize;
+
+	offset = 0;
+	FOR_EACH_PTR(entry->object_list, ptr) {
+		int size = strlen(ptr);
+		strncpy(buffer + offset, ptr, size);
+		buffer[offset + size] = '\0';
+		offset += size + 1;
+	} END_FOR_EACH_PTR(ptr);
+	return offset;
+}
+
+static size_t write_list(struct bytecode_hdr *hdr, size_t offset)
+{
+	struct object_table *entry = object_table + PTR_LIST;
+	struct object_header *header = hdr->headers + PTR_LIST;
+	void *ptr;
+	char *buffer = ((char*) hdr) +  offset;
+
+	header->type = PTR_LIST;
+	header->offset = offset;
+	header->count = entry->count;
+	header->size = entry->totalsize;
+
+	offset = 0;
+	FOR_EACH_PTR(entry->object_list, ptr) {
+		int size = ((*(unsigned int*)ptr) + 1) * sizeof (unsigned int);
+		memcpy(buffer + offset, ptr, size);
+		offset += size;
+	} END_FOR_EACH_PTR(ptr);
+	return offset;
+}
+
+static void write_file(char *filename, struct program *prog)
+{
+	struct bytecode_hdr hdr = {}, *bhdr;
+	struct program cprog = *prog;
+	int fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0644);
+	size_t offset = 0, total;
+	int hdrsize = sizeof hdr + PTR_LAST * sizeof (struct object_header);
+	void *buffer;
+	int i;
+
+	if (fd < 0)
+		die("error opening file %s\n", filename);
+
+	writer_init();
+
+	hdr.export_symbols = register_symbol_idents(prog->export_syms);
+	hdr.internal_symbols = register_symbol_idents(prog->symbols);
+	hdr.import_symbols = register_symbol_idents(prog->import_syms);
+
+	printf ("export syms %d internal %d import %d\n", hdr.export_symbols, hdr.internal_symbols, hdr.import_symbols);
+
+	writer_convert_ptr_list(PTR_SYMBOL, &cprog.symbols, sizeof(struct symbol),
+				traverse_symbol, NULL);
+	writer_convert_ptr_list(PTR_ENTRYPOINT, &cprog.function_list, sizeof(struct entrypoint),
+				traverse_entrypoint, NULL);
+	hdr.symbol_list = (unsigned int) cprog.symbols;
+	hdr.function_list = (unsigned int) cprog.function_list;
+
+	total = cacluate_size() + hdrsize;
+
+	ftruncate(fd, total);
+	buffer = mmap(NULL, total , PROT_READ | PROT_WRITE,
+		      MAP_SHARED, fd, 0);
+
+	if (buffer == MAP_FAILED)
+		die("error mmaping file %s with size %lx\n", filename, (unsigned long)total);
+
+	memcpy(buffer, &hdr, sizeof hdr);
+	bhdr = (struct bytecode_hdr*) buffer;
+	offset += hdrsize;
+
+	offset += write_ident(bhdr, offset);
+	offset += write_string(bhdr, offset);
+	offset += write_charstring(bhdr, PTR_STREAM_NAME,offset);
+	offset += write_charstring(bhdr, PTR_CHARSTRING, offset);
+	offset += write_list(bhdr, offset);
+
+	for (i = 0; i < PTR_VARSIZE; i++)
+		offset += write_fixsize_object(bhdr, i, offset);
+	munmap(buffer, total);
+	ftruncate(fd, offset);
+	printf("toalsize %d\n", offset);
+	close(fd);
+}
+
+void write_bytecode(char *filename, struct symbol_list *syms)
+{
+	struct symbol *sym;
+	struct program prog = {};
+	struct entrypoint *ep;
+
+
+	prog.symbols = syms;
+	prog.import_syms = NULL;
+
+	FOR_EACH_PTR(syms, sym) {
+		expand_symbol(sym);
+		ep = linearize_symbol(sym);
+
+		if (!(sym->ctype.modifiers & MOD_STATIC))
+			add_symbol(&prog.export_syms, sym);
+
+		if (ep) {
+			struct pseudo *p;
+			struct symbol *s;
+			add_ptr_list(&prog.function_list, ep);
+			FOR_EACH_PTR(ep->accesses, p) {
+				struct ident *ident;
+				s = p->sym;
+				if (!(s->ctype.modifiers & MOD_EXTERN))
+					continue;
+				if (!(ident = s->ident))
+					continue;
+				if (find_ptr_in_list((struct ptr_list *)prog.import_syms, s))
+					continue;
+				add_symbol(&prog.import_syms, s);
+			} END_FOR_EACH_PTR(p);
+		}
+	} END_FOR_EACH_PTR(sym);
+
+	write_file(filename, &prog);
+}
+
Index: sparse/test-write.c
===================================================================
--- sparse.orig/test-write.c
+++ sparse/test-write.c
@@ -0,0 +1,54 @@
+/*
+ * Test program for dumping the sparse parsing result into
+ * bytecode files.
+ *
+ *  Copyright (C) 2008 Christopher Li.
+ *
+ * Licensed under the Open Software License version 1.1
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "lib.h"
+#include "allocate.h"
+#include "symbol.h"
+#include "expression.h"
+#include "linearize.h"
+#include "bytecode.h"
+
+struct stream_list *used_stream = NULL;
+
+static void write_one_file(char *srcfile)
+{
+	char *outfile;
+	int end;
+	struct symbol_list *syms;
+
+	outfile = strdup(srcfile);
+	if (!outfile)
+		die("out of memory %s\n", srcfile);
+	end = strlen(outfile);
+	if (end <= 2 || outfile [end - 1] != 'c')
+		die("not a c file");
+	outfile[end - 1] = 'b';
+	syms = sparse(srcfile);
+	write_bytecode(outfile, syms);
+	free(outfile);
+}
+
+int main(int argc, char **argv)
+{
+	struct string_list *filelist = NULL;
+	char *file;
+
+	sparse_initialize(argc, argv, &filelist);
+	FOR_EACH_PTR_NOTAG(filelist, file) {
+		write_one_file(file);
+	} END_FOR_EACH_PTR_NOTAG(file);
+	return 0;
+}
Index: sparse/traverse.h
===================================================================
--- sparse.orig/traverse.h
+++ sparse/traverse.h
@@ -0,0 +1,87 @@
+/*
+ * traverse.h
+ *
+ * Copyright (C) 2008 Christopher Li.
+ *
+ * Licensed under the Open Software License version 1.1
+ */
+
+#include "symbol.h"
+#include "expression.h"
+
+struct convert_op;
+
+enum ptr_type {
+	/* fix size objects */
+	PTR_INSN,
+	PTR_ENTRYPOINT,
+	PTR_BASICBLOCK,
+	PTR_PSEUDO,
+	PTR_PSEUDO_USER,
+	PTR_ASM_CONSTRAINT,
+	PTR_SYMBOL,
+	PTR_ASM_RULES,
+	PTR_MULTIJMP,
+	PTR_EXPR,
+
+	/* variable size objects */
+	PTR_VARSIZE,
+	PTR_IDENT = PTR_VARSIZE,
+	PTR_STRING,
+	PTR_CHARSTRING,
+	PTR_STREAM_NAME,
+	PTR_LIST,
+	PTR_LAST,
+};
+
+
+typedef void (*traverse_t)(struct convert_op*, void *, void *);
+
+struct convert_op {
+	void (*convert_ptr)(int type, void *ptr, int size, traverse_t func, void *data);
+	void (*convert_list)(int type, void *ptr, int size, traverse_t func, void *data);
+	void (*convert_position)(struct position*, void *data);
+	void (*convert_string)(struct string**, void *data);
+	void (*convert_charstring)(char **, void *data);
+	void (*convert_ident)(struct ident**, void *data);
+};
+
+void traverse_entrypoint(struct convert_op*, void *ptr, void *data);
+void traverse_basic_block(struct convert_op*, void *ptr, void *data);
+void traverse_multijmp(struct convert_op*, void *ptr, void *data);
+void traverse_instruction(struct convert_op*, void *ptr, void *data);
+void traverse_symbol(struct convert_op*, void *ptr, void *data);
+void traverse_pseudo(struct convert_op *op, void *ptr, void *data);
+void traverse_asm_constraint(struct convert_op*, void *ptr, void *data);
+void traverse_asm_rules(struct convert_op*, void *ptr, void *data);
+void traverse_expression(struct convert_op*, void *ptr, void *data);
+
+extern const char* object_typename(int type);
+extern int traverse_level;
+
+#define DECLARE_PTR_CONVERTOR(e, n)							\
+	static inline void convert_##n(struct convert_op *op, struct n **n, void *data)	\
+	{										\
+		op->convert_ptr(e, n, sizeof **n, traverse_##n, data);			\
+	} 										\
+
+
+#define DECLARE_LIST_CONVERTOR(e, n)							\
+	static inline void convert_##n##_list(struct convert_op *op, struct n##_list **n, void *data)\
+	{										\
+		op->convert_list(e, n, sizeof *(*n)->list[0], traverse_##n, data);	\
+	} 										\
+
+#define DECLARE_CONVERTOR(e,n)	DECLARE_PTR_CONVERTOR(e, n) DECLARE_LIST_CONVERTOR(e, n) 
+
+DECLARE_CONVERTOR(PTR_SYMBOL, symbol);
+DECLARE_CONVERTOR(PTR_EXPR, expression);
+DECLARE_CONVERTOR(PTR_ENTRYPOINT, entrypoint);
+DECLARE_CONVERTOR(PTR_PSEUDO, pseudo);
+DECLARE_CONVERTOR(PTR_BASICBLOCK, basic_block);
+DECLARE_CONVERTOR(PTR_INSN, instruction);
+DECLARE_CONVERTOR(PTR_MULTIJMP, multijmp);
+DECLARE_CONVERTOR(PTR_ASM_CONSTRAINT, asm_constraint);
+DECLARE_PTR_CONVERTOR(PTR_ASM_RULES, asm_rules);
+
+
Index: sparse/test-read.c
===================================================================
Index: sparse/linearize.c
===================================================================

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2008-09-04  5:34 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-09-04  5:34 [2/2] The sparse byte code writer Christopher Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).