* [SEMANAGE] Join: implementation
@ 2006-01-12 11:17 Ivan Gyurdiev
0 siblings, 0 replies; only message in thread
From: Ivan Gyurdiev @ 2006-01-12 11:17 UTC (permalink / raw)
To: selinux; +Cc: Stephen Smalley
[-- Attachment #1: Type: text/plain, Size: 1586 bytes --]
This patch implements a two-source database join. I think it would be
useful to have a multi-source join, but I don't know how to do that
right now, and we have no use cases for it. I think a two-source join
is fine for now, and we can expand on that later. Also, technically you
could nest several joins to merge information from many sources,
although we probably need a better way to describe joined records first
(right now I have to write lots of small functions that route to the
proper record).
Anyway, this join is backed by a linked list, using inheritance - exactly
the same approach as database_file and database_activedb. It does all the
joining on cache() and all the splitting on flush(). cache() lists the
records in each component dbase, then quick-sorts them, then merges them
together. flush() writes back to its component databases, not to disk
(direct_api controls how and when that happens).
The join requires each supporting record to implement the functions join()
and split(), which construct a joined record from two component records and
split it back into its parts. An interesting feature is that join() must
support one NULL component if necessary. What happens in that case is a
partial join - the missing information is left blank if optional, or is
initialized to something reasonable if it's required. In particular, the
key is copied from the part that isn't missing. This provides a
migration path from users without a prefix to users with a prefix. It's
rather neat - if you have a completely different user set in each join
part, they will be combined.
[-- Attachment #2: libsemanage.join_implementation.diff --]
[-- Type: text/x-patch, Size: 10275 bytes --]
diff -Naurp --exclude pywrap-test.py --exclude user_record.h --exclude user_internal.h --exclude policy_components.c --exclude direct_api.c --exclude semanage_store.c --exclude user_base_record.c --exclude handle.h --exclude ports_local.c --exclude 'users_*' --exclude-from excludes old/libsemanage/src/database_join.c new/libsemanage/src/database_join.c
--- old/libsemanage/src/database_join.c 1969-12-31 17:00:00.000000000 -0700
+++ new/libsemanage/src/database_join.c 2006-01-12 02:44:32.000000000 -0700
@@ -0,0 +1,304 @@
+/* Copyright (C) 2005 Red Hat, Inc. */
+
+/* Object: dbase_join_t (Join)
+ * Extends: dbase_llist_t (Linked List)
+ * Implements: dbase_t (Database)
+ */
+
+struct dbase_join;
+typedef struct dbase_join dbase_t; /* within this file the generic dbase_t is the join struct */
+#define DBASE_DEFINED /* NOTE(review): presumably tells database.h that dbase_t is already declared - confirm */
+
+#include <stdlib.h>
+
+#include "user_internal.h"
+#include "debug.h"
+#include "handle.h"
+#include "database_join.h"
+#include "database_llist.h"
+
+/* JOIN dbase: a linked-list cache of joined records built from two backing databases */
+struct dbase_join {
+
+ /* Parent object - must always be
+ * the first field (the method table passes this object to dbase_llist_* functions through void* casts) - here we are using
+ * a linked list to store the joined records */
+ dbase_llist_t llist;
+
+ /* Backing databases - one for each
+ * thing being joined (dbase_join_release does not free them) */
+ dbase_config_t* join1;
+ dbase_config_t* join2;
+
+ /* JOIN extension - the record join()/split() callbacks */
+ record_join_table_t* rjtable;
+};
+/* Cache: list the records of both backing databases, sort each list, and merge them by key into joined records held in the llist cache. */
+static int dbase_join_cache(
+ semanage_handle_t* handle,
+ dbase_join_t* dbase) {
+ /* Returns STATUS_SUCCESS (immediately, if already cached) or STATUS_ERR after reporting through ERR() */
+ /* Extract all the object tables information */
+ dbase_t* dbase1 = dbase->join1->dbase;
+ dbase_t* dbase2 = dbase->join2->dbase;
+ dbase_table_t* dtable1 = dbase->join1->dtable;
+ dbase_table_t* dtable2 = dbase->join2->dtable;
+ record_table_t* rtable = dbase_llist_get_rtable(&dbase->llist);
+ record_join_table_t* rjtable = dbase->rjtable;
+ record_table_t* rtable1 = dtable1->get_rtable(dbase1);
+ record_table_t* rtable2 = dtable2->get_rtable(dbase2);
+
+ record_key_t* rkey = NULL;
+ record_t* record = NULL;
+ record1_t **records1 = NULL;
+ record2_t **records2 = NULL;
+ unsigned int rcount1 = 0, rcount2 = 0, i = 0, j = 0;
+
+ /* Already cached - nothing to rebuild */
+ if (dbase_llist_is_cached(&dbase->llist))
+ return STATUS_SUCCESS;
+
+ dbase_llist_cache_init(&dbase->llist);
+
+ /* First cache any child dbase, which must
+ * be the first thing done when calling dbase
+ * functions internally */
+ if (dtable1->cache(handle, dbase1) < 0)
+ goto err;
+ if (dtable2->cache(handle, dbase2) < 0)
+ goto err;
+
+ /* Fetch records - the arrays and the records in them are freed below on every exit path */
+ if (dtable1->list(handle, dbase1, &records1, &rcount1) < 0)
+ goto err;
+ if (dtable2->list(handle, dbase2, &records2, &rcount2) < 0)
+ goto err;
+
+ /* Sort for quicker merge later (each list with its own table's comparator) */
+ qsort(records1, rcount1, sizeof(record1_t*),
+ (int (*) (const void*, const void*)) rtable1->compare2_qsort);
+ qsort(records2, rcount2, sizeof(record2_t*),
+ (int (*) (const void*, const void*)) rtable2->compare2_qsort);
+
+ /* Now merge into this dbase: i walks records1, j walks records2 */
+ while (i < rcount1 || j < rcount2) {
+ int rc;
+
+ /* End of one list, or the other: treat the exhausted side as missing */
+ if (i == rcount1)
+ rc = -1;
+ else if (j == rcount2)
+ rc = 1;
+
+ /* Still more records to go, compare records2[j] against the key of records1[i] */
+ else {
+ if (rtable1->key_extract(handle, records1[i], &rkey) < 0)
+ goto err;
+
+ rc = rtable2->compare(records2[j], rkey);
+ /* NOTE(review): the key is extracted via rtable1 but freed via rtable - confirm both tables share the key type */
+ rtable->key_free(rkey);
+ rkey = NULL;
+ }
+
+ /* Missing record1 data: partial join from records2[j] alone */
+ if (rc < 0) {
+ if (rjtable->join(handle, NULL,
+ records2[j], &record) < 0)
+ goto err;
+ j++;
+ }
+
+ /* Missing record2 data: partial join from records1[i] alone */
+ else if (rc > 0) {
+ if (rjtable->join(handle, records1[i],
+ NULL, &record) < 0)
+ goto err;
+ i++;
+ }
+
+ /* Both records available: full join, advance both lists */
+ else {
+ if (rjtable->join(handle, records1[i],
+ records2[j], &record) < 0)
+ goto err;
+
+ i++;
+ j++;
+ }
+
+ /* Add result record to database */
+ if (dbase_llist_cache_prepend(handle, &dbase->llist, record) < 0)
+ goto err;
+ /* The local record is freed right away - presumably cache_prepend keeps its own copy (TODO confirm) */
+ rtable->free(record);
+ record = NULL;
+ }
+ /* Mark the cache valid only once every record has been merged */
+ dbase_llist_set_cached(&dbase->llist, 1);
+ /* Release the listed component records and their arrays */
+ for (i=0; i < rcount1; i++)
+ rtable1->free(records1[i]);
+ for (i=0; i < rcount2; i++)
+ rtable2->free(records2[i]);
+ free(records1);
+ free(records2);
+ return STATUS_SUCCESS;
+
+ err: /* NOTE(review): records already prepended to the llist cache are not dropped on this path - confirm that is handled elsewhere */
+ ERR(handle, "could not cache join database");
+ for (i=0; i < rcount1; i++)
+ rtable1->free(records1[i]);
+ for (i=0; i < rcount2; i++)
+ rtable2->free(records2[i]);
+ free(records1);
+ free(records2);
+ rtable->key_free(rkey);
+ rtable->free(record);
+ return STATUS_ERR;
+}
+
+/* Flush database: when the join cache has been modified, empty both backing databases and re-add every cached record, split into its two parts */
+static int dbase_join_flush(
+ semanage_handle_t* handle,
+ dbase_join_t* dbase) {
+ /* Returns STATUS_SUCCESS (also when there is nothing to flush) or STATUS_ERR after reporting through ERR() */
+ /* Extract all the object tables information */
+ dbase_t* dbase1 = dbase->join1->dbase;
+ dbase_t* dbase2 = dbase->join2->dbase;
+ dbase_table_t* dtable1 = dbase->join1->dtable;
+ dbase_table_t* dtable2 = dbase->join2->dtable;
+ record_table_t* rtable = dbase_llist_get_rtable(&dbase->llist);
+ record_join_table_t* rjtable = dbase->rjtable;
+ record_table_t* rtable1 = dtable1->get_rtable(dbase1);
+ record_table_t* rtable2 = dtable2->get_rtable(dbase2);
+
+ cache_entry_t* ptr;
+ record_key_t* rkey = NULL;
+ record1_t* record1 = NULL;
+ record2_t* record2 = NULL;
+
+ /* No effect of flush unless the join is both cached and modified */
+ if (!dbase_llist_is_cached(&dbase->llist) ||
+ !dbase_llist_is_modified(&dbase->llist))
+ return STATUS_SUCCESS;
+
+ /* First cache any dbase, (which should already be cached
+ * unless somebody did a drop_cache on the underlying
+ * databases while we were working on the join, so this probably
+ * doesn't do anything - it's just a precaution) */
+ if (dtable1->cache(handle, dbase1) < 0)
+ goto err;
+ if (dtable2->cache(handle, dbase2) < 0)
+ goto err;
+
+ /* Then clear all records from the cache.
+ * This is *not* the same as dropping the cache - it's an explicit
+ * request to delete all current records. We need to do
+ * this because we don't store delete deltas for the join,
+ * so we must re-add all records from scratch */
+ if (dtable1->del_all(handle, dbase1) < 0)
+ goto err;
+ if (dtable2->del_all(handle, dbase2) < 0)
+ goto err;
+
+ /* For each record (walking from cache tail to head), split, and add parts into their corresponding databases */
+ for (ptr = dbase->llist.cache_tail; ptr != NULL; ptr = ptr->prev) {
+ /* The key of the joined record is reused for both parts */
+ if (rtable->key_extract(handle, ptr->data, &rkey) < 0)
+ goto err;
+
+ if (rjtable->split(handle, ptr->data,
+ &record1, &record2) < 0)
+ goto err;
+ /* A part that split() left NULL is not added */
+ if (record1 && dtable1->add(handle, dbase1, rkey, record1) < 0)
+ goto err;
+
+ if (record2 && dtable2->add(handle, dbase2, rkey, record2) < 0)
+ goto err;
+ /* NOTE(review): these frees also run for a NULL part - confirm the record tables' free() accepts NULL */
+ rtable->key_free(rkey);
+ rtable1->free(record1);
+ rtable2->free(record2);
+ rkey = NULL;
+ record1 = NULL;
+ record2 = NULL;
+ }
+
+ /* Note that this function does not flush the child databases, it
+ * leaves that decision up to higher-level code */
+
+ dbase_llist_set_modified(&dbase->llist, 0);
+ return STATUS_SUCCESS;
+
+ err: /* NOTE(review): a failure after del_all leaves the backing databases only partly repopulated, and the join stays marked modified */
+ ERR(handle, "could not flush join database");
+ rtable->key_free(rkey);
+ rtable1->free(record1);
+ rtable2->free(record2);
+ return STATUS_ERR;
+}
+/* JOIN - initialization: allocate a join database over join1/join2 and hand it back through *dbase */
+int dbase_join_init(
+ semanage_handle_t* handle,
+ record_table_t* rtable,
+ record_join_table_t* rjtable,
+ dbase_config_t* join1,
+ dbase_config_t* join2,
+ dbase_t** dbase) {
+
+ dbase_join_t* join_dbase = malloc(sizeof(*join_dbase));
+
+ /* Allocation failed - report it and leave *dbase untouched */
+ if (join_dbase == NULL) {
+  ERR(handle, "out of memory, could not initialize join database");
+  return STATUS_ERR;
+ }
+
+ /* Set up the parent linked list, bound to the JOIN method table */
+ dbase_llist_init(&join_dbase->llist, rtable, &SEMANAGE_JOIN_DTABLE);
+
+ /* Remember the two sources and how to join/split their records */
+ join_dbase->join1 = join1;
+ join_dbase->join2 = join2;
+ join_dbase->rjtable = rjtable;
+
+ /* Publish the new database to the caller */
+ *dbase = join_dbase;
+
+ return STATUS_SUCCESS;
+}
+
+/* Release dbase resources: drop the cached joined records, then free the join object (join1, join2 and rjtable are left alone) */
+void dbase_join_release(
+ dbase_join_t* dbase) {
+ dbase_llist_t* parent = &dbase->llist;
+
+ dbase_llist_drop_cache(parent);
+ free(dbase);
+}
+
+/* JOIN dbase - method table implementation: only cache and flush are join-specific, every other slot is filled by a dbase_llist_* function */
+dbase_table_t SEMANAGE_JOIN_DTABLE = {
+ /* The (void*) casts plug parent linked-list functions into the slots; this relies on llist being the first field of struct dbase_join */
+ /* Cache/Transactions */
+ .cache = dbase_join_cache,
+ .drop_cache = (void*) dbase_llist_drop_cache,
+ .flush = dbase_join_flush,
+ .is_modified = (void*) dbase_llist_is_modified,
+
+ /* Database API - all handled by the linked-list parent */
+ .iterate = (void*) dbase_llist_iterate,
+ .exists = (void*) dbase_llist_exists,
+ .list = (void*) dbase_llist_list,
+ .add = (void*) dbase_llist_add,
+ .set = (void*) dbase_llist_set,
+ .del = (void*) dbase_llist_del,
+ .del_all = (void*) dbase_llist_del_all,
+ .modify = (void*) dbase_llist_modify,
+ .query = (void*) dbase_llist_query,
+ .count = (void*) dbase_llist_count,
+
+ /* Polymorphism */
+ .get_rtable = (void*) dbase_llist_get_rtable
+};
diff -Naurp --exclude pywrap-test.py --exclude user_record.h --exclude user_internal.h --exclude policy_components.c --exclude direct_api.c --exclude semanage_store.c --exclude user_base_record.c --exclude handle.h --exclude ports_local.c --exclude 'users_*' --exclude-from excludes old/libsemanage/src/database_join.h new/libsemanage/src/database_join.h
--- old/libsemanage/src/database_join.h 1969-12-31 17:00:00.000000000 -0700
+++ new/libsemanage/src/database_join.h 2006-01-11 21:16:05.000000000 -0700
@@ -0,0 +1,54 @@
+/* Copyright (C) 2005 Red Hat, Inc. */
+
+#ifndef _SEMANAGE_DATABASE_JOIN_INTERNAL_H_
+#define _SEMANAGE_DATABASE_JOIN_INTERNAL_H_
+
+#include "database.h"
+#include "handle.h"
+
+#ifndef DBASE_RECORD_JOIN_DEFINED /* fallback component record types, used only when DBASE_RECORD_JOIN_DEFINED is not already set */
+typedef void* record1_t;
+typedef void* record2_t;
+#define DBASE_RECORD_JOIN_DEFINED
+#endif
+/* Opaque join database type - this header only forward-declares the struct */
+struct dbase_join;
+typedef struct dbase_join dbase_join_t;
+
+/* JOIN extension to RECORD interface - method table */
+typedef struct record_join_table {
+ /* The join dbase treats a negative return value from either callback as failure */
+ /* Join two records together into *result.
+ * One of the provided records could be NULL (the join dbase passes NULL for the side that has no record with that key) */
+ int (*join) (
+ semanage_handle_t* handle,
+ const record1_t* record1,
+ const record2_t* record2,
+ record_t** result);
+
+ /* Splits a record into two; the join dbase skips a part that comes back NULL when flushing */
+ int (*split) (
+ semanage_handle_t* handle,
+ const record_t* record,
+ record1_t** split1,
+ record2_t** split2);
+
+} record_join_table_t;
+
+/* JOIN - initialization: creates a join database over join1 and join2; returns STATUS_SUCCESS or STATUS_ERR */
+extern int dbase_join_init(
+ semanage_handle_t* handle,
+ record_table_t* rtable, /* record table handed to the linked-list parent */
+ record_join_table_t* rjtable, /* join()/split() callbacks */
+ dbase_config_t* join1, /* first backing database */
+ dbase_config_t* join2, /* second backing database */
+ dbase_join_t** dbase); /* out: the new join database, set on success */
+
+/* JOIN - release: drops the cache and frees dbase itself */
+extern void dbase_join_release(
+ dbase_join_t* dbase);
+
+/* JOIN - method table implementation (defined in database_join.c) */
+extern dbase_table_t SEMANAGE_JOIN_DTABLE;
+
+#endif
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2006-01-12 11:17 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-01-12 11:17 [SEMANAGE] Join: implementation Ivan Gyurdiev
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.