From: "Andrew Olsen via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Andrew Olsen <andrew232@gmail.com>,
Andrew Olsen <andrew.olsen@koordinates.com>
Subject: [PATCH 3/4] Sample list-object-filter extensions
Date: Sun, 05 Sep 2021 23:51:38 +0000 [thread overview]
Message-ID: <8e0d11ea53a080e8212768f370fb8f05eaded312.1630885899.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1031.git.1630885899.gitgitgadget@gmail.com>
From: Andrew Olsen <andrew.olsen@koordinates.com>
Basic filter extension example which filters to a random subset of
blobs, and another example which shows how to do the same in C++ and
how to link in another library required by a filter extension.
Documentation changes follow.
Signed-off-by: Andrew Olsen <andrew.olsen@koordinates.com>
---
contrib/filter-extensions/rand/.gitignore | 2 +
contrib/filter-extensions/rand/Makefile | 28 +++++
contrib/filter-extensions/rand/rand.c | 103 ++++++++++++++++++
contrib/filter-extensions/rand_cpp/.gitignore | 2 +
contrib/filter-extensions/rand_cpp/Makefile | 34 ++++++
.../rand_cpp/adapter_functions.c | 6 +
.../rand_cpp/adapter_functions.h | 10 ++
contrib/filter-extensions/rand_cpp/rand.cpp | 103 ++++++++++++++++++
8 files changed, 288 insertions(+)
create mode 100644 contrib/filter-extensions/rand/.gitignore
create mode 100644 contrib/filter-extensions/rand/Makefile
create mode 100644 contrib/filter-extensions/rand/rand.c
create mode 100644 contrib/filter-extensions/rand_cpp/.gitignore
create mode 100644 contrib/filter-extensions/rand_cpp/Makefile
create mode 100644 contrib/filter-extensions/rand_cpp/adapter_functions.c
create mode 100644 contrib/filter-extensions/rand_cpp/adapter_functions.h
create mode 100644 contrib/filter-extensions/rand_cpp/rand.cpp
diff --git a/contrib/filter-extensions/rand/.gitignore b/contrib/filter-extensions/rand/.gitignore
new file mode 100644
index 00000000000..9eca6c88cf2
--- /dev/null
+++ b/contrib/filter-extensions/rand/.gitignore
@@ -0,0 +1,2 @@
+*.a
+*.o
diff --git a/contrib/filter-extensions/rand/Makefile b/contrib/filter-extensions/rand/Makefile
new file mode 100644
index 00000000000..267221ee952
--- /dev/null
+++ b/contrib/filter-extensions/rand/Makefile
@@ -0,0 +1,28 @@
+# Run this via `FILTER_EXTENSIONS=contrib/filter-extensions/rand/rand.a make`
+# from the main git directory. That way we inherit useful variables.
+# Unless MAKEFLAGS contains an "s" or V is defined, echo a short "CC/AR <target>" line per command.
+ifneq ($(findstring s,$(MAKEFLAGS)),s)
+ifndef V
+ QUIET_CC = @echo ' ' CC $@;
+ QUIET_AR = @echo ' ' AR $@;
+endif
+endif
+# Static archive produced by this directory.
+FILTER_STATIC_LIB = rand.a
+# Default goal: build the archive; raises an error when MAKELEVEL is 0 (i.e. not a sub-make).
+all: $(FILTER_STATIC_LIB)
+ifeq ($(MAKELEVEL),0)
+ $(error "Run via parent git make")
+endif
+ @:
+# Pack the object file into the archive.
+$(FILTER_STATIC_LIB): rand.o
+ $(QUIET_AR)$(AR) $(ARFLAGS) $@ $^
+# Compile the filter; ALL_CFLAGS is not set in this file (presumably inherited from the parent make).
+rand.o: rand.c
+ $(QUIET_CC)$(CC) -c $(ALL_CFLAGS) $<
+# Delete the archive and the object file.
+clean:
+ $(RM) $(FILTER_STATIC_LIB) rand.o
+
+.PHONY: all clean
diff --git a/contrib/filter-extensions/rand/rand.c b/contrib/filter-extensions/rand/rand.c
new file mode 100644
index 00000000000..af153709345
--- /dev/null
+++ b/contrib/filter-extensions/rand/rand.c
@@ -0,0 +1,103 @@
+#include "../../../git-compat-util.h"
+#include "../../../list-objects-filter-extensions.h"
+#include "../../../object.h"
+#include "../../../hash.h"
+#include "../../../trace.h"
+
+/* Trace key handed to trace_printf_key() when a blob matches. */
+static struct trace_key trace_filter = TRACE_KEY_INIT(FILTER);
+/* State shared by the rand_* callbacks; created in rand_init(), freed in rand_free(). */
+struct rand_context {
+ int percentageMatch; /* 0-100; a blob is shown when rand() % 100 is below this */
+ int matchCount; /* blobs shown so far */
+ int blobCount; /* LOFS_BLOB calls seen */
+ int treeCount; /* LOFS_BEGIN_TREE calls seen */
+ uint64_t started_at; /* getnanotime() value taken in rand_init() */
+};
+
+/*
+ * Parse the match percentage from filter_arg (out-of-range => 1%), start the
+ * timer and hand the state back via *context; xcalloc() dies on OOM. Returns 0.
+ */
+static int rand_init(const struct repository *r, const char *filter_arg,
+		     void **context)
+{
+	struct rand_context *ctx = xcalloc(1, sizeof(*ctx));
+	ctx->percentageMatch = atoi(filter_arg);
+	if (ctx->percentageMatch > 100 || ctx->percentageMatch < 0) {
+		fprintf(stderr, "filter-rand: warning: invalid match %%: %s\n",
+			filter_arg);
+		ctx->percentageMatch = 1; /* default 1% */
+	}
+	fprintf(stderr, "filter-rand: matching %d%%\n", ctx->percentageMatch);
+	ctx->started_at = getnanotime();
+	*context = ctx;
+	return 0;
+}
+
+/*
+ * Per-object callback: every tree is shown; a blob is shown with probability
+ * percentageMatch/100, else marked seen and omitted. Other situations die().
+ * Progress is printed to stderr once per 100000 counted trees and blobs.
+ */
+static enum list_objects_filter_result rand_filter_object(
+	const struct repository *r,
+	const enum list_objects_filter_situation filter_situation,
+	struct object *obj,
+	const char *pathname, const char *filename,
+	enum list_objects_filter_omit *omit,
+	void *context)
+{
+	struct rand_context *ctx = context;
+	int counted;
+
+	switch (filter_situation) {
+	default:
+		die("filter-rand: unknown filter_situation: %d", filter_situation);
+	case LOFS_END_TREE:
+		return LOFR_ZERO;
+	case LOFS_BEGIN_TREE:
+		ctx->treeCount++;
+		break;
+	case LOFS_BLOB:
+		ctx->blobCount++;
+		break;
+	}
+
+	/* Report after counting so that END_TREE calls cannot repeat a line. */
+	counted = ctx->blobCount + ctx->treeCount;
+	if (counted % 100000 == 0)
+		fprintf(stderr, "filter-rand: %d...\n", counted);
+
+	if (filter_situation == LOFS_BLOB) {
+		if ((rand() % 100) >= ctx->percentageMatch) {
+			*omit = LOFO_OMIT;
+			return LOFR_MARK_SEEN; /* hard omit */
+		}
+		ctx->matchCount++;
+		trace_printf_key(&trace_filter, "match: %s %s\n",
+				 oid_to_hex(&obj->oid), pathname);
+	}
+	return LOFR_MARK_SEEN | LOFR_DO_SHOW; /* trees and matching blobs */
+}
+
+/* Report totals on stderr, then free the state; 0 stands in for x/0 ratios. */
+static void rand_free(const struct repository *r, void *context)
+{
+	struct rand_context *ctx = context;
+	double secs = (getnanotime() - ctx->started_at) / 1E9;
+	int count = ctx->blobCount + ctx->treeCount;
+
+	fprintf(stderr, "filter-rand: done: count=%d (blob=%d tree=%d) "
+		"matched=%d elapsed=%fs rate=%0.1f/s average=%0.1fus\n",
+		count, ctx->blobCount, ctx->treeCount, ctx->matchCount, secs,
+		secs > 0 ? count / secs : 0.0, count ? secs / count * 1E6 : 0.0);
+	free(ctx);
+}
+/* Descriptor for the "rand" extension; positional, so order must match struct filter_extension. */
+const struct filter_extension filter_extension_rand = {
+ "rand", /* extension name */
+ &rand_init, /* set-up callback */
+ &rand_filter_object, /* per-object callback */
+ &rand_free, /* tear-down callback */
+};
diff --git a/contrib/filter-extensions/rand_cpp/.gitignore b/contrib/filter-extensions/rand_cpp/.gitignore
new file mode 100644
index 00000000000..9eca6c88cf2
--- /dev/null
+++ b/contrib/filter-extensions/rand_cpp/.gitignore
@@ -0,0 +1,2 @@
+*.a
+*.o
diff --git a/contrib/filter-extensions/rand_cpp/Makefile b/contrib/filter-extensions/rand_cpp/Makefile
new file mode 100644
index 00000000000..278121e3d5a
--- /dev/null
+++ b/contrib/filter-extensions/rand_cpp/Makefile
@@ -0,0 +1,34 @@
+# Run this via `FILTER_EXTENSIONS=contrib/filter-extensions/rand_cpp/rand_cpp.a make`
+# from the main git directory. That way we inherit useful variables.
+# Unless MAKEFLAGS contains an "s" or V is defined, echo a short "CC/CXX/AR <target>" line per command.
+ifneq ($(findstring s,$(MAKEFLAGS)),s)
+ifndef V
+ QUIET_CC = @echo ' ' CC $@;
+ QUIET_CXX = @echo ' ' CXX $@;
+ QUIET_AR = @echo ' ' AR $@;
+endif
+endif
+# Static archive produced by this directory.
+FILTER_STATIC_LIB = rand_cpp.a
+# Appended to whatever ALL_CXXFLAGS already holds; only the rand.o rule uses it.
+ALL_CXXFLAGS += -std=c++11
+# Default goal: build the archive; raises an error when MAKELEVEL is 0 (i.e. not a sub-make).
+all: $(FILTER_STATIC_LIB)
+ifeq ($(MAKELEVEL),0)
+ $(error "Run via parent git make")
+endif
+ @:
+# Pack both object files into the archive.
+$(FILTER_STATIC_LIB): rand.o adapter_functions.o
+ $(QUIET_AR)$(AR) $(ARFLAGS) $@ $^
+# The C++ filter is compiled with ALL_CFLAGS plus ALL_CXXFLAGS.
+rand.o: rand.cpp
+ $(QUIET_CXX)$(CXX) -c $(ALL_CFLAGS) $(ALL_CXXFLAGS) $<
+# The C adapter is compiled with the C compiler and ALL_CFLAGS only.
+adapter_functions.o: adapter_functions.c
+ $(QUIET_CC)$(CC) -c $(ALL_CFLAGS) $<
+# Delete the archive and both object files that go into it.
+clean:
+	$(RM) $(FILTER_STATIC_LIB) rand.o adapter_functions.o
+
+.PHONY: all clean
diff --git a/contrib/filter-extensions/rand_cpp/adapter_functions.c b/contrib/filter-extensions/rand_cpp/adapter_functions.c
new file mode 100644
index 00000000000..0d9d2a2aa96
--- /dev/null
+++ b/contrib/filter-extensions/rand_cpp/adapter_functions.c
@@ -0,0 +1,6 @@
+#include "../../../git-compat-util.h"
+#include "../../../object.h"
+/* Returns oid_to_hex() of obj's object ID; called from rand.cpp (presumably so it need not include object.h). */
+char *obj_to_hex_oid(struct object *obj) {
+ return oid_to_hex(&obj->oid);
+}
diff --git a/contrib/filter-extensions/rand_cpp/adapter_functions.h b/contrib/filter-extensions/rand_cpp/adapter_functions.h
new file mode 100644
index 00000000000..1150c21a258
--- /dev/null
+++ b/contrib/filter-extensions/rand_cpp/adapter_functions.h
@@ -0,0 +1,10 @@
+#ifndef RAND_CPP_ADAPTER_FUNCTIONS_H
+#define RAND_CPP_ADAPTER_FUNCTIONS_H
+/* Declarations used by rand.cpp. NOTE(review): uint64_t is not defined here; the includer must provide it. */
+struct object;
+/* Not defined in this directory; rand.cpp calls it to time the run. */
+uint64_t getnanotime(void);
+/* Implemented in adapter_functions.c: returns oid_to_hex() of obj's object ID. */
+char *obj_to_hex_oid(struct object *obj);
+
+#endif /* RAND_CPP_ADAPTER_FUNCTIONS_H */
diff --git a/contrib/filter-extensions/rand_cpp/rand.cpp b/contrib/filter-extensions/rand_cpp/rand.cpp
new file mode 100644
index 00000000000..cb608d14ed9
--- /dev/null
+++ b/contrib/filter-extensions/rand_cpp/rand.cpp
@@ -0,0 +1,103 @@
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+
+#include <time.h>
+
+extern "C" {
+ #include "../../../list-objects-filter-extensions.h"
+ #include "adapter_functions.h"
+}
+
+namespace {
+// State shared by the rand_* callbacks; allocated in rand_init(), deleted in rand_free().
+struct rand_context {
+ int percentageMatch = 0; // 0-100; a blob is shown when rand() % 100 is below this
+ int matchCount = 0; // blobs shown so far
+ int blobCount = 0; // LOFS_BLOB calls seen
+ int treeCount = 0; // LOFS_BEGIN_TREE calls seen
+ uint64_t started_at = 0; // getnanotime() value taken in rand_init()
+};
+
+// Init callback: parse the match percentage from filter_arg (out-of-range
+// values fall back to 1%), start the timer, publish *context. Returns 0.
+static int rand_init(const struct repository *r, const char *filter_arg,
+		     void **context)
+{
+	rand_context *ctx = new rand_context();
+
+	ctx->percentageMatch = atoi(filter_arg);
+	if (ctx->percentageMatch > 100 || ctx->percentageMatch < 0) {
+		std::cerr << "filter-rand-cpp: warning: invalid match %: " << filter_arg << "\n";
+		ctx->percentageMatch = 1; // default 1%
+	}
+	std::cerr << "filter-rand-cpp: matching " << ctx->percentageMatch << "%\n";
+	ctx->started_at = getnanotime();
+	*context = ctx; // the filter and free callbacks receive the state through this
+	return 0;
+}
+
+// Per-object callback: show every tree, show a blob with probability
+// percentageMatch/100 (else mark it seen and omit it); log progress to stderr.
+enum list_objects_filter_result rand_filter_object(
+	const struct repository *r,
+	const enum list_objects_filter_situation filter_situation,
+	struct object *obj, const char *pathname, const char *filename,
+	enum list_objects_filter_omit *omit,
+	void *context)
+{
+	rand_context *ctx = static_cast<rand_context *>(context);
+
+	switch (filter_situation) {
+	default:
+		std::cerr << "filter-rand-cpp: unknown filter_situation: " << filter_situation << "\n";
+		abort();
+	case LOFS_END_TREE:
+		return LOFR_ZERO;
+	case LOFS_BEGIN_TREE:
+		ctx->treeCount++;
+		break;
+	case LOFS_BLOB:
+		ctx->blobCount++;
+		break;
+	}
+	// Report after counting so that END_TREE calls cannot repeat a line.
+	const int counted = ctx->blobCount + ctx->treeCount;
+	if (counted % 100000 == 0)
+		std::cerr << "filter-rand-cpp: " << counted << "...\n";
+
+	if (filter_situation == LOFS_BLOB) {
+		if ((rand() % 100) >= ctx->percentageMatch) {
+			*omit = LOFO_OMIT;
+			return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
+		}
+		ctx->matchCount++;
+		// stderr, not stdout: stdout may carry the command's real output.
+		std::cerr << "match: " << obj_to_hex_oid(obj) << " " << pathname << "\n";
+	}
+	return static_cast<list_objects_filter_result>(LOFR_MARK_SEEN | LOFR_DO_SHOW);
+}
+
+// Free callback: print the run summary to stderr and delete the state.
+// rate/average print as 0 when their divisor is 0.
+void rand_free(const struct repository *r, void *context) {
+	rand_context *ctx = static_cast<rand_context *>(context);
+	const double secs = (getnanotime() - ctx->started_at) / 1E9;
+	const int count = ctx->blobCount + ctx->treeCount;
+
+	std::cerr << "filter-rand-cpp: done: count=" << count
+		<< " (blob=" << ctx->blobCount << " tree=" << ctx->treeCount << ")"
+		<< " matched=" << ctx->matchCount << " elapsed=" << secs << "s"
+		<< " rate=" << (secs > 0 ? count / secs : 0.0) << "/s"
+		<< " average=" << (count ? secs / count * 1E6 : 0.0) << "us\n";
+	delete ctx;
+}
+
+} // namespace
+// Descriptor for the "rand_cpp" extension; `extern` gives this const object external linkage.
+extern const struct filter_extension filter_extension_rand_cpp = {
+ "rand_cpp", // extension name
+ &rand_init, // set-up callback
+ &rand_filter_object, // per-object callback
+ &rand_free, // tear-down callback
+};
--
gitgitgadget
next prev parent reply other threads:[~2021-09-05 23:52 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-09-05 23:51 [PATCH 0/4] Compile-time extensions for list-object-filter Andrew Olsen via GitGitGadget
2021-09-05 23:51 ` [PATCH 1/4] " Andrew Olsen via GitGitGadget
2021-09-05 23:51 ` [PATCH 2/4] Makefile for list-object-filter extensions Andrew Olsen via GitGitGadget
2021-09-06 6:15 ` Bagas Sanjaya
2021-09-05 23:51 ` Andrew Olsen via GitGitGadget [this message]
2021-09-05 23:51 ` [PATCH 4/4] Documentation " Andrew Olsen via GitGitGadget
2021-09-06 0:49 ` [PATCH 0/4] Compile-time extensions for list-object-filter Ævar Arnfjörð Bjarmason
2021-09-06 6:18 ` Bagas Sanjaya
2021-09-07 0:37 ` Andrew Olsen
2021-09-07 8:59 ` Ævar Arnfjörð Bjarmason
2021-09-08 14:23 ` Robert Coup
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=8e0d11ea53a080e8212768f370fb8f05eaded312.1630885899.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=andrew.olsen@koordinates.com \
--cc=andrew232@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).