From: "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com>
To: rostedt@goodmis.org
Cc: linux-trace-devel@vger.kernel.org
Subject: [PATCH v3 14/20] trace-cmd library: Add logic for in-memory decompression
Date: Fri,  8 Oct 2021 07:22:08 +0300	[thread overview]
Message-ID: <20211008042214.977193-15-tz.stoyanov@gmail.com> (raw)
In-Reply-To: <20211008042214.977193-1-tz.stoyanov@gmail.com>
There are two approaches to read compressed trace data:
 - use a temporary file to decompress entire trace data before reading
 - use in-memory decompression of requested trace data chunk only
In-memory decompression seems to be more efficient, but selecting which
approach to use depends in the use case.
A compression chunk consists of multiple trace pages, that's why a small
cache with uncompressed chunks is implemented. The chunk stays in the
cache until there are pages which have reference to it.
Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 lib/trace-cmd/trace-input.c | 110 ++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
diff --git a/lib/trace-cmd/trace-input.c b/lib/trace-cmd/trace-input.c
index 91114483..5b9ff041 100644
--- a/lib/trace-cmd/trace-input.c
+++ b/lib/trace-cmd/trace-input.c
@@ -29,6 +29,9 @@
 
 #define COMMIT_MASK ((1 << 27) - 1)
 
+/* force uncompressing in memory */
+#define INMEMORY_DECOMPRESS
+
 /* for debugging read instead of mmap */
 static int force_read = 0;
 
@@ -1249,6 +1252,105 @@ static void free_page_map(struct page_map *page_map)
 	free(page_map);
 }
 
+#define CHUNK_CHECK_OFFSET(C, O)	((O) >= (C)->offset && (O) < ((C)->offset + (C)->size))
+static struct tracecmd_compress_chunk *get_zchunk(struct cpu_data *cpu, off64_t offset)
+{
+	struct cpu_zdata *cpuz = &cpu->compress;
+	int min, mid, max;
+
+	if (!cpuz->chunks)
+		return NULL;
+	if (offset > (cpuz->chunks[cpuz->count - 1].offset + cpuz->chunks[cpuz->count - 1].size))
+		return NULL;
+
+	/* check if the requested offset is in the last requested chunk or in the next chunk */
+	if (CHUNK_CHECK_OFFSET(cpuz->chunks + cpuz->last_chunk, offset))
+		return cpuz->chunks + cpuz->last_chunk;
+	cpuz->last_chunk++;
+	if (cpuz->last_chunk < cpuz->count &&
+	    CHUNK_CHECK_OFFSET(cpuz->chunks + cpuz->last_chunk, offset))
+		return cpuz->chunks + cpuz->last_chunk;
+
+	/* do a binary search to find the chunk holding the given offset */
+	min = 0;
+	max = cpuz->count - 1;
+	mid = (min + max)/2;
+	while (min <= max) {
+		if (offset < cpuz->chunks[mid].offset)
+			max = mid - 1;
+		else if (offset > (cpuz->chunks[mid].offset + cpuz->chunks[mid].size))
+			min = mid + 1;
+		else
+			break;
+		mid = (min + max)/2;
+	}
+	cpuz->last_chunk = mid;
+	return cpuz->chunks + mid;
+}
+
+static void free_zpage(struct cpu_data *cpu_data, void *map)
+{
+	struct zchunk_cache *cache;
+
+	list_for_each_entry(cache, &cpu_data->compress.cache, list) {
+		if (map <= cache->map && map > (cache->map + cache->chunk->size))
+			goto found;
+	}
+	return;
+
+found:
+	cache->ref--;
+	if (cache->ref)
+		return;
+	list_del(&cache->list);
+	free(cache->map);
+	free(cache);
+}
+
+static void *read_zpage(struct tracecmd_input *handle, int cpu, off64_t offset)
+{
+	struct cpu_data *cpu_data = &handle->cpu_data[cpu];
+	struct tracecmd_compress_chunk *chunk;
+	struct zchunk_cache *cache;
+	void *map = NULL;
+	int pindex;
+	int size;
+
+	/* Look in the cache of already loaded chunks */
+	list_for_each_entry(cache, &cpu_data->compress.cache, list) {
+		if (CHUNK_CHECK_OFFSET(cache->chunk, offset)) {
+			cache->ref++;
+			goto out;
+		}
+	}
+
+	chunk =  get_zchunk(cpu_data, offset);
+	if (!chunk)
+		return NULL;
+	size = handle->page_size > chunk->size ? handle->page_size : chunk->size;
+	map = malloc(size);
+	if (!map)
+		return NULL;
+	if (tracecmd_uncompress_chunk(handle->compress, chunk, map) < 0)
+		goto error;
+
+	cache = calloc(1, sizeof(struct zchunk_cache));
+	if (!cache)
+		goto error;
+	cache->ref = 1;
+	cache->chunk = chunk;
+	cache->map = map;
+	list_add(&cache->list, &cpu_data->compress.cache);
+
+	/* a chunk can hold multiple pages, get the requested one */
+out:
+	pindex = (offset - cache->chunk->offset) / handle->page_size;
+	return cache->map + (pindex * handle->page_size);
+error:
+	free(map);
+	return NULL;
+}
+
 static void *allocate_page_map(struct tracecmd_input *handle,
 			       struct page *page, int cpu, off64_t offset)
 {
@@ -1260,6 +1362,9 @@ static void *allocate_page_map(struct tracecmd_input *handle,
 	int ret;
 	int fd;
 
+	if (handle->cpu_compressed && handle->read_zpage)
+		return read_zpage(handle, cpu, offset);
+
 	if (handle->read_page) {
 		map = malloc(handle->page_size);
 		if (!map)
@@ -1402,6 +1507,8 @@ static void __free_page(struct tracecmd_input *handle, struct page *page)
 
 	if (handle->read_page)
 		free(page->map);
+	else if (handle->read_zpage)
+		free_zpage(cpu_data, page->map);
 	else
 		free_page_map(page->page_map);
 
@@ -3890,6 +3997,9 @@ struct tracecmd_input *tracecmd_alloc_fd(int fd, int flags)
 	/* By default, use usecs, unless told otherwise */
 	handle->flags |= TRACECMD_FL_IN_USECS;
 
+#ifdef INMEMORY_DECOMPRESS
+	handle->read_zpage = 1;
+#endif
 	if (do_read_check(handle, buf, 3))
 		goto failed_read;
 
-- 
2.31.1
next prev parent reply	other threads:[~2021-10-08  4:22 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-08  4:21 [PATCH v3 00/20] Trace file version 7 - compression Tzvetomir Stoyanov (VMware)
2021-10-08  4:21 ` [PATCH v3 01/20] trace-cmd library: Add support for compression algorithms Tzvetomir Stoyanov (VMware)
2021-10-08  4:21 ` [PATCH v3 02/20] trace-cmd library: Internal helpers for compressing data Tzvetomir Stoyanov (VMware)
2021-10-08  4:21 ` [PATCH v3 03/20] trace-cmd library: Internal helpers for uncompressing data Tzvetomir Stoyanov (VMware)
2021-10-08  4:21 ` [PATCH v3 04/20] trace-cmd library: Inherit compression algorithm from input file Tzvetomir Stoyanov (VMware)
2021-10-08  4:21 ` [PATCH v3 05/20] trace-cmd library: New API to configure compression on an output handler Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 06/20] trace-cmd library: Write compression header in the trace file Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 07/20] trace-cmd library: Compress part of " Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 08/20] trace-cmd library: Add local helper function for data compression Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 09/20] trace-cmd library: Compress the trace data Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 10/20] trace-cmd library: Decompress the options section, if it is compressed Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 11/20] trace-cmd library: Read compression header Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 12/20] trace-cmd library: Extend the input handler with trace data decompression context Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 13/20] trace-cmd library: Initialize CPU data decompression logic Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` Tzvetomir Stoyanov (VMware) [this message]
2021-10-08  4:22 ` [PATCH v3 15/20] trace-cmd library: Read compressed latency data Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 16/20] trace-cmd library: Decompress file sections on reading Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 17/20] trace-cmd library: Add zlib compression algorithm Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 18/20] trace-cmd list: Show supported compression algorithms Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 19/20] trace-cmd record: Add compression to the trace context Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 20/20] trace-cmd report: Add new parameter for trace file compression Tzvetomir Stoyanov (VMware)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox
  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):
  git send-email \
    --in-reply-to=20211008042214.977193-15-tz.stoyanov@gmail.com \
    --to=tz.stoyanov@gmail.com \
    --cc=linux-trace-devel@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY
  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).