All of lore.kernel.org
 help / color / mirror / Atom feed
* [bitbake-devel][PATCH 0/5] Add cleanup commands for hash equivalence
@ 2023-10-06 15:36 Joshua Watt
  2023-10-06 15:36 ` [bitbake-devel][PATCH 1/5] hashserv: Add remove API Joshua Watt
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Joshua Watt @ 2023-10-06 15:36 UTC (permalink / raw)
  To: bitbake-devel; +Cc: Joshua Watt

Adds two new APIs and corresponding `bitbake-hashclient` commands for
the hash equivalence server. 

The first command is `remove` (`bitbake-hashclient remove`) and it can
be used to remove database entries that match the specified criteria. It
can be useful to remove entries in the database that are known to be
no longer valid.

The second command is `clean-unused` (`bitbake-hashclient clean-unused`)
which will remove any database entries that are older than the specified
age and are not referenced by a valid unihash.

[YOCTO #15064]

Joshua Watt (5):
  hashserv: Add remove API
  bitbake-hashclient: Add remove subcommand
  hashserv: Extend get_outhash API to optionally include unihash
  hashserv: Add API to clean unused entries
  bitbake-hashclient: Add clean-unused subcommand

 bitbake/bin/bitbake-hashclient | 22 ++++++++
 bitbake/lib/hashserv/client.py | 14 ++++-
 bitbake/lib/hashserv/server.py | 93 ++++++++++++++++++++++++++++------
 bitbake/lib/hashserv/tests.py  | 52 +++++++++++++++++++
 4 files changed, 163 insertions(+), 18 deletions(-)

-- 
2.34.1



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [bitbake-devel][PATCH 1/5] hashserv: Add remove API
  2023-10-06 15:36 [bitbake-devel][PATCH 0/5] Add cleanup commands for hash equivalence Joshua Watt
@ 2023-10-06 15:36 ` Joshua Watt
  2023-10-06 15:36 ` [bitbake-devel][PATCH 2/5] bitbake-hashclient: Add remove subcommand Joshua Watt
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Joshua Watt @ 2023-10-06 15:36 UTC (permalink / raw)
  To: bitbake-devel; +Cc: Joshua Watt

Adds a `remove` API to the client and server that can be used to remove
hash equivalence entries that match particular criteria.

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
---
 bitbake/lib/hashserv/client.py |  5 +++++
 bitbake/lib/hashserv/server.py | 28 ++++++++++++++++++++++++++++
 bitbake/lib/hashserv/tests.py  | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 66 insertions(+)

diff --git a/bitbake/lib/hashserv/client.py b/bitbake/lib/hashserv/client.py
index b2aa1026ac9..7446e4c9f67 100644
--- a/bitbake/lib/hashserv/client.py
+++ b/bitbake/lib/hashserv/client.py
@@ -101,6 +101,10 @@ class AsyncClient(bb.asyncrpc.AsyncClient):
         await self._set_mode(self.MODE_NORMAL)
         return (await self.send_message({"backfill-wait": None}))["tasks"]
 
+    async def remove(self, where):
+        await self._set_mode(self.MODE_NORMAL)
+        return await self.send_message({"remove": {"where": where}})
+
 
 class Client(bb.asyncrpc.Client):
     def __init__(self):
@@ -115,6 +119,7 @@ class Client(bb.asyncrpc.Client):
             "get_stats",
             "reset_stats",
             "backfill_wait",
+            "remove",
         )
 
     def _get_async_client(self):
diff --git a/bitbake/lib/hashserv/server.py b/bitbake/lib/hashserv/server.py
index d40a2ab8f88..daf1ffacbb9 100644
--- a/bitbake/lib/hashserv/server.py
+++ b/bitbake/lib/hashserv/server.py
@@ -186,6 +186,7 @@ class ServerClient(bb.asyncrpc.AsyncServerConnection):
                 'report-equiv': self.handle_equivreport,
                 'reset-stats': self.handle_reset_stats,
                 'backfill-wait': self.handle_backfill_wait,
+                'remove': self.handle_remove,
             })
 
     def validate_proto_version(self):
@@ -499,6 +500,33 @@ class ServerClient(bb.asyncrpc.AsyncServerConnection):
         await self.backfill_queue.join()
         self.write_message(d)
 
+    async def handle_remove(self, request):
+        condition = request["where"]
+        if not isinstance(condition, dict):
+            raise TypeError("Bad condition type %s" % type(condition))
+
+        def do_remove(columns, table_name, cursor):
+            nonlocal condition
+            where = {}
+            for c in columns:
+                if c in condition and condition[c] is not None:
+                    where[c] = condition[c]
+
+            if where:
+                query = ('DELETE FROM %s WHERE ' % table_name) + ' AND '.join("%s=:%s" % (k, k) for k in where.keys())
+                cursor.execute(query, where)
+                return cursor.rowcount
+
+            return 0
+
+        count = 0
+        with closing(self.db.cursor()) as cursor:
+            count += do_remove(OUTHASH_TABLE_COLUMNS, "outhashes_v2", cursor)
+            count += do_remove(UNIHASH_TABLE_COLUMNS, "unihashes_v2", cursor)
+            self.db.commit()
+
+        self.write_message({"count": count})
+
     def query_equivalent(self, cursor, method, taskhash):
         # This is part of the inner loop and must be as fast as possible
         cursor.execute(
diff --git a/bitbake/lib/hashserv/tests.py b/bitbake/lib/hashserv/tests.py
index f6b85aed85a..a3e066406e3 100644
--- a/bitbake/lib/hashserv/tests.py
+++ b/bitbake/lib/hashserv/tests.py
@@ -84,6 +84,7 @@ class HashEquivalenceCommonTests(object):
 
         result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
         self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')
+        return taskhash, outhash, unihash
 
     def test_create_equivalent(self):
         # Tests that a second reported task with the same outhash will be
@@ -125,6 +126,38 @@ class HashEquivalenceCommonTests(object):
 
         self.assertClientGetHash(self.client, taskhash, unihash)
 
+    def test_remove_taskhash(self):
+        taskhash, outhash, unihash = self.test_create_hash()
+        result = self.client.remove({"taskhash": taskhash})
+        self.assertGreater(result["count"], 0)
+        self.assertClientGetHash(self.client, taskhash, None)
+
+        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash)
+        self.assertIsNone(result_outhash)
+
+    def test_remove_unihash(self):
+        taskhash, outhash, unihash = self.test_create_hash()
+        result = self.client.remove({"unihash": unihash})
+        self.assertGreater(result["count"], 0)
+        self.assertClientGetHash(self.client, taskhash, None)
+
+    def test_remove_outhash(self):
+        taskhash, outhash, unihash = self.test_create_hash()
+        result = self.client.remove({"outhash": outhash})
+        self.assertGreater(result["count"], 0)
+
+        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash)
+        self.assertIsNone(result_outhash)
+
+    def test_remove_method(self):
+        taskhash, outhash, unihash = self.test_create_hash()
+        result = self.client.remove({"method": self.METHOD})
+        self.assertGreater(result["count"], 0)
+        self.assertClientGetHash(self.client, taskhash, None)
+
+        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash)
+        self.assertIsNone(result_outhash)
+
     def test_huge_message(self):
         # Simple test that hashes can be created
         taskhash = 'c665584ee6817aa99edfc77a44dd853828279370'
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [bitbake-devel][PATCH 2/5] bitbake-hashclient: Add remove subcommand
  2023-10-06 15:36 [bitbake-devel][PATCH 0/5] Add cleanup commands for hash equivalence Joshua Watt
  2023-10-06 15:36 ` [bitbake-devel][PATCH 1/5] hashserv: Add remove API Joshua Watt
@ 2023-10-06 15:36 ` Joshua Watt
  2023-10-06 15:36 ` [bitbake-devel][PATCH 3/5] hashserv: Extend get_outhash API to optionally include unihash Joshua Watt
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Joshua Watt @ 2023-10-06 15:36 UTC (permalink / raw)
  To: bitbake-devel; +Cc: Joshua Watt

Adds a subcommand to invoke the remove API on the server


Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
---
 bitbake/bin/bitbake-hashclient | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/bitbake/bin/bitbake-hashclient b/bitbake/bin/bitbake-hashclient
index 494f17592ac..d09104336ab 100755
--- a/bitbake/bin/bitbake-hashclient
+++ b/bitbake/bin/bitbake-hashclient
@@ -113,6 +113,14 @@ def main():
                     with lock:
                         pbar.update()
 
+    def handle_remove(args, client):
+        where = {k: v for k, v in args.where}
+        if where:
+            result = client.remove(where)
+            print("Removed %d row(s)" % (result["count"]))
+        else:
+            print("No query specified")
+
     parser = argparse.ArgumentParser(description='Hash Equivalence Client')
     parser.add_argument('--address', default=DEFAULT_ADDRESS, help='Server address (default "%(default)s")')
     parser.add_argument('--log', default='WARNING', help='Set logging level')
@@ -137,6 +145,11 @@ def main():
                                help='Include string in outhash')
     stress_parser.set_defaults(func=handle_stress)
 
+    remove_parser = subparsers.add_parser('remove', help="Remove hash entries")
+    remove_parser.add_argument("--where", "-w", metavar="KEY VALUE", nargs=2, action="append", default=[],
+                               help="Remove entries from table where KEY == VALUE")
+    remove_parser.set_defaults(func=handle_remove)
+
     args = parser.parse_args()
 
     logger = logging.getLogger('hashserv')
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [bitbake-devel][PATCH 3/5] hashserv: Extend get_outhash API to optionally include unihash
  2023-10-06 15:36 [bitbake-devel][PATCH 0/5] Add cleanup commands for hash equivalence Joshua Watt
  2023-10-06 15:36 ` [bitbake-devel][PATCH 1/5] hashserv: Add remove API Joshua Watt
  2023-10-06 15:36 ` [bitbake-devel][PATCH 2/5] bitbake-hashclient: Add remove subcommand Joshua Watt
@ 2023-10-06 15:36 ` Joshua Watt
  2023-10-06 15:36 ` [bitbake-devel][PATCH 4/5] hashserv: Add API to clean unused entries Joshua Watt
  2023-10-06 15:36 ` [bitbake-devel][PATCH 5/5] bitbake-hashclient: Add clean-unused subcommand Joshua Watt
  4 siblings, 0 replies; 6+ messages in thread
From: Joshua Watt @ 2023-10-06 15:36 UTC (permalink / raw)
  To: bitbake-devel; +Cc: Joshua Watt

Extends the get_outhash API with a flag indicating whether to include
the unihash in the output. When the flag is false, the query doesn't
require the unihash entry to be present to return a result.

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
---
 bitbake/lib/hashserv/client.py |  4 +--
 bitbake/lib/hashserv/server.py | 45 ++++++++++++++++++++++------------
 2 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/bitbake/lib/hashserv/client.py b/bitbake/lib/hashserv/client.py
index 7446e4c9f67..eeafeabda05 100644
--- a/bitbake/lib/hashserv/client.py
+++ b/bitbake/lib/hashserv/client.py
@@ -83,10 +83,10 @@ class AsyncClient(bb.asyncrpc.AsyncClient):
             {"get": {"taskhash": taskhash, "method": method, "all": all_properties}}
         )
 
-    async def get_outhash(self, method, outhash, taskhash):
+    async def get_outhash(self, method, outhash, taskhash, with_unihash=True):
         await self._set_mode(self.MODE_NORMAL)
         return await self.send_message(
-            {"get-outhash": {"outhash": outhash, "taskhash": taskhash, "method": method}}
+            {"get-outhash": {"outhash": outhash, "taskhash": taskhash, "method": method, "with_unihash": with_unihash}}
         )
 
     async def get_stats(self):
diff --git a/bitbake/lib/hashserv/server.py b/bitbake/lib/hashserv/server.py
index daf1ffacbb9..d52e1d46df5 100644
--- a/bitbake/lib/hashserv/server.py
+++ b/bitbake/lib/hashserv/server.py
@@ -270,27 +270,42 @@ class ServerClient(bb.asyncrpc.AsyncServerConnection):
         method = request['method']
         outhash = request['outhash']
         taskhash = request['taskhash']
+        with_unihash = request.get("with_unihash", True)
 
         with closing(self.db.cursor()) as cursor:
-            d = await self.get_outhash(cursor, method, outhash, taskhash)
+            d = await self.get_outhash(cursor, method, outhash, taskhash, with_unihash)
 
         self.write_message(d)
 
-    async def get_outhash(self, cursor, method, outhash, taskhash):
+    async def get_outhash(self, cursor, method, outhash, taskhash, with_unihash=True):
         d = None
-        cursor.execute(
-            '''
-            SELECT *, unihashes_v2.unihash AS unihash FROM outhashes_v2
-            INNER JOIN unihashes_v2 ON unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash
-            WHERE outhashes_v2.method=:method AND outhashes_v2.outhash=:outhash
-            ORDER BY outhashes_v2.created ASC
-            LIMIT 1
-            ''',
-            {
-                'method': method,
-                'outhash': outhash,
-            }
-        )
+        if with_unihash:
+            cursor.execute(
+                '''
+                SELECT *, unihashes_v2.unihash AS unihash FROM outhashes_v2
+                INNER JOIN unihashes_v2 ON unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash
+                WHERE outhashes_v2.method=:method AND outhashes_v2.outhash=:outhash
+                ORDER BY outhashes_v2.created ASC
+                LIMIT 1
+                ''',
+                {
+                    'method': method,
+                    'outhash': outhash,
+                }
+            )
+        else:
+            cursor.execute(
+                """
+                SELECT * FROM outhashes_v2
+                WHERE outhashes_v2.method=:method AND outhashes_v2.outhash=:outhash
+                ORDER BY outhashes_v2.created ASC
+                LIMIT 1
+                """,
+                {
+                    'method': method,
+                    'outhash': outhash,
+                }
+            )
         row = cursor.fetchone()
 
         if row is not None:
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [bitbake-devel][PATCH 4/5] hashserv: Add API to clean unused entries
  2023-10-06 15:36 [bitbake-devel][PATCH 0/5] Add cleanup commands for hash equivalence Joshua Watt
                   ` (2 preceding siblings ...)
  2023-10-06 15:36 ` [bitbake-devel][PATCH 3/5] hashserv: Extend get_outhash API to optionally include unihash Joshua Watt
@ 2023-10-06 15:36 ` Joshua Watt
  2023-10-06 15:36 ` [bitbake-devel][PATCH 5/5] bitbake-hashclient: Add clean-unused subcommand Joshua Watt
  4 siblings, 0 replies; 6+ messages in thread
From: Joshua Watt @ 2023-10-06 15:36 UTC (permalink / raw)
  To: bitbake-devel; +Cc: Joshua Watt

Adds an API to remove unused entries in the outhash database: entries
older than the given age that are not referenced by any unihash.

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
---
 bitbake/lib/hashserv/client.py |  5 +++++
 bitbake/lib/hashserv/server.py | 20 +++++++++++++++++++-
 bitbake/lib/hashserv/tests.py  | 19 +++++++++++++++++++
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/bitbake/lib/hashserv/client.py b/bitbake/lib/hashserv/client.py
index eeafeabda05..d5c981864a2 100644
--- a/bitbake/lib/hashserv/client.py
+++ b/bitbake/lib/hashserv/client.py
@@ -105,6 +105,10 @@ class AsyncClient(bb.asyncrpc.AsyncClient):
         await self._set_mode(self.MODE_NORMAL)
         return await self.send_message({"remove": {"where": where}})
 
+    async def clean_unused(self, max_age):
+        await self._set_mode(self.MODE_NORMAL)
+        return await self.send_message({"clean_unused": {"max_age_seconds": max_age}})
+
 
 class Client(bb.asyncrpc.Client):
     def __init__(self):
@@ -120,6 +124,7 @@ class Client(bb.asyncrpc.Client):
             "reset_stats",
             "backfill_wait",
             "remove",
+            "clean_unused",
         )
 
     def _get_async_client(self):
diff --git a/bitbake/lib/hashserv/server.py b/bitbake/lib/hashserv/server.py
index d52e1d46df5..b2ca357b2b1 100644
--- a/bitbake/lib/hashserv/server.py
+++ b/bitbake/lib/hashserv/server.py
@@ -4,7 +4,7 @@
 #
 
 from contextlib import closing, contextmanager
-from datetime import datetime
+from datetime import datetime, timedelta
 import enum
 import asyncio
 import logging
@@ -187,6 +187,7 @@ class ServerClient(bb.asyncrpc.AsyncServerConnection):
                 'reset-stats': self.handle_reset_stats,
                 'backfill-wait': self.handle_backfill_wait,
                 'remove': self.handle_remove,
+                'clean_unused': self.handle_clean_unused,
             })
 
     def validate_proto_version(self):
@@ -542,6 +543,23 @@ class ServerClient(bb.asyncrpc.AsyncServerConnection):
 
         self.write_message({"count": count})
 
+    async def handle_clean_unused(self, request):
+        max_age = request["max_age_seconds"]
+        with closing(self.db.cursor()) as cursor:
+            cursor.execute(
+                """
+                DELETE FROM outhashes_v2 WHERE created<:oldest AND NOT EXISTS (
+                    SELECT unihashes_v2.id FROM unihashes_v2 WHERE unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash LIMIT 1
+                )
+                """,
+                {
+                    "oldest": datetime.now() - timedelta(seconds=-max_age)
+                }
+            )
+            count = cursor.rowcount
+
+        self.write_message({"count": count})
+
     def query_equivalent(self, cursor, method, taskhash):
         # This is part of the inner loop and must be as fast as possible
         cursor.execute(
diff --git a/bitbake/lib/hashserv/tests.py b/bitbake/lib/hashserv/tests.py
index a3e066406e3..f343c586b5d 100644
--- a/bitbake/lib/hashserv/tests.py
+++ b/bitbake/lib/hashserv/tests.py
@@ -158,6 +158,25 @@ class HashEquivalenceCommonTests(object):
         result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash)
         self.assertIsNone(result_outhash)
 
+    def test_clean_unused(self):
+        taskhash, outhash, unihash = self.test_create_hash()
+
+        # Clean the database, which should not remove anything because all hashes are in-use
+        result = self.client.clean_unused(0)
+        self.assertEqual(result["count"], 0)
+        self.assertClientGetHash(self.client, taskhash, unihash)
+
+        # Remove the unihash. The row in the outhash table should still be present
+        self.client.remove({"unihash": unihash})
+        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash, False)
+        self.assertIsNotNone(result_outhash)
+
+        # Now clean with no minimum age which will remove the outhash
+        result = self.client.clean_unused(0)
+        self.assertEqual(result["count"], 1)
+        result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash, False)
+        self.assertIsNone(result_outhash)
+
     def test_huge_message(self):
         # Simple test that hashes can be created
         taskhash = 'c665584ee6817aa99edfc77a44dd853828279370'
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [bitbake-devel][PATCH 5/5] bitbake-hashclient: Add clean-unused subcommand
  2023-10-06 15:36 [bitbake-devel][PATCH 0/5] Add cleanup commands for hash equivalence Joshua Watt
                   ` (3 preceding siblings ...)
  2023-10-06 15:36 ` [bitbake-devel][PATCH 4/5] hashserv: Add API to clean unused entries Joshua Watt
@ 2023-10-06 15:36 ` Joshua Watt
  4 siblings, 0 replies; 6+ messages in thread
From: Joshua Watt @ 2023-10-06 15:36 UTC (permalink / raw)
  To: bitbake-devel; +Cc: Joshua Watt

Adds a subcommand to clean unused outhash entries from the server based
on age

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
---
 bitbake/bin/bitbake-hashclient | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/bitbake/bin/bitbake-hashclient b/bitbake/bin/bitbake-hashclient
index d09104336ab..3f265e8fa7b 100755
--- a/bitbake/bin/bitbake-hashclient
+++ b/bitbake/bin/bitbake-hashclient
@@ -121,6 +121,11 @@ def main():
         else:
             print("No query specified")
 
+    def handle_clean_unused(args, client):
+        result = client.clean_unused(args.max_age)
+        print("Removed %d rows" % (result["count"]))
+        return 0
+
     parser = argparse.ArgumentParser(description='Hash Equivalence Client')
     parser.add_argument('--address', default=DEFAULT_ADDRESS, help='Server address (default "%(default)s")')
     parser.add_argument('--log', default='WARNING', help='Set logging level')
@@ -150,6 +155,10 @@ def main():
                                help="Remove entries from table where KEY == VALUE")
     remove_parser.set_defaults(func=handle_remove)
 
+    clean_unused_parser = subparsers.add_parser('clean-unused', help="Remove unused database entries")
+    clean_unused_parser.add_argument("max_age", metavar="SECONDS", type=int, help="Remove unused entries older than SECONDS old")
+    clean_unused_parser.set_defaults(func=handle_clean_unused)
+
     args = parser.parse_args()
 
     logger = logging.getLogger('hashserv')
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-10-06 15:37 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2023-10-06 15:36 [bitbake-devel][PATCH 0/5] Add cleanup commands for hash equivalence Joshua Watt
2023-10-06 15:36 ` [bitbake-devel][PATCH 1/5] hashserv: Add remove API Joshua Watt
2023-10-06 15:36 ` [bitbake-devel][PATCH 2/5] bitbake-hashclient: Add remove subcommand Joshua Watt
2023-10-06 15:36 ` [bitbake-devel][PATCH 3/5] hashserv: Extend get_outhash API to optionally include unihash Joshua Watt
2023-10-06 15:36 ` [bitbake-devel][PATCH 4/5] hashserv: Add API to clean unused entries Joshua Watt
2023-10-06 15:36 ` [bitbake-devel][PATCH 5/5] bitbake-hashclient: Add clean-unused subcommand Joshua Watt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.