From a81069be61677958b273fad891f8ef097f715209 Mon Sep 17 00:00:00 2001 From: Andrew Senetar Date: Sat, 11 May 2024 00:11:27 -0700 Subject: [PATCH] fix: Photo matching fixes - Correct bad query introduced in rotation matching - Promote get_orientation from "private" on photo class - Fix prepare_pictures to only generate the needed blocks, add check for missing blocks when rotation matchin is true - Fix cache test inputs to match schema --- core/pe/cache_sqlite.py | 2 +- core/pe/matchblock.py | 14 ++++++++++---- core/pe/photo.py | 6 +++--- core/tests/cache_test.py | 28 ++++++++++++++-------------- 4 files changed, 28 insertions(+), 22 deletions(-) diff --git a/core/pe/cache_sqlite.py b/core/pe/cache_sqlite.py index 43ffac06..f7020e46 100644 --- a/core/pe/cache_sqlite.py +++ b/core/pe/cache_sqlite.py @@ -158,7 +158,7 @@ class SqliteCache: ids = ",".join(map(str, rowids)) sql = ( "select rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 " - f"from pictures where rowid in {ids}" + f"from pictures where rowid in ({ids})" ) cur = self.con.execute(sql) return ( diff --git a/core/pe/matchblock.py b/core/pe/matchblock.py index f312b2ec..0aa0d253 100644 --- a/core/pe/matchblock.py +++ b/core/pe/matchblock.py @@ -54,7 +54,7 @@ def get_cache(cache_path, readonly=False): return SqliteCache(cache_path, readonly=readonly) -def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob): +def prepare_pictures(pictures, cache_path, with_dimensions, match_rotated, j=job.nulljob): # The MemoryError handlers in there use logging without first caring about whether or not # there is enough memory left to carry on the operation because it is assumed that the # MemoryError happens when trying to read an image file, which is freed from memory by the @@ -76,8 +76,14 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob): if with_dimensions: picture.dimensions # pre-read dimensions try: - if picture.unicode_path not in cache: - blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)] + if picture.unicode_path not in cache or ( + match_rotated and any(block == [] for block in cache[picture.unicode_path]) + ): + if match_rotated: + blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)] + else: + blocks = [[]] * 8 + blocks[max(picture.get_orientation() - 1, 0)] = picture.get_blocks(BLOCK_COUNT_PER_SIDE) cache[picture.unicode_path] = blocks prepared.append(picture) except (OSError, ValueError) as e: @@ -187,7 +193,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, match_rotate j.set_progress(comparison_count, progress_msg) j = j.start_subjob([3, 7]) - pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j) + pictures = prepare_pictures(pictures, cache_path, not match_scaled, match_rotated, j=j) j = j.start_subjob([9, 1], tr("Preparing for matching")) cache = get_cache(cache_path) id2picture = {} diff --git a/core/pe/photo.py b/core/pe/photo.py index 5bc8356f..06c6c245 100644 --- a/core/pe/photo.py +++ b/core/pe/photo.py @@ -37,7 +37,7 @@ class Photo(fs.File): def _plat_get_blocks(self, block_count_per_side, orientation): raise NotImplementedError() - def _get_orientation(self): + def get_orientation(self): if not hasattr(self, "_cached_orientation"): try: with self.path.open("rb") as fp: @@ -95,13 +95,13 @@ class Photo(fs.File): fs.File._read_info(self, field) if field == "dimensions": self.dimensions = self._plat_get_dimensions() - if self._get_orientation() in {5, 6, 7, 8}: + if self.get_orientation() in {5, 6, 7, 8}: self.dimensions = (self.dimensions[1], self.dimensions[0]) elif field == "exif_timestamp": self.exif_timestamp = self._get_exif_timestamp() def get_blocks(self, block_count_per_side, orientation: int = None): if orientation is None: - return self._plat_get_blocks(block_count_per_side, self._get_orientation()) + return self._plat_get_blocks(block_count_per_side, self.get_orientation()) else: return self._plat_get_blocks(block_count_per_side, orientation) diff --git a/core/tests/cache_test.py b/core/tests/cache_test.py index 8dbead64..6e9e0e88 100644 --- a/core/tests/cache_test.py +++ b/core/tests/cache_test.py @@ -59,13 +59,13 @@ class BaseTestCaseCache: def test_set_then_retrieve_blocks(self): c = self.get_cache() - b = [(0, 0, 0), (1, 2, 3)] + b = [[(0, 0, 0), (1, 2, 3)]] * 8 c["foo"] = b eq_(b, c["foo"]) def test_delitem(self): c = self.get_cache() - c["foo"] = "" + c["foo"] = [[]] * 8 del c["foo"] assert "foo" not in c with raises(KeyError): @@ -74,16 +74,16 @@ class BaseTestCaseCache: def test_persistance(self, tmpdir): DBNAME = tmpdir.join("hstest.db") c = self.get_cache(str(DBNAME)) - c["foo"] = [(1, 2, 3)] + c["foo"] = [[(1, 2, 3)]] * 8 del c c = self.get_cache(str(DBNAME)) - eq_([(1, 2, 3)], c["foo"]) + eq_([[(1, 2, 3)]] * 8, c["foo"]) def test_filter(self): c = self.get_cache() - c["foo"] = "" - c["bar"] = "" - c["baz"] = "" + c["foo"] = [[]] * 8 + c["bar"] = [[]] * 8 + c["baz"] = [[]] * 8 c.filter(lambda p: p != "bar") # only 'bar' is removed eq_(2, len(c)) assert "foo" in c @@ -92,9 +92,9 @@ class BaseTestCaseCache: def test_clear(self): c = self.get_cache() - c["foo"] = "" - c["bar"] = "" - c["baz"] = "" + c["foo"] = [[]] * 8 + c["bar"] = [[]] * 8 + c["baz"] = [[]] * 8 c.clear() eq_(0, len(c)) assert "foo" not in c @@ -104,7 +104,7 @@ class BaseTestCaseCache: def test_by_id(self): # it's possible to use the cache by referring to the files by their row_id c = self.get_cache() - b = [(0, 0, 0), (1, 2, 3)] + b = [[(0, 0, 0), (1, 2, 3)]] * 8 c["foo"] = b foo_id = c.get_id("foo") eq_(c[foo_id], b) @@ -127,10 +127,10 @@ class TestCaseSqliteCache(BaseTestCaseCache): fp.write("invalid sqlite content") fp.close() c = self.get_cache(dbname) # should not raise a DatabaseError - c["foo"] = [(1, 2, 3)] + c["foo"] = [[(1, 2, 3)]] * 8 del c c = self.get_cache(dbname) - eq_(c["foo"], [(1, 2, 3)]) + eq_(c["foo"], [[(1, 2, 3)]] * 8) class TestCaseCacheSQLEscape: @@ -152,7 +152,7 @@ class TestCaseCacheSQLEscape: def test_delitem(self): c = self.get_cache() - c["foo'bar"] = [] + c["foo'bar"] = [[]] * 8 try: del c["foo'bar"] except KeyError: