1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2024-10-29 21:05:57 +00:00

fix: Photo matching fixes

- Correct bad query introduced in rotation matching
- Promote get_orientation from "private" on photo class
- Fix prepare_pictures to only generate the needed blocks, add check for missing blocks when rotation matchin is true
- Fix cache test inputs to match schema
This commit is contained in:
Andrew Senetar 2024-05-11 00:11:27 -07:00
parent 08154815d0
commit a81069be61
Signed by: arsenetar
GPG Key ID: C63300DCE48AB2F1
4 changed files with 28 additions and 22 deletions

View File

@ -158,7 +158,7 @@ class SqliteCache:
ids = ",".join(map(str, rowids)) ids = ",".join(map(str, rowids))
sql = ( sql = (
"select rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 " "select rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 "
f"from pictures where rowid in {ids}" f"from pictures where rowid in ({ids})"
) )
cur = self.con.execute(sql) cur = self.con.execute(sql)
return ( return (

View File

@ -54,7 +54,7 @@ def get_cache(cache_path, readonly=False):
return SqliteCache(cache_path, readonly=readonly) return SqliteCache(cache_path, readonly=readonly)
def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob): def prepare_pictures(pictures, cache_path, with_dimensions, match_rotated, j=job.nulljob):
# The MemoryError handlers in there use logging without first caring about whether or not # The MemoryError handlers in there use logging without first caring about whether or not
# there is enough memory left to carry on the operation because it is assumed that the # there is enough memory left to carry on the operation because it is assumed that the
# MemoryError happens when trying to read an image file, which is freed from memory by the # MemoryError happens when trying to read an image file, which is freed from memory by the
@ -76,8 +76,14 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
if with_dimensions: if with_dimensions:
picture.dimensions # pre-read dimensions picture.dimensions # pre-read dimensions
try: try:
if picture.unicode_path not in cache: if picture.unicode_path not in cache or (
blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)] match_rotated and any(block == [] for block in cache[picture.unicode_path])
):
if match_rotated:
blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)]
else:
blocks = [[]] * 8
blocks[max(picture.get_orientation() - 1, 0)] = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
cache[picture.unicode_path] = blocks cache[picture.unicode_path] = blocks
prepared.append(picture) prepared.append(picture)
except (OSError, ValueError) as e: except (OSError, ValueError) as e:
@ -187,7 +193,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, match_rotate
j.set_progress(comparison_count, progress_msg) j.set_progress(comparison_count, progress_msg)
j = j.start_subjob([3, 7]) j = j.start_subjob([3, 7])
pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j) pictures = prepare_pictures(pictures, cache_path, not match_scaled, match_rotated, j=j)
j = j.start_subjob([9, 1], tr("Preparing for matching")) j = j.start_subjob([9, 1], tr("Preparing for matching"))
cache = get_cache(cache_path) cache = get_cache(cache_path)
id2picture = {} id2picture = {}

View File

@ -37,7 +37,7 @@ class Photo(fs.File):
def _plat_get_blocks(self, block_count_per_side, orientation): def _plat_get_blocks(self, block_count_per_side, orientation):
raise NotImplementedError() raise NotImplementedError()
def _get_orientation(self): def get_orientation(self):
if not hasattr(self, "_cached_orientation"): if not hasattr(self, "_cached_orientation"):
try: try:
with self.path.open("rb") as fp: with self.path.open("rb") as fp:
@ -95,13 +95,13 @@ class Photo(fs.File):
fs.File._read_info(self, field) fs.File._read_info(self, field)
if field == "dimensions": if field == "dimensions":
self.dimensions = self._plat_get_dimensions() self.dimensions = self._plat_get_dimensions()
if self._get_orientation() in {5, 6, 7, 8}: if self.get_orientation() in {5, 6, 7, 8}:
self.dimensions = (self.dimensions[1], self.dimensions[0]) self.dimensions = (self.dimensions[1], self.dimensions[0])
elif field == "exif_timestamp": elif field == "exif_timestamp":
self.exif_timestamp = self._get_exif_timestamp() self.exif_timestamp = self._get_exif_timestamp()
def get_blocks(self, block_count_per_side, orientation: int = None): def get_blocks(self, block_count_per_side, orientation: int = None):
if orientation is None: if orientation is None:
return self._plat_get_blocks(block_count_per_side, self._get_orientation()) return self._plat_get_blocks(block_count_per_side, self.get_orientation())
else: else:
return self._plat_get_blocks(block_count_per_side, orientation) return self._plat_get_blocks(block_count_per_side, orientation)

View File

@ -59,13 +59,13 @@ class BaseTestCaseCache:
def test_set_then_retrieve_blocks(self): def test_set_then_retrieve_blocks(self):
c = self.get_cache() c = self.get_cache()
b = [(0, 0, 0), (1, 2, 3)] b = [[(0, 0, 0), (1, 2, 3)]] * 8
c["foo"] = b c["foo"] = b
eq_(b, c["foo"]) eq_(b, c["foo"])
def test_delitem(self): def test_delitem(self):
c = self.get_cache() c = self.get_cache()
c["foo"] = "" c["foo"] = [[]] * 8
del c["foo"] del c["foo"]
assert "foo" not in c assert "foo" not in c
with raises(KeyError): with raises(KeyError):
@ -74,16 +74,16 @@ class BaseTestCaseCache:
def test_persistance(self, tmpdir): def test_persistance(self, tmpdir):
DBNAME = tmpdir.join("hstest.db") DBNAME = tmpdir.join("hstest.db")
c = self.get_cache(str(DBNAME)) c = self.get_cache(str(DBNAME))
c["foo"] = [(1, 2, 3)] c["foo"] = [[(1, 2, 3)]] * 8
del c del c
c = self.get_cache(str(DBNAME)) c = self.get_cache(str(DBNAME))
eq_([(1, 2, 3)], c["foo"]) eq_([[(1, 2, 3)]] * 8, c["foo"])
def test_filter(self): def test_filter(self):
c = self.get_cache() c = self.get_cache()
c["foo"] = "" c["foo"] = [[]] * 8
c["bar"] = "" c["bar"] = [[]] * 8
c["baz"] = "" c["baz"] = [[]] * 8
c.filter(lambda p: p != "bar") # only 'bar' is removed c.filter(lambda p: p != "bar") # only 'bar' is removed
eq_(2, len(c)) eq_(2, len(c))
assert "foo" in c assert "foo" in c
@ -92,9 +92,9 @@ class BaseTestCaseCache:
def test_clear(self): def test_clear(self):
c = self.get_cache() c = self.get_cache()
c["foo"] = "" c["foo"] = [[]] * 8
c["bar"] = "" c["bar"] = [[]] * 8
c["baz"] = "" c["baz"] = [[]] * 8
c.clear() c.clear()
eq_(0, len(c)) eq_(0, len(c))
assert "foo" not in c assert "foo" not in c
@ -104,7 +104,7 @@ class BaseTestCaseCache:
def test_by_id(self): def test_by_id(self):
# it's possible to use the cache by referring to the files by their row_id # it's possible to use the cache by referring to the files by their row_id
c = self.get_cache() c = self.get_cache()
b = [(0, 0, 0), (1, 2, 3)] b = [[(0, 0, 0), (1, 2, 3)]] * 8
c["foo"] = b c["foo"] = b
foo_id = c.get_id("foo") foo_id = c.get_id("foo")
eq_(c[foo_id], b) eq_(c[foo_id], b)
@ -127,10 +127,10 @@ class TestCaseSqliteCache(BaseTestCaseCache):
fp.write("invalid sqlite content") fp.write("invalid sqlite content")
fp.close() fp.close()
c = self.get_cache(dbname) # should not raise a DatabaseError c = self.get_cache(dbname) # should not raise a DatabaseError
c["foo"] = [(1, 2, 3)] c["foo"] = [[(1, 2, 3)]] * 8
del c del c
c = self.get_cache(dbname) c = self.get_cache(dbname)
eq_(c["foo"], [(1, 2, 3)]) eq_(c["foo"], [[(1, 2, 3)]] * 8)
class TestCaseCacheSQLEscape: class TestCaseCacheSQLEscape:
@ -152,7 +152,7 @@ class TestCaseCacheSQLEscape:
def test_delitem(self): def test_delitem(self):
c = self.get_cache() c = self.get_cache()
c["foo'bar"] = [] c["foo'bar"] = [[]] * 8
try: try:
del c["foo'bar"] del c["foo'bar"]
except KeyError: except KeyError: