match all orientations

2025-09-11 17:58:17 +00:00 · 2023-05-13 18:03:13 -07:00 · 2023-05-13 18:03:13 -07:00 · b00f0bf4f1
commit b00f0bf4f1
parent 322d29a996
5 changed files with 43 additions and 32 deletions
--- a/core/engine.py
+++ b/core/engine.py
@@ -304,12 +304,12 @@ def getmatches_by_contents(files, bigsize=0, j=job.nulljob):
                result.append(Match(first, second, 100))
                continue
            # if digests are the same (and not None) then files match
-            if first.digest_partial == second.digest_partial and first.digest_partial is not None:
+            if first.digest_partial is not None and first.digest_partial == second.digest_partial:
                if bigsize > 0 and first.size > bigsize:
-                    if first.digest_samples == second.digest_samples and first.digest_samples is not None:
+                    if first.digest_samples is not None and first.digest_samples == second.digest_samples:
                        result.append(Match(first, second, 100))
                else:
-                    if first.digest == second.digest and first.digest is not None:
+                    if first.digest is not None and first.digest == second.digest:
                        result.append(Match(first, second, 100))
        group_count += 1
        j.add_progress(desc=PROGRESS_MESSAGE % (len(result), group_count))
--- a/core/fs.py
+++ b/core/fs.py
@@ -199,7 +199,7 @@ class File:
    # Slots for File make us save quite a bit of memory. In a memory test I've made with a lot of
    # files, I saved 35% memory usage with "unread" files (no _read_info() call) and gains become
    # even greater when we take into account read attributes (70%!). Yeah, it's worth it.
-    __slots__ = ("path", "is_ref", "words") + tuple(INITIAL_INFO.keys())
+    __slots__ = ("path", "unicode_path", "is_ref", "words") + tuple(INITIAL_INFO.keys())

    def __init__(self, path):
        for attrname in self.INITIAL_INFO:
@@ -210,6 +210,8 @@ class File:
            self.mtime = nonone(path.stat().st_mtime, 0)
        else:
            self.path = path
+        if self.path:
+            self.unicode_path = str(self.path)

    def __repr__(self):
        return f"<{self.__class__.__name__} {str(self.path)}>"
--- a/core/pe/cache_sqlite.py
+++ b/core/pe/cache_sqlite.py
@@ -15,10 +15,10 @@ from core.pe.cache import bytes_to_colors, colors_to_bytes
 class SqliteCache:
    """A class to cache picture blocks in a sqlite backend."""

-    schema_version = 1
-    schema_version_description = "Changed from string to bytes for blocks."
+    schema_version = 2
+    schema_version_description = "Added blocks for all 8 orientations."

-    create_table_query = "CREATE TABLE IF NOT EXISTS pictures(path TEXT, mtime_ns INTEGER, blocks BLOB)"
+    create_table_query = "CREATE TABLE IF NOT EXISTS pictures(path TEXT, mtime_ns INTEGER, blocks BLOB, blocks2 BLOB, blocks3 BLOB, blocks4 BLOB, blocks5 BLOB, blocks6 BLOB, blocks7 BLOB, blocks8 BLOB)"
    create_index_query = "CREATE INDEX IF NOT EXISTS idx_path on pictures (path)"
    drop_table_query = "DROP TABLE IF EXISTS pictures"
    drop_index_query = "DROP INDEX IF EXISTS idx_path"
@@ -43,12 +43,12 @@ class SqliteCache:
    # Optimized
    def __getitem__(self, key):
        if isinstance(key, int):
-            sql = "select blocks from pictures where rowid = ?"
+            sql = "select blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where rowid = ?"
        else:
-            sql = "select blocks from pictures where path = ?"
-        result = self.con.execute(sql, [key]).fetchone()
-        if result:
-            result = bytes_to_colors(result[0])
+            sql = "select blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where path = ?"
+        blocks = self.con.execute(sql, [key]).fetchone()
+        if blocks:
+            result = [bytes_to_colors(block) for block in blocks]
            return result
        else:
            raise KeyError(key)
@@ -64,17 +64,17 @@ class SqliteCache:
        return result[0][0]

    def __setitem__(self, path_str, blocks):
-        blocks = colors_to_bytes(blocks)
+        blocks = [colors_to_bytes(block) for block in blocks]
        if op.exists(path_str):
            mtime = int(os.stat(path_str).st_mtime)
        else:
            mtime = 0
        if path_str in self:
-            sql = "update pictures set blocks = ?, mtime_ns = ? where path = ?"
+            sql = "update pictures set blocks = ?, blocks2 = ?, blocks3 = ?, blocks4 = ?, blocks5 = ?, blocks6 = ?, blocks7 = ?, blocks8 = ?, mtime_ns = ? where path = ?"
        else:
-            sql = "insert into pictures(blocks,mtime_ns,path) values(?,?,?)"
+            sql = "insert into pictures(blocks,blocks2,blocks3,blocks4,blocks5,blocks6,blocks7,blocks8,mtime_ns,path) values(?,?,?,?,?,?,?,?,?,?)"
        try:
-            self.con.execute(sql, [blocks, mtime, path_str])
+            self.con.execute(sql, blocks + [mtime, path_str])
        except sqlite.OperationalError:
            logging.warning("Picture cache could not set value for key %r", path_str)
        except sqlite.DatabaseError as e:
@@ -136,9 +136,9 @@ class SqliteCache:
            raise ValueError(path)

    def get_multiple(self, rowids):
-        sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(map(str, rowids))
+        sql = "select rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where rowid in (%s)" % ",".join(map(str, rowids))
        cur = self.con.execute(sql)
-        return ((rowid, bytes_to_colors(blocks)) for rowid, blocks in cur)
+        return ((rowid, [bytes_to_colors(blocks), bytes_to_colors(blocks2), bytes_to_colors(blocks3), bytes_to_colors(blocks4), bytes_to_colors(blocks5), bytes_to_colors(blocks6), bytes_to_colors(blocks7), bytes_to_colors(blocks8)]) for rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 in cur)

    def purge_outdated(self):
        """Go through the cache and purge outdated records.
--- a/core/pe/matchblock.py
+++ b/core/pe/matchblock.py
@@ -72,13 +72,12 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
                # entry in iPhoto library.
                logging.warning("We have a picture with a null path here")
                continue
-            picture.unicode_path = str(picture.path)
            logging.debug("Analyzing picture at %s", picture.unicode_path)
            if with_dimensions:
                picture.dimensions  # pre-read dimensions
            try:
                if picture.unicode_path not in cache:
-                    blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
+                    blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)]
                    cache[picture.unicode_path] = blocks
                prepared.append(picture)
            except (OSError, ValueError) as e:
@@ -125,7 +124,7 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
    # picinfo is a dictionary {pic_id: (dimensions, is_ref)}
    cache = get_cache(dbname, readonly=True)
    limit = 100 - threshold
-    ref_pairs = list(cache.get_multiple(ref_ids))
+    ref_pairs = list(cache.get_multiple(ref_ids))  # (rowid, [b, b2, ..., b8])
    if other_ids is not None:
        other_pairs = list(cache.get_multiple(other_ids))
        comparisons_to_do = [(r, o) for r in ref_pairs for o in other_pairs]
@@ -137,15 +136,22 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
        other_dimensions, other_is_ref = picinfo[other_id]
        if ref_is_ref and other_is_ref:
            continue
-        if ref_dimensions != other_dimensions:
+        rotated_ref_dimensions = (ref_dimensions[1], ref_dimensions[0])
+        if ref_dimensions != other_dimensions and rotated_ref_dimensions != other_dimensions:
            continue
-        try:
-            diff = avgdiff(ref_blocks, other_blocks, limit, MIN_ITERATIONS)
-            percentage = 100 - diff
-        except (DifferentBlockCountError, NoBlocksError):
-            percentage = 0
-        if percentage >= threshold:
-            results.append((ref_id, other_id, percentage))
+        for orientation_ref in range(8):
+            for orientation_other in range(8):
+                try:
+                    diff = avgdiff(ref_blocks[orientation_ref], other_blocks[orientation_other], limit, MIN_ITERATIONS)
+                    percentage = 100 - diff
+                except (DifferentBlockCountError, NoBlocksError):
+                    percentage = 0
+                if percentage >= threshold:
+                    results.append((ref_id, other_id, percentage))
+                    break
+            else:
+                continue
+            break
    cache.close()
    return results

@@ -153,7 +159,7 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
 def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob):
    def get_picinfo(p):
        if match_scaled:
-            return (None, p.is_ref)
+            return ((None, None), p.is_ref)
        else:
            return (p.dimensions, p.is_ref)

--- a/core/pe/photo.py
+++ b/core/pe/photo.py
@@ -100,5 +100,8 @@ class Photo(fs.File):
        elif field == "exif_timestamp":
            self.exif_timestamp = self._get_exif_timestamp()

-    def get_blocks(self, block_count_per_side):
-        return self._plat_get_blocks(block_count_per_side, self._get_orientation())
+    def get_blocks(self, block_count_per_side, orientation: int = None):
+        if orientation is None:
+            return self._plat_get_blocks(block_count_per_side, self._get_orientation())
+        else:
+            return self._plat_get_blocks(block_count_per_side, orientation)