diff --git a/core/engine.py b/core/engine.py index ca05a61f..934152fb 100644 --- a/core/engine.py +++ b/core/engine.py @@ -304,12 +304,12 @@ def getmatches_by_contents(files, bigsize=0, j=job.nulljob): result.append(Match(first, second, 100)) continue # if digests are the same (and not None) then files match - if first.digest_partial == second.digest_partial and first.digest_partial is not None: + if first.digest_partial is not None and first.digest_partial == second.digest_partial: if bigsize > 0 and first.size > bigsize: - if first.digest_samples == second.digest_samples and first.digest_samples is not None: + if first.digest_samples is not None and first.digest_samples == second.digest_samples: result.append(Match(first, second, 100)) else: - if first.digest == second.digest and first.digest is not None: + if first.digest is not None and first.digest == second.digest: result.append(Match(first, second, 100)) group_count += 1 j.add_progress(desc=PROGRESS_MESSAGE % (len(result), group_count)) diff --git a/core/fs.py b/core/fs.py index 11a29a20..2604caff 100644 --- a/core/fs.py +++ b/core/fs.py @@ -199,7 +199,7 @@ class File: # Slots for File make us save quite a bit of memory. In a memory test I've made with a lot of # files, I saved 35% memory usage with "unread" files (no _read_info() call) and gains become # even greater when we take into account read attributes (70%!). Yeah, it's worth it. 
- __slots__ = ("path", "is_ref", "words") + tuple(INITIAL_INFO.keys()) + __slots__ = ("path", "unicode_path", "is_ref", "words") + tuple(INITIAL_INFO.keys()) def __init__(self, path): for attrname in self.INITIAL_INFO: @@ -210,6 +210,8 @@ class File: self.mtime = nonone(path.stat().st_mtime, 0) else: self.path = path + if self.path: + self.unicode_path = str(self.path) def __repr__(self): return f"<{self.__class__.__name__} {str(self.path)}>" diff --git a/core/pe/cache_sqlite.py b/core/pe/cache_sqlite.py index 4cb3c588..5aaf2940 100644 --- a/core/pe/cache_sqlite.py +++ b/core/pe/cache_sqlite.py @@ -15,10 +15,10 @@ from core.pe.cache import bytes_to_colors, colors_to_bytes class SqliteCache: """A class to cache picture blocks in a sqlite backend.""" - schema_version = 1 - schema_version_description = "Changed from string to bytes for blocks." + schema_version = 2 + schema_version_description = "Added blocks for all 8 orientations." - create_table_query = "CREATE TABLE IF NOT EXISTS pictures(path TEXT, mtime_ns INTEGER, blocks BLOB)" + create_table_query = "CREATE TABLE IF NOT EXISTS pictures(path TEXT, mtime_ns INTEGER, blocks BLOB, blocks2 BLOB, blocks3 BLOB, blocks4 BLOB, blocks5 BLOB, blocks6 BLOB, blocks7 BLOB, blocks8 BLOB)" create_index_query = "CREATE INDEX IF NOT EXISTS idx_path on pictures (path)" drop_table_query = "DROP TABLE IF EXISTS pictures" drop_index_query = "DROP INDEX IF EXISTS idx_path" @@ -43,12 +43,12 @@ class SqliteCache: # Optimized def __getitem__(self, key): if isinstance(key, int): - sql = "select blocks from pictures where rowid = ?" + sql = "select blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where rowid = ?" else: - sql = "select blocks from pictures where path = ?" - result = self.con.execute(sql, [key]).fetchone() - if result: - result = bytes_to_colors(result[0]) + sql = "select blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where path = ?" 
+ blocks = self.con.execute(sql, [key]).fetchone() + if blocks: + result = [bytes_to_colors(block) for block in blocks] return result else: raise KeyError(key) @@ -64,17 +64,17 @@ class SqliteCache: return result[0][0] def __setitem__(self, path_str, blocks): - blocks = colors_to_bytes(blocks) + blocks = [colors_to_bytes(block) for block in blocks] if op.exists(path_str): mtime = int(os.stat(path_str).st_mtime) else: mtime = 0 if path_str in self: - sql = "update pictures set blocks = ?, mtime_ns = ? where path = ?" + sql = "update pictures set blocks = ?, blocks2 = ?, blocks3 = ?, blocks4 = ?, blocks5 = ?, blocks6 = ?, blocks7 = ?, blocks8 = ?, mtime_ns = ? where path = ?" else: - sql = "insert into pictures(blocks,mtime_ns,path) values(?,?,?)" + sql = "insert into pictures(blocks,blocks2,blocks3,blocks4,blocks5,blocks6,blocks7,blocks8,mtime_ns,path) values(?,?,?,?,?,?,?,?,?,?)" try: - self.con.execute(sql, [blocks, mtime, path_str]) + self.con.execute(sql, blocks + [mtime, path_str]) except sqlite.OperationalError: logging.warning("Picture cache could not set value for key %r", path_str) except sqlite.DatabaseError as e: @@ -136,9 +136,9 @@ class SqliteCache: raise ValueError(path) def get_multiple(self, rowids): - sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(map(str, rowids)) + sql = "select rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where rowid in (%s)" % ",".join(map(str, rowids)) cur = self.con.execute(sql) - return ((rowid, bytes_to_colors(blocks)) for rowid, blocks in cur) + return ((rowid, [bytes_to_colors(blocks), bytes_to_colors(blocks2), bytes_to_colors(blocks3), bytes_to_colors(blocks4), bytes_to_colors(blocks5), bytes_to_colors(blocks6), bytes_to_colors(blocks7), bytes_to_colors(blocks8)]) for rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 in cur) def purge_outdated(self): """Go through the cache and purge outdated records. 
diff --git a/core/pe/matchblock.py b/core/pe/matchblock.py index 9af739bd..e8b299b4 100644 --- a/core/pe/matchblock.py +++ b/core/pe/matchblock.py @@ -72,13 +72,12 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob): # entry in iPhoto library. logging.warning("We have a picture with a null path here") continue - picture.unicode_path = str(picture.path) logging.debug("Analyzing picture at %s", picture.unicode_path) if with_dimensions: picture.dimensions # pre-read dimensions try: if picture.unicode_path not in cache: - blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE) + blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)] cache[picture.unicode_path] = blocks prepared.append(picture) except (OSError, ValueError) as e: @@ -125,7 +124,7 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo): # picinfo is a dictionary {pic_id: (dimensions, is_ref)} cache = get_cache(dbname, readonly=True) limit = 100 - threshold - ref_pairs = list(cache.get_multiple(ref_ids)) + ref_pairs = list(cache.get_multiple(ref_ids)) # (rowid, [b, b2, ..., b8]) if other_ids is not None: other_pairs = list(cache.get_multiple(other_ids)) comparisons_to_do = [(r, o) for r in ref_pairs for o in other_pairs] @@ -137,15 +136,22 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo): other_dimensions, other_is_ref = picinfo[other_id] if ref_is_ref and other_is_ref: continue - if ref_dimensions != other_dimensions: + rotated_ref_dimensions = (ref_dimensions[1], ref_dimensions[0]) + if ref_dimensions != other_dimensions and rotated_ref_dimensions != other_dimensions: continue - try: - diff = avgdiff(ref_blocks, other_blocks, limit, MIN_ITERATIONS) - percentage = 100 - diff - except (DifferentBlockCountError, NoBlocksError): - percentage = 0 - if percentage >= threshold: - results.append((ref_id, other_id, percentage)) + for orientation_ref in range(8): + for orientation_other in range(8): + try: + diff = 
avgdiff(ref_blocks[orientation_ref], other_blocks[orientation_other], limit, MIN_ITERATIONS) + percentage = 100 - diff + except (DifferentBlockCountError, NoBlocksError): + percentage = 0 + if percentage >= threshold: + results.append((ref_id, other_id, percentage)) + break + else: + continue + break cache.close() return results @@ -153,7 +159,7 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo): def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob): def get_picinfo(p): if match_scaled: - return (None, p.is_ref) + return ((None, None), p.is_ref) else: return (p.dimensions, p.is_ref) diff --git a/core/pe/photo.py b/core/pe/photo.py index 128e3c1f..5bc8356f 100644 --- a/core/pe/photo.py +++ b/core/pe/photo.py @@ -100,5 +100,8 @@ class Photo(fs.File): elif field == "exif_timestamp": self.exif_timestamp = self._get_exif_timestamp() - def get_blocks(self, block_count_per_side): - return self._plat_get_blocks(block_count_per_side, self._get_orientation()) + def get_blocks(self, block_count_per_side, orientation=None): + if orientation is None: + return self._plat_get_blocks(block_count_per_side, self._get_orientation()) + else: + return self._plat_get_blocks(block_count_per_side, orientation)