1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-22 14:41:39 +00:00

match all orientations (#1127)

* match all orientations

* use rotation as option

---------

Co-authored-by: Andrew Senetar <arsenetar@gmail.com>
Co-authored-by: Luke <byunghun.hyun26@gmail.com>
This commit is contained in:
Bruno Cabral
2024-02-19 07:19:33 -08:00
committed by GitHub
parent 70d956b4f8
commit 85a4557525
13 changed files with 78 additions and 35 deletions

View File

@@ -72,13 +72,12 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
# entry in iPhoto library.
logging.warning("We have a picture with a null path here")
continue
picture.unicode_path = str(picture.path)
logging.debug("Analyzing picture at %s", picture.unicode_path)
if with_dimensions:
picture.dimensions # pre-read dimensions
try:
if picture.unicode_path not in cache:
blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)]
cache[picture.unicode_path] = blocks
prepared.append(picture)
except (OSError, ValueError) as e:
@@ -119,13 +118,13 @@ def get_match(first, second, percentage):
return Match(first, second, percentage)
def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
def async_compare(ref_ids, other_ids, dbname, threshold, picinfo, match_rotated=False):
# The list of ids in ref_ids have to be compared to the list of ids in other_ids. other_ids
# can be None. In this case, ref_ids has to be compared with itself
# picinfo is a dictionary {pic_id: (dimensions, is_ref)}
cache = get_cache(dbname, readonly=True)
limit = 100 - threshold
ref_pairs = list(cache.get_multiple(ref_ids))
ref_pairs = list(cache.get_multiple(ref_ids)) # (rowid, [b, b2, ..., b8])
if other_ids is not None:
other_pairs = list(cache.get_multiple(other_ids))
comparisons_to_do = [(r, o) for r in ref_pairs for o in other_pairs]
@@ -138,22 +137,35 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
if ref_is_ref and other_is_ref:
continue
if ref_dimensions != other_dimensions:
continue
try:
diff = avgdiff(ref_blocks, other_blocks, limit, MIN_ITERATIONS)
percentage = 100 - diff
except (DifferentBlockCountError, NoBlocksError):
percentage = 0
if percentage >= threshold:
results.append((ref_id, other_id, percentage))
if match_rotated:
rotated_ref_dimensions = (ref_dimensions[1], ref_dimensions[0])
if rotated_ref_dimensions != other_dimensions:
continue
else:
continue
orientation_range = 1
if match_rotated:
orientation_range = 8
for orientation_ref in range(orientation_range):
try:
diff = avgdiff(ref_blocks[orientation_ref], other_blocks[0], limit, MIN_ITERATIONS)
percentage = 100 - diff
except (DifferentBlockCountError, NoBlocksError):
percentage = 0
if percentage >= threshold:
results.append((ref_id, other_id, percentage))
break
cache.close()
return results
def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob):
def getmatches(pictures, cache_path, threshold, match_scaled=False, match_rotated=False, j=job.nulljob):
def get_picinfo(p):
if match_scaled:
return (None, p.is_ref)
return ((None, None), p.is_ref)
else:
return (p.dimensions, p.is_ref)
@@ -205,7 +217,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
picinfo.update({p.cache_id: get_picinfo(p) for p in other_chunk})
else:
other_ids = None
args = (ref_ids, other_ids, cache_path, threshold, picinfo)
args = (ref_ids, other_ids, cache_path, threshold, picinfo, match_rotated)
async_results.append(pool.apply_async(async_compare, args))
collect_results()
collect_results(collect_all=True)