mirror of
https://github.com/arsenetar/dupeguru.git
synced 2024-10-29 21:05:57 +00:00
match all orientations (#1127)
* match all orientations
* use rotation as option

---------

Co-authored-by: Andrew Senetar <arsenetar@gmail.com>
Co-authored-by: Luke <byunghun.hyun26@gmail.com>
This commit is contained in:
parent
70d956b4f8
commit
85a4557525
@ -304,12 +304,12 @@ def getmatches_by_contents(files, bigsize=0, j=job.nulljob):
|
||||
result.append(Match(first, second, 100))
|
||||
continue
|
||||
# if digests are the same (and not None) then files match
|
||||
if first.digest_partial == second.digest_partial and first.digest_partial is not None:
|
||||
if first.digest_partial is not None and first.digest_partial == second.digest_partial:
|
||||
if bigsize > 0 and first.size > bigsize:
|
||||
if first.digest_samples == second.digest_samples and first.digest_samples is not None:
|
||||
if first.digest_samples is not None and first.digest_samples == second.digest_samples:
|
||||
result.append(Match(first, second, 100))
|
||||
else:
|
||||
if first.digest == second.digest and first.digest is not None:
|
||||
if first.digest is not None and first.digest == second.digest:
|
||||
result.append(Match(first, second, 100))
|
||||
group_count += 1
|
||||
j.add_progress(desc=PROGRESS_MESSAGE % (len(result), group_count))
|
||||
|
@ -206,7 +206,7 @@ class File:
|
||||
# Slots for File make us save quite a bit of memory. In a memory test I've made with a lot of
|
||||
# files, I saved 35% memory usage with "unread" files (no _read_info() call) and gains become
|
||||
# even greater when we take into account read attributes (70%!). Yeah, it's worth it.
|
||||
__slots__ = ("path", "is_ref", "words") + tuple(INITIAL_INFO.keys())
|
||||
__slots__ = ("path", "unicode_path", "is_ref", "words") + tuple(INITIAL_INFO.keys())
|
||||
|
||||
def __init__(self, path):
|
||||
for attrname in self.INITIAL_INFO:
|
||||
@ -217,6 +217,8 @@ class File:
|
||||
self.mtime = nonone(path.stat().st_mtime, 0)
|
||||
else:
|
||||
self.path = path
|
||||
if self.path:
|
||||
self.unicode_path = str(self.path)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<{self.__class__.__name__} {str(self.path)}>"
|
||||
|
@ -15,10 +15,10 @@ from core.pe.cache import bytes_to_colors, colors_to_bytes
|
||||
class SqliteCache:
|
||||
"""A class to cache picture blocks in a sqlite backend."""
|
||||
|
||||
schema_version = 1
|
||||
schema_version_description = "Changed from string to bytes for blocks."
|
||||
schema_version = 2
|
||||
schema_version_description = "Added blocks for all 8 orientations."
|
||||
|
||||
create_table_query = "CREATE TABLE IF NOT EXISTS pictures(path TEXT, mtime_ns INTEGER, blocks BLOB)"
|
||||
create_table_query = "CREATE TABLE IF NOT EXISTS pictures(path TEXT, mtime_ns INTEGER, blocks BLOB, blocks2 BLOB, blocks3 BLOB, blocks4 BLOB, blocks5 BLOB, blocks6 BLOB, blocks7 BLOB, blocks8 BLOB)"
|
||||
create_index_query = "CREATE INDEX IF NOT EXISTS idx_path on pictures (path)"
|
||||
drop_table_query = "DROP TABLE IF EXISTS pictures"
|
||||
drop_index_query = "DROP INDEX IF EXISTS idx_path"
|
||||
@ -43,12 +43,12 @@ class SqliteCache:
|
||||
# Optimized
|
||||
def __getitem__(self, key):
|
||||
if isinstance(key, int):
|
||||
sql = "select blocks from pictures where rowid = ?"
|
||||
sql = "select blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where rowid = ?"
|
||||
else:
|
||||
sql = "select blocks from pictures where path = ?"
|
||||
result = self.con.execute(sql, [key]).fetchone()
|
||||
if result:
|
||||
result = bytes_to_colors(result[0])
|
||||
sql = "select blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where path = ?"
|
||||
blocks = self.con.execute(sql, [key]).fetchone()
|
||||
if blocks:
|
||||
result = [bytes_to_colors(block) for block in blocks]
|
||||
return result
|
||||
else:
|
||||
raise KeyError(key)
|
||||
@ -64,17 +64,17 @@ class SqliteCache:
|
||||
return result[0][0]
|
||||
|
||||
def __setitem__(self, path_str, blocks):
|
||||
blocks = colors_to_bytes(blocks)
|
||||
blocks = [colors_to_bytes(block) for block in blocks]
|
||||
if op.exists(path_str):
|
||||
mtime = int(os.stat(path_str).st_mtime)
|
||||
else:
|
||||
mtime = 0
|
||||
if path_str in self:
|
||||
sql = "update pictures set blocks = ?, mtime_ns = ? where path = ?"
|
||||
sql = "update pictures set blocks = ?, blocks2 = ?, blocks3 = ?, blocks4 = ?, blocks5 = ?, blocks6 = ?, blocks7 = ?, blocks8 = ?, mtime_ns = ? where path = ?"
|
||||
else:
|
||||
sql = "insert into pictures(blocks,mtime_ns,path) values(?,?,?)"
|
||||
sql = "insert into pictures(blocks,blocks2,blocks3,blocks4,blocks5,blocks6,blocks7,blocks8,mtime_ns,path) values(?,?,?,?,?,?,?,?,?,?)"
|
||||
try:
|
||||
self.con.execute(sql, [blocks, mtime, path_str])
|
||||
self.con.execute(sql, blocks + [mtime, path_str])
|
||||
except sqlite.OperationalError:
|
||||
logging.warning("Picture cache could not set value for key %r", path_str)
|
||||
except sqlite.DatabaseError as e:
|
||||
@ -136,9 +136,9 @@ class SqliteCache:
|
||||
raise ValueError(path)
|
||||
|
||||
def get_multiple(self, rowids):
|
||||
sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(map(str, rowids))
|
||||
sql = "select rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where rowid in (%s)" % ",".join(map(str, rowids))
|
||||
cur = self.con.execute(sql)
|
||||
return ((rowid, bytes_to_colors(blocks)) for rowid, blocks in cur)
|
||||
return ((rowid, [bytes_to_colors(blocks), bytes_to_colors(blocks2), bytes_to_colors(blocks3), bytes_to_colors(blocks4), bytes_to_colors(blocks5), bytes_to_colors(blocks6), bytes_to_colors(blocks7), bytes_to_colors(blocks8)]) for rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 in cur)
|
||||
|
||||
def purge_outdated(self):
|
||||
"""Go through the cache and purge outdated records.
|
||||
|
@ -72,13 +72,12 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
|
||||
# entry in iPhoto library.
|
||||
logging.warning("We have a picture with a null path here")
|
||||
continue
|
||||
picture.unicode_path = str(picture.path)
|
||||
logging.debug("Analyzing picture at %s", picture.unicode_path)
|
||||
if with_dimensions:
|
||||
picture.dimensions # pre-read dimensions
|
||||
try:
|
||||
if picture.unicode_path not in cache:
|
||||
blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
|
||||
blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)]
|
||||
cache[picture.unicode_path] = blocks
|
||||
prepared.append(picture)
|
||||
except (OSError, ValueError) as e:
|
||||
@ -119,13 +118,13 @@ def get_match(first, second, percentage):
|
||||
return Match(first, second, percentage)
|
||||
|
||||
|
||||
def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
|
||||
def async_compare(ref_ids, other_ids, dbname, threshold, picinfo, match_rotated=False):
|
||||
# The list of ids in ref_ids have to be compared to the list of ids in other_ids. other_ids
|
||||
# can be None. In this case, ref_ids has to be compared with itself
|
||||
# picinfo is a dictionary {pic_id: (dimensions, is_ref)}
|
||||
cache = get_cache(dbname, readonly=True)
|
||||
limit = 100 - threshold
|
||||
ref_pairs = list(cache.get_multiple(ref_ids))
|
||||
ref_pairs = list(cache.get_multiple(ref_ids)) # (rowid, [b, b2, ..., b8])
|
||||
if other_ids is not None:
|
||||
other_pairs = list(cache.get_multiple(other_ids))
|
||||
comparisons_to_do = [(r, o) for r in ref_pairs for o in other_pairs]
|
||||
@ -138,22 +137,35 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
|
||||
if ref_is_ref and other_is_ref:
|
||||
continue
|
||||
if ref_dimensions != other_dimensions:
|
||||
continue
|
||||
try:
|
||||
diff = avgdiff(ref_blocks, other_blocks, limit, MIN_ITERATIONS)
|
||||
percentage = 100 - diff
|
||||
except (DifferentBlockCountError, NoBlocksError):
|
||||
percentage = 0
|
||||
if percentage >= threshold:
|
||||
results.append((ref_id, other_id, percentage))
|
||||
if match_rotated:
|
||||
rotated_ref_dimensions = (ref_dimensions[1], ref_dimensions[0])
|
||||
if rotated_ref_dimensions != other_dimensions:
|
||||
continue
|
||||
else:
|
||||
continue
|
||||
|
||||
orientation_range = 1
|
||||
if match_rotated:
|
||||
orientation_range = 8
|
||||
|
||||
for orientation_ref in range(orientation_range):
|
||||
try:
|
||||
diff = avgdiff(ref_blocks[orientation_ref], other_blocks[0], limit, MIN_ITERATIONS)
|
||||
percentage = 100 - diff
|
||||
except (DifferentBlockCountError, NoBlocksError):
|
||||
percentage = 0
|
||||
if percentage >= threshold:
|
||||
results.append((ref_id, other_id, percentage))
|
||||
break
|
||||
|
||||
cache.close()
|
||||
return results
|
||||
|
||||
|
||||
def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob):
|
||||
def getmatches(pictures, cache_path, threshold, match_scaled=False, match_rotated=False, j=job.nulljob):
|
||||
def get_picinfo(p):
|
||||
if match_scaled:
|
||||
return (None, p.is_ref)
|
||||
return ((None, None), p.is_ref)
|
||||
else:
|
||||
return (p.dimensions, p.is_ref)
|
||||
|
||||
@ -205,7 +217,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
|
||||
picinfo.update({p.cache_id: get_picinfo(p) for p in other_chunk})
|
||||
else:
|
||||
other_ids = None
|
||||
args = (ref_ids, other_ids, cache_path, threshold, picinfo)
|
||||
args = (ref_ids, other_ids, cache_path, threshold, picinfo, match_rotated)
|
||||
async_results.append(pool.apply_async(async_compare, args))
|
||||
collect_results()
|
||||
collect_results(collect_all=True)
|
||||
|
@ -100,5 +100,8 @@ class Photo(fs.File):
|
||||
elif field == "exif_timestamp":
|
||||
self.exif_timestamp = self._get_exif_timestamp()
|
||||
|
||||
def get_blocks(self, block_count_per_side):
|
||||
return self._plat_get_blocks(block_count_per_side, self._get_orientation())
|
||||
def get_blocks(self, block_count_per_side, orientation: int = None):
|
||||
if orientation is None:
|
||||
return self._plat_get_blocks(block_count_per_side, self._get_orientation())
|
||||
else:
|
||||
return self._plat_get_blocks(block_count_per_side, orientation)
|
||||
|
@ -14,6 +14,7 @@ from core.pe import matchblock, matchexif
|
||||
class ScannerPE(Scanner):
|
||||
cache_path = None
|
||||
match_scaled = False
|
||||
match_rotated = False
|
||||
|
||||
@staticmethod
|
||||
def get_scan_options():
|
||||
@ -29,6 +30,7 @@ class ScannerPE(Scanner):
|
||||
cache_path=self.cache_path,
|
||||
threshold=self.min_match_percentage,
|
||||
match_scaled=self.match_scaled,
|
||||
match_rotated=self.match_rotated,
|
||||
j=j,
|
||||
)
|
||||
elif self.scan_type == ScanType.EXIFTIMESTAMP:
|
||||
|
@ -14,6 +14,10 @@ Preferences
|
||||
If you check this box, pictures of different dimensions will be allowed in the same
|
||||
duplicate group.
|
||||
|
||||
**Match pictures of different rotations:**
|
||||
If you check this box, a picture and a rotated copy of it (for example, turned by 90°) will be allowed in the same
|
||||
duplicate group.
|
||||
|
||||
.. _filter-hardness:
|
||||
|
||||
**Filter Hardness:**
|
||||
|
@ -307,6 +307,10 @@ msgstr "Debug mode (restart required)"
|
||||
msgid "Match pictures of different dimensions"
|
||||
msgstr "Match pictures of different dimensions"
|
||||
|
||||
#: qt/pe/preferences_dialog.py:19 cocoa/en.lproj/Localizable.strings:0
|
||||
msgid "Match pictures of different rotations"
|
||||
msgstr "Match pictures of different rotations"
|
||||
|
||||
#: qt/preferences_dialog.py:43
|
||||
msgid "Filter Hardness:"
|
||||
msgstr "Filter Hardness:"
|
||||
|
@ -316,6 +316,10 @@ msgstr "Mode de depuración (se requiere reinicio)"
|
||||
msgid "Match pictures of different dimensions"
|
||||
msgstr "Coincidencia de imágenes de distintas dimensiones"
|
||||
|
||||
#: qt/pe/preferences_dialog.py:19 cocoa/en.lproj/Localizable.strings:0
|
||||
msgid "Match pictures of different rotations"
|
||||
msgstr "Coincidencia de imágenes de distintas rotaciones"
|
||||
|
||||
#: qt/preferences_dialog.py:43
|
||||
msgid "Filter Hardness:"
|
||||
msgstr "Dureza del Filtro:"
|
||||
|
@ -314,6 +314,10 @@ msgstr "Modo de Depuração (requer reinício)"
|
||||
msgid "Match pictures of different dimensions"
|
||||
msgstr "Coincidir fotos de dimensões diferentes"
|
||||
|
||||
#: qt/pe/preferences_dialog.py:19 cocoa/en.lproj/Localizable.strings:0
|
||||
msgid "Match pictures of different rotations"
|
||||
msgstr "Coincidir fotos de rotações diferentes"
|
||||
|
||||
#: qt/preferences_dialog.py:43
|
||||
msgid "Filter Hardness:"
|
||||
msgstr "Pressão do Filtro:"
|
||||
|
@ -192,6 +192,7 @@ class DupeGuru(QObject):
|
||||
scanned_tags.add("year")
|
||||
self.model.options["scanned_tags"] = scanned_tags
|
||||
self.model.options["match_scaled"] = self.prefs.match_scaled
|
||||
self.model.options["match_rotated"] = self.prefs.match_rotated
|
||||
self.model.options["include_exists_check"] = self.prefs.include_exists_check
|
||||
self.model.options["rehash_ignore_mtime"] = self.prefs.rehash_ignore_mtime
|
||||
|
||||
|
@ -21,6 +21,8 @@ class PreferencesDialog(PreferencesDialogBase):
|
||||
self.widgetsVLayout.addLayout(self.filterHardnessHLayout)
|
||||
self._setupAddCheckbox("matchScaledBox", tr("Match pictures of different dimensions"))
|
||||
self.widgetsVLayout.addWidget(self.matchScaledBox)
|
||||
self._setupAddCheckbox("matchRotatedBox", tr("Match pictures of different rotations"))
|
||||
self.widgetsVLayout.addWidget(self.matchRotatedBox)
|
||||
self._setupAddCheckbox("mixFileKindBox", tr("Can mix file kind"))
|
||||
self.widgetsVLayout.addWidget(self.mixFileKindBox)
|
||||
self._setupAddCheckbox("useRegexpBox", tr("Use regular expressions when filtering"))
|
||||
@ -57,6 +59,7 @@ show scrollbars to span the view around"
|
||||
|
||||
def _load(self, prefs, setchecked, section):
|
||||
setchecked(self.matchScaledBox, prefs.match_scaled)
|
||||
setchecked(self.matchRotatedBox, prefs.match_rotated)
|
||||
|
||||
# Update UI state based on selected scan type
|
||||
scan_type = prefs.get_scan_type(AppMode.PICTURE)
|
||||
@ -67,5 +70,6 @@ show scrollbars to span the view around"
|
||||
|
||||
def _save(self, prefs, ischecked):
|
||||
prefs.match_scaled = ischecked(self.matchScaledBox)
|
||||
prefs.match_rotated = ischecked(self.matchRotatedBox)
|
||||
prefs.details_dialog_override_theme_icons = ischecked(self.details_dialog_override_theme_icons)
|
||||
prefs.details_dialog_viewers_show_scrollbars = ischecked(self.details_dialog_viewers_show_scrollbars)
|
||||
|
@ -225,6 +225,7 @@ class Preferences(PreferencesBase):
|
||||
self.scan_tag_genre = get("ScanTagGenre", self.scan_tag_genre)
|
||||
self.scan_tag_year = get("ScanTagYear", self.scan_tag_year)
|
||||
self.match_scaled = get("MatchScaled", self.match_scaled)
|
||||
self.match_rotated = get("MatchRotated", self.match_rotated)
|
||||
|
||||
def reset(self):
|
||||
self.filter_hardness = 95
|
||||
@ -277,6 +278,7 @@ class Preferences(PreferencesBase):
|
||||
self.scan_tag_genre = False
|
||||
self.scan_tag_year = False
|
||||
self.match_scaled = False
|
||||
self.match_rotated = False
|
||||
|
||||
def _save_values(self, settings):
|
||||
set_ = self.set_value
|
||||
@ -330,6 +332,7 @@ class Preferences(PreferencesBase):
|
||||
set_("ScanTagGenre", self.scan_tag_genre)
|
||||
set_("ScanTagYear", self.scan_tag_year)
|
||||
set_("MatchScaled", self.match_scaled)
|
||||
set_("MatchRotated", self.match_rotated)
|
||||
|
||||
# scan_type is special because we save it immediately when we set it.
|
||||
def get_scan_type(self, app_mode):
|
||||
|
Loading…
Reference in New Issue
Block a user