feat: Add additional scan time options

- Add option to include file existence check at end of scan. Disabling the check speeds up the end of the scan considerably; however, if the user has removed or moved files since starting the scan, there could be errors later when interacting with the results. Defaults to the existing behavior of including the check, until it can be verified that later dialogs and actions handle non-existent items better. - Add option to ignore differences in mtime when checking the hash cache. The option is present in the advanced tab of preferences. Closes #1022. - Regenerate pot files for translations.
2026-06-19 13:37:52 +00:00 · 2023-01-05 23:01:16 -06:00
parent 1db93fd142
commit e30a135451
8 changed files with 86 additions and 26 deletions
--- a/core/app.py
+++ b/core/app.py
@@ -154,6 +154,8 @@ class DupeGuru(Broadcaster):
            "ignore_hardlink_matches": False,
            "copymove_dest_type": DestType.RELATIVE,
            "picture_cache_type": self.PICTURE_CACHE_TYPE,
+            "include_exists_check": True,
+            "rehash_ignore_mtime": False,
        }
        self.selected_dupes = []
        self.details_panel = DetailsPanel(self)
@@ -555,7 +557,9 @@ class DupeGuru(Broadcaster):
                # a workaround to make the damn thing work.
                exepath, args = match.groups()
                path, exename = op.split(exepath)
-                p = subprocess.Popen(exename + args, shell=True, cwd=path, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+                p = subprocess.Popen(
+                    exename + args, shell=True, cwd=path, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+                )
                output = p.stdout.read()
                logging.info("Custom command %s %s: %s", exename, args, output)
            else:
@@ -792,6 +796,7 @@ class DupeGuru(Broadcaster):
        Scans folders selected in :attr:`directories` and put the results in :attr:`results`
        """
        scanner = self.SCANNER_CLASS()
+        fs.filesdb.ignore_mtime = self.options["rehash_ignore_mtime"] is True
        if not self.directories.has_any_file():
            self.view.show_message(tr("The selected directories contain no scannable file."))
            return
--- a/core/fs.py
+++ b/core/fs.py
@@ -100,11 +100,14 @@ class FilesDB:
    create_table_query = "CREATE TABLE IF NOT EXISTS files (path TEXT PRIMARY KEY, size INTEGER, mtime_ns INTEGER, entry_dt DATETIME, digest BLOB, digest_partial BLOB, digest_samples BLOB)"
    drop_table_query = "DROP TABLE IF EXISTS files;"
    select_query = "SELECT {key} FROM files WHERE path=:path AND size=:size and mtime_ns=:mtime_ns"
+    select_query_ignore_mtime = "SELECT {key} FROM files WHERE path=:path AND size=:size"
    insert_query = """
        INSERT INTO files (path, size, mtime_ns, entry_dt, {key}) VALUES (:path, :size, :mtime_ns, datetime('now'), :value)
        ON CONFLICT(path) DO UPDATE SET size=:size, mtime_ns=:mtime_ns, entry_dt=datetime('now'), {key}=:value;
    """

+    ignore_mtime = False
+
    def __init__(self):
        self.conn = None
        self.cur = None
@@ -146,9 +149,12 @@ class FilesDB:
        mtime_ns = stat.st_mtime_ns
        try:
            with self.lock:
-                self.cur.execute(
-                    self.select_query.format(key=key), {"path": str(path), "size": size, "mtime_ns": mtime_ns}
-                )
+                if self.ignore_mtime:
+                    self.cur.execute(self.select_query_ignore_mtime.format(key=key), {"path": str(path), "size": size})
+                else:
+                    self.cur.execute(
+                        self.select_query.format(key=key), {"path": str(path), "size": size, "mtime_ns": mtime_ns}
+                    )
                result = self.cur.fetchone()

            if result:
--- a/core/scanner.py
+++ b/core/scanner.py
@@ -171,7 +171,8 @@ class Scanner:
            matches = [m for m in matches if m.first.path not in toremove or m.second.path not in toremove]
        if not self.mix_file_kind:
            matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
-        matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
+        if self.include_exists_check:
+            matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
        matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
        if ignore_list:
            matches = [m for m in matches if not ignore_list.are_ignored(str(m.first.path), str(m.second.path))]
@@ -212,3 +213,4 @@ class Scanner:
    large_size_threshold = 0
    big_file_size_threshold = 0
    word_weighting = False
+    include_exists_check = True