From e30a1354511b781fc603457685fac2adc18b4c6c Mon Sep 17 00:00:00 2001 From: Andrew Senetar Date: Thu, 5 Jan 2023 23:01:16 -0600 Subject: [PATCH] feat: Add additional scan time options - Add option to include file existence check at end of scan. Disabling the check speeds up the end of scan operation considerably; however, if the user has removed or moved files since starting a scan, there could be later errors when interacting with results. Defaults to the existing behavior of including the check, until it can be verified that later dialogs and actions handle non-existent items better. - Add option to ignore differences in mtime when checking hash cache. Option is present in advanced tab of preferences. Closes #1022. - Regenerate pot files for translations --- core/app.py | 7 ++++++- core/fs.py | 12 +++++++++--- core/scanner.py | 4 +++- locale/core.pot | 40 ++++++++++++++++++++-------------------- locale/ui.pot | 14 ++++++++++++++ qt/app.py | 2 ++ qt/preferences.py | 6 ++++++ qt/preferences_dialog.py | 27 ++++++++++++++++++++++++++- 8 files changed, 86 insertions(+), 26 deletions(-) diff --git a/core/app.py b/core/app.py index 0d30680b..9a907aed 100644 --- a/core/app.py +++ b/core/app.py @@ -154,6 +154,8 @@ class DupeGuru(Broadcaster): "ignore_hardlink_matches": False, "copymove_dest_type": DestType.RELATIVE, "picture_cache_type": self.PICTURE_CACHE_TYPE, + "include_exists_check": True, + "rehash_ignore_mtime": False, } self.selected_dupes = [] self.details_panel = DetailsPanel(self) @@ -555,7 +557,9 @@ class DupeGuru(Broadcaster): # a workaround to make the damn thing work. 
exepath, args = match.groups() path, exename = op.split(exepath) - p = subprocess.Popen(exename + args, shell=True, cwd=path, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + p = subprocess.Popen( + exename + args, shell=True, cwd=path, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) output = p.stdout.read() logging.info("Custom command %s %s: %s", exename, args, output) else: @@ -792,6 +796,7 @@ class DupeGuru(Broadcaster): Scans folders selected in :attr:`directories` and put the results in :attr:`results` """ scanner = self.SCANNER_CLASS() + fs.filesdb.ignore_mtime = self.options["rehash_ignore_mtime"] is True if not self.directories.has_any_file(): self.view.show_message(tr("The selected directories contain no scannable file.")) return diff --git a/core/fs.py b/core/fs.py index cc812238..647a5cfd 100644 --- a/core/fs.py +++ b/core/fs.py @@ -100,11 +100,14 @@ class FilesDB: create_table_query = "CREATE TABLE IF NOT EXISTS files (path TEXT PRIMARY KEY, size INTEGER, mtime_ns INTEGER, entry_dt DATETIME, digest BLOB, digest_partial BLOB, digest_samples BLOB)" drop_table_query = "DROP TABLE IF EXISTS files;" select_query = "SELECT {key} FROM files WHERE path=:path AND size=:size and mtime_ns=:mtime_ns" + select_query_ignore_mtime = "SELECT {key} FROM files WHERE path=:path AND size=:size" insert_query = """ INSERT INTO files (path, size, mtime_ns, entry_dt, {key}) VALUES (:path, :size, :mtime_ns, datetime('now'), :value) ON CONFLICT(path) DO UPDATE SET size=:size, mtime_ns=:mtime_ns, entry_dt=datetime('now'), {key}=:value; """ + ignore_mtime = False + def __init__(self): self.conn = None self.cur = None @@ -146,9 +149,12 @@ class FilesDB: mtime_ns = stat.st_mtime_ns try: with self.lock: - self.cur.execute( - self.select_query.format(key=key), {"path": str(path), "size": size, "mtime_ns": mtime_ns} - ) + if self.ignore_mtime: + self.cur.execute(self.select_query_ignore_mtime.format(key=key), {"path": str(path), "size": size}) + else: + self.cur.execute( + 
self.select_query.format(key=key), {"path": str(path), "size": size, "mtime_ns": mtime_ns} + ) result = self.cur.fetchone() if result: diff --git a/core/scanner.py b/core/scanner.py index 639b1be9..72550497 100644 --- a/core/scanner.py +++ b/core/scanner.py @@ -171,7 +171,8 @@ class Scanner: matches = [m for m in matches if m.first.path not in toremove or m.second.path not in toremove] if not self.mix_file_kind: matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)] - matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()] + if self.include_exists_check: + matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()] matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)] if ignore_list: matches = [m for m in matches if not ignore_list.are_ignored(str(m.first.path), str(m.second.path))] @@ -212,3 +213,4 @@ class Scanner: large_size_threshold = 0 big_file_size_threshold = 0 word_weighting = False + include_exists_check = True diff --git a/locale/core.pot b/locale/core.pot index a643ee59..622ec726 100644 --- a/locale/core.pot +++ b/locale/core.pot @@ -36,83 +36,83 @@ msgstr "" msgid "Sending to Trash" msgstr "" -#: core\app.py:291 +#: core\app.py:293 msgid "A previous action is still hanging in there. You can't start a new one yet. Wait a few seconds, then try again." msgstr "" -#: core\app.py:302 +#: core\app.py:304 msgid "No duplicates found." msgstr "" -#: core\app.py:317 +#: core\app.py:319 msgid "All marked files were copied successfully." msgstr "" -#: core\app.py:319 +#: core\app.py:321 msgid "All marked files were moved successfully." msgstr "" -#: core\app.py:321 +#: core\app.py:323 msgid "All marked files were deleted successfully." msgstr "" -#: core\app.py:323 +#: core\app.py:325 msgid "All marked files were successfully sent to Trash." 
msgstr "" -#: core\app.py:328 +#: core\app.py:330 msgid "Could not load file: {}" msgstr "" -#: core\app.py:384 +#: core\app.py:386 msgid "'{}' already is in the list." msgstr "" -#: core\app.py:386 +#: core\app.py:388 msgid "'{}' does not exist." msgstr "" -#: core\app.py:394 +#: core\app.py:396 msgid "All selected %d matches are going to be ignored in all subsequent scans. Continue?" msgstr "" -#: core\app.py:471 +#: core\app.py:473 msgid "Select a directory to copy marked files to" msgstr "" -#: core\app.py:473 +#: core\app.py:475 msgid "Select a directory to move marked files to" msgstr "" -#: core\app.py:512 +#: core\app.py:514 msgid "Select a destination for your exported CSV" msgstr "" -#: core\app.py:518 core\app.py:773 core\app.py:783 +#: core\app.py:520 core\app.py:781 core\app.py:791 msgid "Couldn't write to file: {}" msgstr "" -#: core\app.py:541 +#: core\app.py:543 msgid "You have no custom command set up. Set it up in your preferences." msgstr "" -#: core\app.py:697 core\app.py:709 +#: core\app.py:705 core\app.py:717 msgid "You are about to remove %d files from results. Continue?" msgstr "" -#: core\app.py:745 +#: core\app.py:753 msgid "{} duplicate groups were changed by the re-prioritization." msgstr "" -#: core\app.py:792 +#: core\app.py:801 msgid "The selected directories contain no scannable file." msgstr "" -#: core\app.py:808 +#: core\app.py:817 msgid "Collecting files to scan" msgstr "" -#: core\app.py:858 +#: core\app.py:867 msgid "%s (%d discarded)" msgstr "" diff --git a/locale/ui.pot b/locale/ui.pot index 642c8d43..f820a13f 100644 --- a/locale/ui.pot +++ b/locale/ui.pot @@ -1092,3 +1092,17 @@ msgstr "" #: qt\search_edit.py:78 msgid "Search..." msgstr "" + +#: qt\preferences_dialog.py:219 +msgid "" +"These options are for advanced users or for very specific situations, most " +"users should not have to modify these." 
+msgstr "" + +#: qt\preferences_dialog.py:225 +msgid "Include existence check after scan completion" +msgstr "" + +#: qt\preferences_dialog.py:227 +msgid "Ignore difference in mtime when loading cached digests" +msgstr "" diff --git a/qt/app.py b/qt/app.py index 27ac6ed5..d0533743 100644 --- a/qt/app.py +++ b/qt/app.py @@ -193,6 +193,8 @@ class DupeGuru(QObject): self.model.options["scanned_tags"] = scanned_tags self.model.options["match_scaled"] = self.prefs.match_scaled self.model.options["picture_cache_type"] = self.prefs.picture_cache_type + self.model.options["include_exists_check"] = self.prefs.include_exists_check + self.model.options["rehash_ignore_mtime"] = self.prefs.rehash_ignore_mtime if self.details_dialog: self.details_dialog.update_options() diff --git a/qt/preferences.py b/qt/preferences.py index d0b84820..0c4cb651 100644 --- a/qt/preferences.py +++ b/qt/preferences.py @@ -161,6 +161,8 @@ class Preferences(PreferencesBase): self.ignore_hardlink_matches = get("IgnoreHardlinkMatches", self.ignore_hardlink_matches) self.use_regexp = get("UseRegexp", self.use_regexp) self.remove_empty_folders = get("RemoveEmptyFolders", self.remove_empty_folders) + self.rehash_ignore_mtime = get("RehashIgnoreMTime", self.rehash_ignore_mtime) + self.include_exists_check = get("IncludeExistsCheck", self.include_exists_check) self.debug_mode = get("DebugMode", self.debug_mode) self.profile_scan = get("ProfileScan", self.profile_scan) self.destination_type = get("DestinationType", self.destination_type) @@ -231,6 +233,8 @@ class Preferences(PreferencesBase): self.use_regexp = False self.ignore_hardlink_matches = False self.remove_empty_folders = False + self.rehash_ignore_mtime = False + self.include_exists_check = True self.debug_mode = False self.profile_scan = False self.destination_type = 1 @@ -283,6 +287,8 @@ class Preferences(PreferencesBase): set_("IgnoreHardlinkMatches", self.ignore_hardlink_matches) set_("UseRegexp", self.use_regexp) set_("RemoveEmptyFolders", 
self.remove_empty_folders) + set_("RehashIgnoreMTime", self.rehash_ignore_mtime) + set_("IncludeExistsCheck", self.include_exists_check) set_("DebugMode", self.debug_mode) set_("ProfileScan", self.profile_scan) set_("DestinationType", self.destination_type) diff --git a/qt/preferences_dialog.py b/qt/preferences_dialog.py index caa6eb00..0182a4ec 100644 --- a/qt/preferences_dialog.py +++ b/qt/preferences_dialog.py @@ -47,8 +47,9 @@ class Sections(Flag): GENERAL = auto() DISPLAY = auto() + ADVANCED = auto() DEBUG = auto() - ALL = GENERAL | DISPLAY | DEBUG + ALL = GENERAL | DISPLAY | ADVANCED | DEBUG class PreferencesDialogBase(QDialog): @@ -213,6 +214,19 @@ use the modifier key to drag the floating window around" details_groupbox.setLayout(self.details_groupbox_layout) self.displayVLayout.addWidget(details_groupbox) + def _setup_advanced_page(self): + tab_label = QLabel( + tr( + "These options are for advanced users or for very specific situations, most users should not have to modify these." 
+ ), + wordWrap=True, + ) + self.advanced_vlayout.addWidget(tab_label) + self._setupAddCheckbox("include_exists_check_box", tr("Include existence check after scan completion")) + self.advanced_vlayout.addWidget(self.include_exists_check_box) + self._setupAddCheckbox("rehash_ignore_mtime_box", tr("Ignore difference in mtime when loading cached digests")) + self.advanced_vlayout.addWidget(self.rehash_ignore_mtime_box) + def _setupDebugPage(self): self._setupAddCheckbox("debugModeBox", tr("Debug mode (restart required)")) self._setupAddCheckbox("profile_scan_box", tr("Profile scan operation")) @@ -244,16 +258,20 @@ use the modifier key to drag the floating window around" self.tabwidget = QTabWidget() self.page_general = QWidget() self.page_display = QWidget() + self.page_advanced = QWidget() self.page_debug = QWidget() self.widgetsVLayout = QVBoxLayout() self.page_general.setLayout(self.widgetsVLayout) self.displayVLayout = QVBoxLayout() self.displayVLayout.setSpacing(5) # arbitrary value, might conflict with style self.page_display.setLayout(self.displayVLayout) + self.advanced_vlayout = QVBoxLayout() + self.page_advanced.setLayout(self.advanced_vlayout) self.debugVLayout = QVBoxLayout() self.page_debug.setLayout(self.debugVLayout) self._setupPreferenceWidgets() self._setupDisplayPage() + self._setup_advanced_page() self._setupDebugPage() # self.mainVLayout.addLayout(self.widgetsVLayout) self.buttonBox = QDialogButtonBox(self) @@ -265,9 +283,11 @@ use the modifier key to drag the floating window around" self.layout().setSizeConstraint(QLayout.SetFixedSize) self.tabwidget.addTab(self.page_general, tr("General")) self.tabwidget.addTab(self.page_display, tr("Display")) + self.tabwidget.addTab(self.page_advanced, tr("Advanced")) self.tabwidget.addTab(self.page_debug, tr("Debug")) self.displayVLayout.addStretch(0) self.widgetsVLayout.addStretch(0) + self.advanced_vlayout.addStretch(0) self.debugVLayout.addStretch(0) def _load(self, prefs, setchecked, section): @@ -318,6 
+338,9 @@ use the modifier key to drag the floating window around" except KeyError: selected_lang = self.supportedLanguages["en"] self.languageComboBox.setCurrentText(selected_lang) + if section & Sections.ADVANCED: + setchecked(self.rehash_ignore_mtime_box, prefs.rehash_ignore_mtime) + setchecked(self.include_exists_check_box, prefs.include_exists_check) if section & Sections.DEBUG: setchecked(self.debugModeBox, prefs.debug_mode) setchecked(self.profile_scan_box, prefs.profile_scan) @@ -334,6 +357,8 @@ use the modifier key to drag the floating window around" prefs.use_regexp = ischecked(self.useRegexpBox) prefs.remove_empty_folders = ischecked(self.removeEmptyFoldersBox) prefs.ignore_hardlink_matches = ischecked(self.ignoreHardlinkMatches) + prefs.rehash_ignore_mtime = ischecked(self.rehash_ignore_mtime_box) + prefs.include_exists_check = ischecked(self.include_exists_check_box) prefs.debug_mode = ischecked(self.debugModeBox) prefs.profile_scan = ischecked(self.profile_scan_box) prefs.reference_bold_font = ischecked(self.reference_bold_font)