From 470307aa3c1b66bce1fd6551ef9337b43c01693a Mon Sep 17 00:00:00 2001 From: glubsy Date: Mon, 27 Jul 2020 00:12:46 +0200 Subject: [PATCH 01/21] Ignore path and filename based on regex * Added initial draft for test suit * Fixed small logging bug --- core/directories.py | 58 ++++++++++++++++++++++++++-------- core/fs.py | 11 +++++-- core/tests/directories_test.py | 53 ++++++++++++++++++++++++++++++- tox.ini | 2 +- 4 files changed, 106 insertions(+), 18 deletions(-) diff --git a/core/directories.py b/core/directories.py index 66ba8163..9a372166 100644 --- a/core/directories.py +++ b/core/directories.py @@ -5,6 +5,7 @@ # http://www.gnu.org/licenses/gpl-3.0.html import os +import re from xml.etree import ElementTree as ET import logging @@ -52,12 +53,34 @@ class Directories: Then, when the user starts the scan, :meth:`get_files` is called to retrieve all files (wrapped in :mod:`core.fs`) that have to be scanned according to the chosen folders/states. """ + deny_list_str = set() + deny_list_re = set() + deny_list_re_files = set() # ---Override def __init__(self): self._dirs = [] # {path: state} self.states = {} + self.deny_list_str.add(r".*Recycle\.Bin$") + self.deny_list_str.add(r"denyme.*") + self.deny_list_str.add(r".*denyme") + self.deny_list_str.add(r".*/test/denyme*") + self.deny_list_str.add(r".*/test/*denyme") + self.deny_list_str.add(r"denyme") + self.deny_list_str.add(r".*\/\..*") + self.deny_list_str.add(r"^\..*") + self.compile_re() + + def compile_re(self): + for expr in self.deny_list_str: + try: + self.deny_list_re.add(re.compile(expr)) + if os.sep not in expr: + self.deny_list_re_files.add(re.compile(expr)) + except Exception as e: + logging.debug(f"Invalid regular expression \"{expr}\" in exclude list: {e}") + print(f"re_all: {self.deny_list_re}\nre_files: {self.deny_list_re_files}") def __contains__(self, path): for p in self._dirs: @@ -75,12 +98,15 @@ class Directories: return len(self._dirs) # ---Private - def _default_state_for_path(self, path): + def _default_state_for_path(self, path, deny_list_re=deny_list_re): # Override this in subclasses to specify the state of some special folders. - if path.name.startswith("."): # hidden - return DirectoryState.Excluded + # if path.name.startswith("."): # hidden + # return DirectoryState.Excluded + for denied_path_re in deny_list_re: + if denied_path_re.match(str(path)): + return DirectoryState.Excluded - def _get_files(self, from_path, fileclasses, j): + def _get_files(self, from_path, fileclasses, j, deny_list_re=deny_list_re_files): for root, dirs, files in os.walk(str(from_path)): j.check_if_cancelled() root = Path(root) @@ -93,9 +119,15 @@ class Directories: del dirs[:] try: if state != DirectoryState.Excluded: - found_files = [ - fs.get_file(root + f, fileclasses=fileclasses) for f in files - ] + found_files = [] + for f in files: + found = False + for expr in deny_list_re: + found = expr.match(f) + if found: + break + if not found: + found_files.append(fs.get_file(root + f, fileclasses=fileclasses)) found_files = [f for f in found_files if f is not None] # In some cases, directories can be considered as files by dupeGuru, which is # why we have this line below. In fact, there only one case: Bundle files under @@ -108,7 +140,7 @@ class Directories: logging.debug( "Collected %d files in folder %s", len(found_files), - str(from_path), + str(root), ) for file in found_files: file.is_ref = state == DirectoryState.Reference @@ -116,7 +148,7 @@ class Directories: except (EnvironmentError, fs.InvalidPath): pass - def _get_folders(self, from_folder, j): + def _get_folders(self, from_folder, j, deny_list_re=deny_list_re): j.check_if_cancelled() try: for subfolder in from_folder.subfolders: @@ -162,7 +194,7 @@ class Directories: except EnvironmentError: return [] - def get_files(self, fileclasses=None, j=job.nulljob): + def get_files(self, fileclasses=None, j=job.nulljob, deny_list_re=deny_list_re_files): """Returns a list of all files that are not excluded. Returned files also have their ``is_ref`` attr set if applicable. @@ -170,7 +202,7 @@ class Directories: if fileclasses is None: fileclasses = [fs.File] for path in self._dirs: - for file in self._get_files(path, fileclasses=fileclasses, j=j): + for file in self._get_files(path, fileclasses=fileclasses, j=j, deny_list_re=deny_list_re): yield file def get_folders(self, folderclass=None, j=job.nulljob): @@ -185,7 +217,7 @@ class Directories: for folder in self._get_folders(from_folder, j): yield folder - def get_state(self, path): + def get_state(self, path, denylist=deny_list_re): """Returns the state of ``path``. :rtype: :class:`DirectoryState` @@ -193,7 +225,7 @@ class Directories: # direct match? easy result. if path in self.states: return self.states[path] - state = self._default_state_for_path(path) or DirectoryState.Normal + state = self._default_state_for_path(path, denylist) or DirectoryState.Normal prevlen = 0 # we loop through the states to find the longest matching prefix for p, s in self.states.items(): diff --git a/core/fs.py b/core/fs.py index f18186ae..90f400d9 100644 --- a/core/fs.py +++ b/core/fs.py @@ -245,7 +245,7 @@ class Folder(File): return not path.islink() and path.isdir() -def get_file(path, fileclasses=[File]): +def get_file(path, fileclasses=[File], deny_list_re=set()): """Wraps ``path`` around its appropriate :class:`File` class. Whether a class is "appropriate" is decided by :meth:`File.can_handle` @@ -255,10 +255,15 @@ def get_file(path, fileclasses=[File]): """ for fileclass in fileclasses: if fileclass.can_handle(path): + # print(f"returning {path}") + # for expr in deny_list_re: + # if expr.match(str(path.name)): + # print(f"FOUND {repr(expr)} in {str(path.name)}") + # return return fileclass(path) -def get_files(path, fileclasses=[File]): +def get_files(path, fileclasses=[File], deny_list_re=set()): """Returns a list of :class:`File` for each file contained in ``path``. :param Path path: path to scan @@ -268,7 +273,7 @@ def get_files(path, fileclasses=[File]): try: result = [] for path in path.listdir(): - file = get_file(path, fileclasses=fileclasses) + file = get_file(path, fileclasses=fileclasses, deny_list_re=deny_list_re) if file is not None: result.append(file) return result diff --git a/core/tests/directories_test.py b/core/tests/directories_test.py index 05d814b2..7273b566 100644 --- a/core/tests/directories_test.py +++ b/core/tests/directories_test.py @@ -323,7 +323,7 @@ def test_get_state_returns_excluded_by_default_for_hidden_directories(tmpdir): def test_default_path_state_override(tmpdir): # It's possible for a subclass to override the default state of a path class MyDirectories(Directories): - def _default_state_for_path(self, path): + def _default_state_for_path(self, path, denylist): if "foobar" in path: return DirectoryState.Excluded @@ -341,3 +341,54 @@ def test_default_path_state_override(tmpdir): d.set_state(p1["foobar"], DirectoryState.Normal) eq_(d.get_state(p1["foobar"]), DirectoryState.Normal) eq_(len(list(d.get_files())), 2) + + +def test_exclude_list_regular_expressions(tmpdir): + d = Directories() + d.deny_list_str.clear() + d.deny_list_re.clear() + d.deny_list_re_files.clear() + # This should only exlude the directory, but not the contained files if + # its status is set to normal after loading it in the directory tree + d.deny_list_str.add(r".*Recycle\.Bin$") + d.deny_list_str.add(r"denyme.*") + # d.deny_list_str.add(r".*denymetoo") + # d.deny_list_str.add(r"denyme") + d.deny_list_str.add(r".*\/\..*") + d.deny_list_str.add(r"^\..*") + d.compile_re() + p1 = Path(str(tmpdir)) + # Should be ignored on Windows only (by default) + p1["Recycle.Bin"].mkdir() + p1["Recycle.Bin/somerecycledfile"].open("w").close() + + p1["denyme_blah.txt"].open("w").close() + p1["blah_denymetoo"].open("w").close() + p1["blah_denyme"].open("w").close() + + p1[".hidden_file"].open("w").close() + p1[".hidden_dir"].mkdir() + p1[".hidden_dir/somenormalfile1"].open("w").close() + p1[".hidden_dir/somenormalfile2_denyme"].open("w").close() + + p1["foobar"].mkdir() + p1["foobar/somefile"].open("w").close() + d.add_path(p1) + eq_(d.get_state(p1["Recycle.Bin"]), DirectoryState.Excluded) + eq_(d.get_state(p1["foobar"]), DirectoryState.Normal) + files = list(d.get_files()) + files = [file.name for file in files] + print(f"first files: {files}") + assert "somerecycledfile" not in files + assert "denyme_blah.txt" not in files + assert ".hidden_file" not in files + assert "somefile1" not in files + assert "somefile2_denyme" not in files + # Overriding the default state from the Directory Tree + d.set_state(p1["Recycle.Bin"], DirectoryState.Normal) + d.set_state(p1[".hidden_dir"], DirectoryState.Normal) + files = list(d.get_files()) + files = [file.name for file in files] + print(f"second files: {files}") + assert "somerecycledfile" in files + assert "somenormalfile1" in files diff --git a/tox.ini b/tox.ini index fb929642..33d32846 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ setenv = PYTHON="{envpython}" commands = make modules - py.test core hscommon + {posargs:py.test} core hscommon deps = -r{toxinidir}/requirements.txt -r{toxinidir}/requirements-extra.txt From a26de27c479c8b7d74569cc5f2a647e350139f1e Mon Sep 17 00:00:00 2001 From: glubsy Date: Tue, 28 Jul 2020 16:33:28 +0200 Subject: [PATCH 02/21] Implement dialog and base classes for model/view --- core/app.py | 9 +++ core/exclude.py | 97 ++++++++++++++++++++++++++ core/gui/exclude_list_dialog.py | 64 +++++++++++++++++ core/gui/exclude_list_table.py | 117 ++++++++++++++++++++++++++++++++ core/gui/ignore_list_dialog.py | 2 +- qt/app.py | 24 ++++++- qt/directories_dialog.py | 1 + qt/exclude_list_dialog.py | 83 ++++++++++++++++++++++ qt/exclude_list_table.py | 84 +++++++++++++++++++++++ qt/ignore_list_table.py | 2 + qt/tabbed_window.py | 6 ++ 11 files changed, 485 insertions(+), 4 deletions(-) create mode 100644 core/exclude.py create mode 100644 core/gui/exclude_list_dialog.py create mode 100644 core/gui/exclude_list_table.py create mode 100644 qt/exclude_list_dialog.py create mode 100644 qt/exclude_list_table.py diff --git a/core/app.py b/core/app.py index 0a9301a2..53627c8c 100644 --- a/core/app.py +++ b/core/app.py @@ -26,11 +26,13 @@ from .pe.photo import get_delta_dimensions from .util import cmp_value, fix_surrogate_encoding from . import directories, results, export, fs, prioritize from .ignore import IgnoreList +from .exclude import ExcludeList from .scanner import ScanType from .gui.deletion_options import DeletionOptions from .gui.details_panel import DetailsPanel from .gui.directory_tree import DirectoryTree from .gui.ignore_list_dialog import IgnoreListDialog +from .gui.exclude_list_dialog import ExcludeListDialogCore from .gui.problem_dialog import ProblemDialog from .gui.stats_label import StatsLabel @@ -140,6 +142,7 @@ class DupeGuru(Broadcaster): self.directories = directories.Directories() self.results = results.Results(self) self.ignore_list = IgnoreList() + self.exclude_list = ExcludeList(self) # In addition to "app-level" options, this dictionary also holds options that will be # sent to the scanner. They don't have default values because those defaults values are # defined in the scanner class. @@ -155,6 +158,7 @@ class DupeGuru(Broadcaster): self.directory_tree = DirectoryTree(self) self.problem_dialog = ProblemDialog(self) self.ignore_list_dialog = IgnoreListDialog(self) + self.exclude_list_dialog = ExcludeListDialogCore(self) self.stats_label = StatsLabel(self) self.result_table = None self.deletion_options = DeletionOptions() @@ -587,6 +591,9 @@ class DupeGuru(Broadcaster): p = op.join(self.appdata, "ignore_list.xml") self.ignore_list.load_from_xml(p) self.ignore_list_dialog.refresh() + p = op.join(self.appdata, "exclude_list.xml") + self.exclude_list.load_from_xml(p) + self.exclude_list_dialog.refresh() def load_from(self, filename): """Start an async job to load results from ``filename``. @@ -773,6 +780,8 @@ class DupeGuru(Broadcaster): self.directories.save_to_file(op.join(self.appdata, "last_directories.xml")) p = op.join(self.appdata, "ignore_list.xml") self.ignore_list.save_to_xml(p) + p = op.join(self.appdata, "exclude_list.xml") + self.exclude_list.save_to_xml(p) self.notify("save_session") def save_as(self, filename): diff --git a/core/exclude.py b/core/exclude.py new file mode 100644 index 00000000..03b2dc31 --- /dev/null +++ b/core/exclude.py @@ -0,0 +1,97 @@ +# This software is licensed under the "GPLv3" License as described in the "LICENSE" file, +# which should be included with this package. The terms are also available at +# http://www.gnu.org/licenses/gpl-3.0.html + +from .markable import Markable +from xml.etree import ElementTree as ET +from hscommon.util import FileOrPath + + +class ExcludeList(Markable): + """Exclude list of regular expression strings to filter out directories + and files that we want to avoid scanning.""" + + # ---Override + def __init__(self, app): + Markable.__init__(self) + self.app = app + self._excluded = [] # set of strings + self._count = 0 + + def __iter__(self): + for regex in self._excluded: + yield self.is_marked(regex), regex + + def __len__(self): + return self._count + + def _is_markable(self, row): + return True + + # ---Public + def add(self, regex): + self._excluded.insert(0, regex) + self._count = len(self._excluded) + + def isExcluded(self, regex): + if regex in self._excluded: + return True + return False + + def clear(self): + self._excluded = [] + self._count = 0 + + def remove(self, regex): + return self._excluded.remove(regex) + + def rename(self, regex, newregex): + if regex not in self._excluded: + return + marked = self.is_marked(regex) + index = self._excluded.index(regex) + self._excluded[index] = newregex + if marked: + # Not marked by default when added + self.mark(self._excluded[index]) + + def change_index(self, regex, new_index): + item = self._excluded.pop(regex) + self._excluded.insert(new_index, item) + + def load_from_xml(self, infile): + """Loads the ignore list from a XML created with save_to_xml. + + infile can be a file object or a filename. + """ + try: + root = ET.parse(infile).getroot() + except Exception as e: + print(f"Error while loading {infile}: {e}") + return + marked = set() + exclude_elems = (e for e in root if e.tag == "exclude") + for exclude_item in exclude_elems: + regex_string = exclude_item.get("regex") + if not regex_string: + continue + self.add(regex_string) + if exclude_item.get("marked") == "y": + marked.add(regex_string) + for item in marked: + # this adds item to the Markable "marked" set + self.mark(item) + + def save_to_xml(self, outfile): + """Create a XML file that can be used by load_from_xml. + + outfile can be a file object or a filename. + """ + root = ET.Element("exclude_list") + for regex in self._excluded: + exclude_node = ET.SubElement(root, "exclude") + exclude_node.set("regex", str(regex)) + exclude_node.set("marked", ("y" if self.is_marked(regex) else "n")) + tree = ET.ElementTree(root) + with FileOrPath(outfile, "wb") as fp: + tree.write(fp, encoding="utf-8") diff --git a/core/gui/exclude_list_dialog.py b/core/gui/exclude_list_dialog.py new file mode 100644 index 00000000..1d258033 --- /dev/null +++ b/core/gui/exclude_list_dialog.py @@ -0,0 +1,64 @@ +# Created On: 2012/03/13 +# Copyright 2015 Hardcoded Software (http://www.hardcoded.net) +# +# This software is licensed under the "GPLv3" License as described in the "LICENSE" file, +# which should be included with this package. The terms are also available at +# http://www.gnu.org/licenses/gpl-3.0.html + +# from hscommon.trans import tr +from .exclude_list_table import ExcludeListTable + +default_regexes = [".*thumbs", "\.DS.Store", "\.Trash", "Trash-Bin"] + + +class ExcludeListDialogCore: + # --- View interface + # show() + # + + def __init__(self, app): + self.app = app + self.exclude_list = self.app.exclude_list # Markable from exclude.py + self.exclude_list_table = ExcludeListTable(self, app) # GUITable, this is the "model" + + def restore_defaults(self): + for _, regex in self.exclude_list: + if regex not in default_regexes: + self.exclude_list.unmark(regex) + for default_regex in default_regexes: + if not self.exclude_list.isExcluded(default_regex): + self.exclude_list.add(default_regex) + self.exclude_list.mark(default_regex) + self.refresh() + + def refresh(self): + self.exclude_list_table.refresh() + + def remove_selected(self): + for row in self.exclude_list_table.selected_rows: + self.exclude_list_table.remove(row) + self.exclude_list.remove(row.regex) + self.refresh() + + def rename_selected(self, newregex): + """Renames the selected regex to ``newregex``. + If there's more than one selected row, the first one is used. + :param str newregex: The regex to rename the row's regex to. + """ + try: + r = self.exclude_list_table.selected_rows[0] + self.exclude_list.rename(r.regex, newregex) + self.refresh() + return True + except Exception as e: + print(f"dupeGuru Warning: {e}") + return False + + def add(self, regex): + self.exclude_list.add(regex) + self.exclude_list.mark(regex) + # TODO make checks here before adding to GUI + self.exclude_list_table.add(regex) + + def show(self): + self.view.show() diff --git a/core/gui/exclude_list_table.py b/core/gui/exclude_list_table.py new file mode 100644 index 00000000..0d495a86 --- /dev/null +++ b/core/gui/exclude_list_table.py @@ -0,0 +1,117 @@ +# This software is licensed under the "GPLv3" License as described in the "LICENSE" file, +# which should be included with this package. The terms are also available at +# http://www.gnu.org/licenses/gpl-3.0.html + +from .base import DupeGuruGUIObject +from hscommon.gui.table import GUITable, Row +from hscommon.gui.column import Column, Columns +from hscommon.trans import trget +tr = trget("ui") + + +class ExcludeListTable(GUITable, DupeGuruGUIObject): + COLUMNS = [ + Column("marked", ""), + Column("regex", tr("Regex")) + ] + + def __init__(self, exclude_list_dialog, app): + GUITable.__init__(self) + DupeGuruGUIObject.__init__(self, app) + # self.columns = Columns(self, prefaccess=app, savename="ExcludeTable") + self.columns = Columns(self) + self.dialog = exclude_list_dialog + + def rename_selected(self, newname): + row = self.selected_row + if row is None: + # There's all kinds of way the current row can be swept off during rename. When it + # happens, selected_row will be None. + return False + row._data = None + return self.dialog.rename_selected(newname) + + # --- Virtual + def _do_add(self, regex): + """(Virtual) Creates a new row, adds it in the table. + Returns ``(row, insert_index)``. + """ + # Return index 0 to insert at the top + return ExcludeListRow(self, self.dialog.exclude_list.is_marked(regex), regex), 0 + + def _do_delete(self): + self.dalog.exclude_list.remove(self.selected_row.regex) + + # --- Override + def add(self, regex): + row, insert_index = self._do_add(regex) + self.insert(insert_index, row) + # self.select([insert_index]) + self.view.refresh() + + def _fill(self): + for enabled, regex in self.dialog.exclude_list: + self.append(ExcludeListRow(self, enabled, regex)) + + # def remove(self): + # super().remove(super().selected_rows) + + # def _update_selection(self): + # # rows = self.selected_rows + # # self.dialog._select_rows(list(map(attrgetter("_dupe"), rows))) + # self.dialog.remove_selected() + + def refresh(self, refresh_view=True): + """Override to avoid keeping previous selection in case of multiple rows + selected previously.""" + self.cancel_edits() + del self[:] + self._fill() + # sd = self._sort_descriptor + # if sd is not None: + # super().sort_by(self, column_name=sd.column, desc=sd.desc) + if refresh_view: + self.view.refresh() + + +class ExcludeListRow(Row): + def __init__(self, table, enabled, regex): + Row.__init__(self, table) + self._app = table.app + self._data = None + self.enabled_original = enabled + self.regex_original = regex + self.enabled = str(enabled) + self.regex = str(regex) + + @property + def data(self): + def get_display_info(row): + return {"marked": row.enabled, "regex": row.regex} + + if self._data is None: + self._data = get_display_info(self) + return self._data + + @property + def markable(self): + return True + + @property + def marked(self): + return self._app.exclude_list.is_marked(self.regex) + + @marked.setter + def marked(self, value): + if value: + self._app.exclude_list.mark(self.regex) + else: + self._app.exclude_list.unmark(self.regex) + + # @property + # def regex(self): + # return self.regex + + # @regex.setter + # def regex(self, value): + # self._app.exclude_list.add(self._regex, value) \ No newline at end of file diff --git a/core/gui/ignore_list_dialog.py b/core/gui/ignore_list_dialog.py index 1590c837..cd3a0996 100644 --- a/core/gui/ignore_list_dialog.py +++ b/core/gui/ignore_list_dialog.py @@ -17,7 +17,7 @@ class IgnoreListDialog: def __init__(self, app): self.app = app self.ignore_list = self.app.ignore_list - self.ignore_list_table = IgnoreListTable(self) + self.ignore_list_table = IgnoreListTable(self) # GUITable def clear(self): if not self.ignore_list: diff --git a/qt/app.py b/qt/app.py index 4ed70b54..2c14b6a2 100644 --- a/qt/app.py +++ b/qt/app.py @@ -27,6 +27,7 @@ from .result_window import ResultWindow from .directories_dialog import DirectoriesDialog from .problem_dialog import ProblemDialog from .ignore_list_dialog import IgnoreListDialog +from .exclude_list_dialog import ExcludeListDialog from .deletion_options import DeletionOptions from .se.details_dialog import DetailsDialog as DetailsDialogStandard from .me.details_dialog import DetailsDialog as DetailsDialogMusic @@ -87,10 +88,16 @@ class DupeGuru(QObject): parent=self.main_window, model=self.model.ignore_list_dialog) self.ignoreListDialog.accepted.connect(self.main_window.onDialogAccepted) + + self.excludeListDialog = self.main_window.createPage( + "ExcludeListDialog", + app=self, + parent=self.main_window, + model=self.model.exclude_list_dialog) else: self.ignoreListDialog = IgnoreListDialog( - parent=parent_window, model=self.model.ignore_list_dialog - ) + parent=parent_window, model=self.model.ignore_list_dialog) + self.excludeDialog = ExcludeListDialog(parent=parent_window) self.deletionOptions = DeletionOptions( parent=parent_window, @@ -130,6 +137,7 @@ class DupeGuru(QObject): tr("Clear Picture Cache"), self.clearPictureCacheTriggered, ), + ("actionExcludeList", "", "", tr("Exclude list"), self.excludeListTriggered), ("actionShowHelp", "F1", "", tr("dupeGuru Help"), self.showHelpTriggered), ("actionAbout", "", "", tr("About dupeGuru"), self.showAboutBoxTriggered), ( @@ -276,10 +284,20 @@ class DupeGuru(QObject): # if not self.main_window.tabWidget.isTabVisible(index): self.main_window.setTabVisible(index, True) self.main_window.setCurrentIndex(index) - return else: self.model.ignore_list_dialog.show() + def excludeListTriggered(self): + if self.main_window: + index = self.main_window.indexOfWidget(self.excludeListDialog) + if index < 0: + index = self.main_window.addTab( + self.excludeListDialog, "Exclude List", switch=True) + self.main_window.setTabVisible(index, True) + self.main_window.setCurrentIndex(index) + else: + self.excludeListDialog.show() + def openDebugLogTriggered(self): debugLogPath = op.join(self.model.appdata, "debug.log") desktop.open_path(debugLogPath) diff --git a/qt/directories_dialog.py b/qt/directories_dialog.py index 45e53be6..82f2896d 100644 --- a/qt/directories_dialog.py +++ b/qt/directories_dialog.py @@ -132,6 +132,7 @@ class DirectoriesDialog(QMainWindow): self.menuView.addAction(self.app.actionDirectoriesWindow) self.menuView.addAction(self.actionShowResultsWindow) self.menuView.addAction(self.app.actionIgnoreList) + self.menuView.addAction(self.app.actionExcludeList) self.menuView.addSeparator() self.menuView.addAction(self.app.actionPreferences) diff --git a/qt/exclude_list_dialog.py b/qt/exclude_list_dialog.py new file mode 100644 index 00000000..3c8a1872 --- /dev/null +++ b/qt/exclude_list_dialog.py @@ -0,0 +1,83 @@ +# This software is licensed under the "GPLv3" License as described in the "LICENSE" file, +# which should be included with this package. The terms are also available at +# http://www.gnu.org/licenses/gpl-3.0.html + +from PyQt5.QtCore import Qt, pyqtSlot +from PyQt5.QtWidgets import ( + QPushButton, QLineEdit, QVBoxLayout, QGridLayout, QDialog, + QTableView, QAbstractItemView, QSpacerItem, QSizePolicy, QHeaderView +) +from .exclude_list_table import ExcludeListTable, ExcludeView + +from hscommon.trans import trget +tr = trget("ui") + + +class ExcludeListDialog(QDialog): + def __init__(self, app, parent, model, **kwargs): + flags = Qt.CustomizeWindowHint | Qt.WindowTitleHint | Qt.WindowSystemMenuHint + super().__init__(parent, flags, **kwargs) + self.specific_actions = frozenset() + self._setupUI() + self.model = model # ExcludeListDialogCore + self.model.view = self + self.table = ExcludeListTable(app, view=self.tableView) + + self.buttonAdd.clicked.connect(self.addItem) + self.buttonRemove.clicked.connect(self.removeItem) + self.buttonRestore.clicked.connect(self.restoreDefaults) + self.buttonClose.clicked.connect(self.accept) + + def _setupUI(self): + layout = QVBoxLayout(self) + gridlayout = QGridLayout() + self.buttonAdd = QPushButton(tr("Add")) + self.buttonRemove = QPushButton(tr("Remove Selected")) + self.buttonRestore = QPushButton(tr("Restore defaults")) + self.buttonClose = QPushButton(tr("Close")) + self.linedit = QLineEdit() + self.tableView = ExcludeView() + triggers = ( + QAbstractItemView.DoubleClicked + | QAbstractItemView.EditKeyPressed + | QAbstractItemView.SelectedClicked + ) + self.tableView.setEditTriggers(triggers) + self.tableView.horizontalHeader().setVisible(True) + self.tableView.setSelectionMode(QTableView.ExtendedSelection) + self.tableView.setSelectionBehavior(QTableView.SelectRows) + # vheader = self.tableView.verticalHeader() + # vheader.setSectionsMovable(True) + # vheader.setVisible(True) + # vheader.setDefaultSectionSize(50) + hheader = self.tableView.horizontalHeader() + hheader.setSectionsMovable(False) + hheader.setSectionResizeMode(QHeaderView.Fixed) + hheader.setStretchLastSection(True) + hheader.setHighlightSections(False) + gridlayout.addWidget(self.linedit, 0, 0) + gridlayout.addWidget(self.buttonAdd, 0, 1, Qt.AlignLeft) + gridlayout.addWidget(self.buttonRemove, 1, 1, Qt.AlignLeft) + gridlayout.addWidget(self.buttonRestore, 2, 1, Qt.AlignLeft) + gridlayout.addWidget(self.tableView, 1, 0, 4, 1) + gridlayout.addItem(QSpacerItem(0, 0, QSizePolicy.Minimum, QSizePolicy.Expanding), 3, 1) + gridlayout.addWidget(self.buttonClose, 4, 1) + layout.addLayout(gridlayout) + + # --- model --> view + def show(self): + super().show() + + @pyqtSlot() + def addItem(self): + text = self.linedit.text() + if not text: + return + self.model.add(text) + self.linedit.clear() + + def removeItem(self): + self.model.remove_selected() + + def restoreDefaults(self): + self.model.restore_defaults() diff --git a/qt/exclude_list_table.py b/qt/exclude_list_table.py new file mode 100644 index 00000000..d6f57003 --- /dev/null +++ b/qt/exclude_list_table.py @@ -0,0 +1,84 @@ +# This software is licensed under the "GPLv3" License as described in the "LICENSE" file, +# which should be included with this package. The terms are also available at +# http://www.gnu.org/licenses/gpl-3.0.html + +from PyQt5.QtCore import Qt, QModelIndex, pyqtSignal +from PyQt5.QtGui import QBrush, QFont, QFontMetrics, QColor +from PyQt5.QtWidgets import QTableView + +from qtlib.column import Column +from qtlib.table import Table + + +class ExcludeListTable(Table): + """Model for exclude list""" + COLUMNS = [ + Column("marked", defaultWidth=15), + Column("regex", defaultWidth=230) + ] + + def __init__(self, app, view, **kwargs): + model = app.model.exclude_list_dialog.exclude_list_table # pointer to GUITable + super().__init__(model, view, **kwargs) + view.horizontalHeader().setSortIndicator(1, Qt.AscendingOrder) + font = view.font() + font.setPointSize(app.prefs.tableFontSize) + view.setFont(font) + fm = QFontMetrics(font) + view.verticalHeader().setDefaultSectionSize(fm.height() + 2) + app.willSavePrefs.connect(self.appWillSavePrefs) + + def _getData(self, row, column, role): + if column.name == "marked": + if role == Qt.CheckStateRole and row.markable: + return Qt.Checked if row.marked else Qt.Unchecked + return None + if role == Qt.DisplayRole: + return row.data[column.name] + elif role == Qt.FontRole: + return QFont(self.view.font()) + elif role == Qt.EditRole: + if column.name == "regex": + return row.data[column.name] + return None + + def _getFlags(self, row, column): + flags = Qt.ItemIsEnabled | Qt.ItemIsSelectable + if column.name == "marked": + if row.markable: + flags |= Qt.ItemIsUserCheckable + elif column.name == "regex": + flags |= Qt.ItemIsEditable + return flags + + def _setData(self, row, column, value, role): + if role == Qt.CheckStateRole: + if column.name == "marked": + row.marked = bool(value) + return True + elif role == Qt.EditRole: + if column.name == "regex": + return self.model.rename_selected(value) + return False + + def sort(self, column, order): + column = self.model.COLUMNS[column] + self.model.sort(column.name, order == Qt.AscendingOrder) + + # --- Events + def appWillSavePrefs(self): + self.model.columns.save_columns() + + # --- model --> view + def invalidate_markings(self): + # redraw view + # HACK. this is the only way I found to update the widget without reseting everything + self.view.scroll(0, 1) + self.view.scroll(0, -1) + + +class ExcludeView(QTableView): + def mouseDoubleClickEvent(self, event): + # FIXME this doesn't seem to do anything relevant + self.doubleClicked.emit(QModelIndex()) + # We don't call the superclass' method because the default behavior is to rename the cell. diff --git a/qt/ignore_list_table.py b/qt/ignore_list_table.py index 96bc51ec..46a9c175 100644 --- a/qt/ignore_list_table.py +++ b/qt/ignore_list_table.py @@ -10,6 +10,8 @@ from qtlib.table import Table class IgnoreListTable(Table): + """ Ignore list model""" + COLUMNS = [ Column("path1", defaultWidth=230), Column("path2", defaultWidth=230), diff --git a/qt/tabbed_window.py b/qt/tabbed_window.py index f7fc13d7..36a92e1a 100644 --- a/qt/tabbed_window.py +++ b/qt/tabbed_window.py @@ -18,6 +18,7 @@ from qtlib.util import moveToScreenCenter, createActions from .directories_dialog import DirectoriesDialog from .result_window import ResultWindow from .ignore_list_dialog import IgnoreListDialog +from .exclude_list_dialog import ExcludeListDialog tr = trget("ui") @@ -157,6 +158,11 @@ class TabWindow(QMainWindow): parent = kwargs.get("parent", self) model = kwargs.get("model") page = IgnoreListDialog(parent, model) + elif cls == "ExcludeListDialog": + app = kwargs.get("app", app) + parent = kwargs.get("parent", self) + model = kwargs.get("model") + page = ExcludeListDialog(app, parent, model) self.pages[cls] = page return page From 2eaf7e7893be7e09cba17a65c6ef80eaa741398a Mon Sep 17 00:00:00 2001 From: glubsy Date: Mon, 17 Aug 2020 04:13:20 +0200 Subject: [PATCH 03/21] Implement exclude list dialog on the Qt side --- core/app.py | 4 +- core/directories.py | 29 +-- core/exclude.py | 365 +++++++++++++++++++++++++++++--- core/gui/exclude_list_dialog.py | 16 +- core/gui/exclude_list_table.py | 17 +- images/dialog-error.png | Bin 0 -> 1398 bytes qt/dg.qrc | 1 + qt/exclude_list_dialog.py | 46 ++-- qt/exclude_list_table.py | 14 +- 9 files changed, 400 insertions(+), 92 deletions(-) create mode 100644 images/dialog-error.png diff --git a/core/app.py b/core/app.py index 53627c8c..46cff0a8 100644 --- a/core/app.py +++ b/core/app.py @@ -26,7 +26,7 @@ from .pe.photo import get_delta_dimensions from .util import cmp_value, fix_surrogate_encoding from . import directories, results, export, fs, prioritize from .ignore import IgnoreList -from .exclude import ExcludeList +from .exclude import ExcludeList as ExcludeList from .scanner import ScanType from .gui.deletion_options import DeletionOptions from .gui.details_panel import DetailsPanel @@ -139,10 +139,10 @@ class DupeGuru(Broadcaster): os.makedirs(self.appdata) self.app_mode = AppMode.Standard self.discarded_file_count = 0 + self.exclude_list = ExcludeList() self.directories = directories.Directories() self.results = results.Results(self) self.ignore_list = IgnoreList() - self.exclude_list = ExcludeList(self) # In addition to "app-level" options, this dictionary also holds options that will be # sent to the scanner. They don't have default values because those defaults values are # defined in the scanner class. diff --git a/core/directories.py b/core/directories.py index 9a372166..5f465818 100644 --- a/core/directories.py +++ b/core/directories.py @@ -5,7 +5,6 @@ # http://www.gnu.org/licenses/gpl-3.0.html import os -import re from xml.etree import ElementTree as ET import logging @@ -14,6 +13,7 @@ from hscommon.path import Path from hscommon.util import FileOrPath from . import fs +from .exclude import ExcludeList __all__ = [ "Directories", @@ -53,34 +53,17 @@ class Directories: Then, when the user starts the scan, :meth:`get_files` is called to retrieve all files (wrapped in :mod:`core.fs`) that have to be scanned according to the chosen folders/states. """ + # FIXME: if there is zero item in these sets, the for each loops will yield NOTHING deny_list_str = set() deny_list_re = set() deny_list_re_files = set() # ---Override - def __init__(self): + def __init__(self, excluded=ExcludeList()): self._dirs = [] # {path: state} self.states = {} - self.deny_list_str.add(r".*Recycle\.Bin$") - self.deny_list_str.add(r"denyme.*") - self.deny_list_str.add(r".*denyme") - self.deny_list_str.add(r".*/test/denyme*") - self.deny_list_str.add(r".*/test/*denyme") - self.deny_list_str.add(r"denyme") - self.deny_list_str.add(r".*\/\..*") - self.deny_list_str.add(r"^\..*") - self.compile_re() - - def compile_re(self): - for expr in self.deny_list_str: - try: - self.deny_list_re.add(re.compile(expr)) - if os.sep not in expr: - self.deny_list_re_files.add(re.compile(expr)) - except Exception as e: - logging.debug(f"Invalid regular expression \"{expr}\" in exclude list: {e}") - print(f"re_all: {self.deny_list_re}\nre_files: {self.deny_list_re_files}") + self._excluded = excluded def __contains__(self, path): for p in self._dirs: @@ -217,7 +200,7 @@ class Directories: for folder in self._get_folders(from_folder, j): yield folder - def get_state(self, path, denylist=deny_list_re): + def get_state(self, path, deny_list_re=deny_list_re): """Returns the state of ``path``. :rtype: :class:`DirectoryState` @@ -225,7 +208,7 @@ class Directories: # direct match? easy result. if path in self.states: return self.states[path] - state = self._default_state_for_path(path, denylist) or DirectoryState.Normal + state = self._default_state_for_path(path, deny_list_re) or DirectoryState.Normal prevlen = 0 # we loop through the states to find the longest matching prefix for p, s in self.states.items(): diff --git a/core/exclude.py b/core/exclude.py index 03b2dc31..74c44c2d 100644 --- a/core/exclude.py +++ b/core/exclude.py @@ -4,38 +4,172 @@ from .markable import Markable from xml.etree import ElementTree as ET +import re +from os import sep +import logging +import functools from hscommon.util import FileOrPath +import time + +default_regexes = [r".*thumbs", r"\.DS.Store", r"\.Trash", r".*Trash-Bin"] +forbidden_regexes = [r".*", r"\/.*", r".*\/.*"] + + +def timer(func): + @functools.wraps(func) + def wrapper_timer(*args): + start = time.perf_counter_ns() + value = func(*args) + end = time.perf_counter_ns() + print(f"DEBUG: func {func.__name__!r} took {end - start} ns.") + return value + return wrapper_timer + + +def memoize(func): + func.cache = dict() + + @functools.wraps(func) + def _memoize(*args): + if args not in func.cache: + func.cache[args] = func(*args) + return func.cache[args] + return _memoize + + +class AlreadyThereException(Exception): + """Expression already in the list""" + def __init__(self, arg="Expression is already in excluded list."): + super().__init__(arg) class ExcludeList(Markable): """Exclude list of regular expression strings to filter out directories - and files that we want to avoid scanning.""" + and files that we want to avoid scanning. + The list() class allows us to preserve item order without too much hassle. + The downside is we have to compare strings every time we look for an item in the list + since we use regex strings as keys. + [regex:str, compilable:bool, error:Exception, compiled:Pattern]) + """ # ---Override - def __init__(self, app): + def __init__(self): Markable.__init__(self) - self.app = app - self._excluded = [] # set of strings + self._excluded = [] self._count = 0 + self._excluded_compiled = set() + + def __debug_test(self): + self.test_regexes = [ + r".*Recycle\.Bin$", r"denyme.*", r".*denyme", r".*/test/denyme*", + r".*/test/*denyme", r"denyme", r".*\/\..*", r"^\..*"] + for regex in self.test_regexes: + try: + self.add(regex) + except Exception as e: + print(f"Exception loading test regex {regex}: {e}") + continue + try: + self.mark(regex) + except Exception as e: + print(f"Exception marking test regex {regex}: {e}") def __iter__(self): - for regex in self._excluded: + """Iterate in order.""" + for item in self._excluded: + regex = item[0] yield self.is_marked(regex), regex def __len__(self): return self._count - def _is_markable(self, row): - return True + def is_markable(self, regex): + return self._is_markable(regex) + + def _is_markable(self, regex): + """Return the cached result of "compilable" property""" + # FIXME save result of compilation via memoization + # return self._excluded.get(regex)[0] + for item in self._excluded: + if item[0] == regex: + return item[1] + return False # FIXME should not be needed + + def _did_mark(self, regex): + for item in self._excluded: + if item[0] == regex: + # no need to test if already present since it's a set() + self._excluded_compiled.add(item[3]) + + def _did_unmark(self, regex): + self._remove_compiled(regex) + + def _remove_compiled(self, regex): + for item in self._excluded_compiled: + if regex in item.pattern: + self._excluded_compiled.remove(item) + break + + # @timer + @memoize + def _do_compile(self, expr): + try: + return re.compile(expr) + except Exception as e: + raise(e) + + # @timer + # @memoize # probably not worth memoizing this one if we memoize the above + def compile_re(self, regex): + compiled = None + try: + compiled = self._do_compile(regex) + except Exception as e: + return False, e, compiled + return True, None, compiled + + def error(self, regex): + """Return the compilation error Exception for regex. It should have a "msg" attr.""" + for item in self._excluded: + if item[0] == regex: + return item[2] + + @property + def compiled(self): + """Should be used by other classes to retrieve the up-to-date list of patterns.""" + return self._excluded_compiled + + @property + def compiled_files(self): + """Should be used by other classes to retrieve the up-to-date list of patterns + for files only.""" + return [compiled_pattern for compiled_pattern in self.compiled if sep not in compiled_pattern.pattern] # ---Public - def add(self, regex): - self._excluded.insert(0, regex) + def add(self, regex, forced=False): + """This interface should throw exceptions if there is an error during regex compilation""" + if self.isExcluded(regex): + # This exception should never be ignored + raise AlreadyThereException() + if regex in forbidden_regexes: + raise Exception("Forbidden (dangerous) expression.") + + iscompilable, exception, compiled = self.compile_re(regex) + if not iscompilable and not forced: + # This exception can be ignored, but taken into account to avoid adding to compiled set + raise exception + else: + self._do_add(regex, iscompilable, exception, compiled) + + def _do_add(self, regex, iscompilable, exception, compiled): + # We need to insert at the top + self._excluded.insert(0, [regex, iscompilable, exception, compiled]) self._count = len(self._excluded) def isExcluded(self, regex): - if regex in self._excluded: - return True + for item in self._excluded: + if regex == item[0]: + return True return False def clear(self): @@ -43,21 +177,48 @@ class ExcludeList(Markable): self._count = 0 def remove(self, regex): - return self._excluded.remove(regex) + for item in self._excluded: + if item[0] == regex: + self._excluded.remove(item) + self._remove_compiled(regex) def rename(self, regex, newregex): - if regex not in self._excluded: + # if regex not in self._excluded or regex == newregex: + # return + if regex == newregex: + return + found = False + for item in self._excluded: + if regex == item[0]: + found = True + break + if not found: return - marked = self.is_marked(regex) - index = self._excluded.index(regex) - self._excluded[index] = newregex - if marked: - # Not marked by default when added - self.mark(self._excluded[index]) - def change_index(self, regex, new_index): - item = self._excluded.pop(regex) - self._excluded.insert(new_index, item) + was_marked = self.is_marked(regex) + is_compilable, exception, compiled = self.compile_re(newregex) + for item in self._excluded: + if item[0] == regex: + # We overwrite the found entry + self._excluded[self._excluded.index(item)] =\ + [newregex, is_compilable, exception, compiled] + if is_compilable and was_marked: + # Not marked by default when added, add it back + self.mark(newregex) + + # def change_index(self, regex, new_index): + # """Internal list must be a list, not dict.""" + # item = self._excluded.pop(regex) + # self._excluded.insert(new_index, item) + + def restore_defaults(self): + for _, regex in self: + if regex not in default_regexes: + self.unmark(regex) + for default_regex in default_regexes: + if not self.isExcluded(default_regex): + self.add(default_regex) + self.mark(default_regex) def load_from_xml(self, infile): """Loads the ignore list from a XML created with save_to_xml. @@ -67,20 +228,29 @@ class ExcludeList(Markable): try: root = ET.parse(infile).getroot() except Exception as e: - print(f"Error while loading {infile}: {e}") - return + logging.warning(f"Error while loading {infile}: {e}") + self.restore_defaults() + self.__debug_test() + return e + marked = set() exclude_elems = (e for e in root if e.tag == "exclude") for exclude_item in exclude_elems: regex_string = exclude_item.get("regex") if not regex_string: continue - self.add(regex_string) + try: + # "forced" avoids compilation exceptions and adds anyway + self.add(regex_string, forced=True) + except AlreadyThereException: + logging.error(f"Regex \"{regex_string}\" loaded from XML was already present in the list.") + continue if exclude_item.get("marked") == "y": marked.add(regex_string) - for item in marked: - # this adds item to the Markable "marked" set - self.mark(item) + + for item in marked: + self.mark(item) + self.__debug_test() def save_to_xml(self, outfile): """Create a XML file that can be used by load_from_xml. @@ -88,10 +258,143 @@ class ExcludeList(Markable): outfile can be a file object or a filename. """ root = ET.Element("exclude_list") - for regex in self._excluded: + # reversed in order to keep order of entries when reloading from xml later + for item in reversed(self._excluded): exclude_node = ET.SubElement(root, "exclude") - exclude_node.set("regex", str(regex)) - exclude_node.set("marked", ("y" if self.is_marked(regex) else "n")) + exclude_node.set("regex", str(item[0])) + exclude_node.set("marked", ("y" if self.is_marked(item[0]) else "n")) tree = ET.ElementTree(root) with FileOrPath(outfile, "wb") as fp: tree.write(fp, encoding="utf-8") + + +class ExcludeDict(ExcludeList): + """Version implemented around a dictionary instead of a list, which implies + to keep the index of each string-key as its sub-element and keep it updated + whenever insert/remove is done.""" + + def __init__(self): + Markable.__init__(self) + # { "regex": { "index": int, "compilable": bool, "error": str, "compiled": Pattern or None}} + # Note: "compilable" key should only be updated on add / rename + self._excluded = {} + self._count = 0 + self._excluded_compiled = set() + + def __iter__(self): + """Iterate in order.""" + for regex in ordered_keys(self._excluded): + yield self.is_marked(regex), regex + + def __len__(self): + return self._count + + def is_markable(self, regex): + return self._is_markable(regex) + + def _is_markable(self, regex): + """Return the cached result of "compilable" property""" + exists = self._excluded.get(regex) + if exists: + return exists.get("compilable") + return False + + def _did_mark(self, regex): + # self._excluded[regex][0] = True # is compilable + try: + self._excluded_compiled.add(self._excluded[regex]["compiled"]) + except Exception as e: + print(f"Exception while adding regex {regex} to compiled set: {e}") + return + + def _did_unmark(self, regex): + self._remove_compiled(regex) + + def is_compilable(self, regex): + """Returns the cached "compilable" value""" + return self._excluded[regex]["compilable"] + + def error(self, regex): + """Return the compilation error message for regex string""" + return self._excluded.get(regex).get("error") + + @property + def compiled(self): + """Should be used by other classes to retrieve the up-to-date list of patterns.""" + return self._excluded_compiled + + @property + def compiled_files(self): + """Should be used by other classes to retrieve the up-to-date list of patterns + for files only.""" + return [compiled_pattern for compiled_pattern in self.compiled if sep not in compiled_pattern.pattern] + + # ---Public + def _do_add(self, regex, iscompilable, exception, compiled): + # We always insert at the top, so index should be 0 and other indices should be pushed by one + for value in self._excluded.values(): + value["index"] += 1 + self._excluded[regex] = {"index": 0, "compilable": iscompilable, "error": exception, "compiled": compiled} + self._count = len(self._excluded) + + def isExcluded(self, regex): + if regex in self._excluded.keys(): + return True + return False + + def clear(self): + self._excluded = {} + self._count = 0 + + def remove(self, regex): + old_value = self._excluded.pop(regex) + # Bring down all indices which where above it + index = old_value["index"] + if index == len(self._excluded): + self._remove_compiled(regex) + return + + for value in self._excluded.values(): + if value.get("index") > old_value["index"]: + value["index"] -= 1 + self._remove_compiled(regex) + + def rename(self, regex, newregex): + if regex == newregex or regex not in self._excluded.keys(): + return + was_marked = self.is_marked(regex) + previous = self._excluded.pop(regex) + iscompilable, error, compiled = self.compile_re(newregex) + self._excluded[newregex] = {"index": previous["index"], "compilable": iscompilable, "error": error, "compiled": compiled} + if was_marked and iscompilable: + self.mark(newregex) + + def save_to_xml(self, outfile): + """Create a XML file that can be used by load_from_xml. + + outfile can be a file object or a filename. + """ + root = ET.Element("exclude_list") + # reversed in order to keep order of entries when reloading from xml later + reversed_list = [] + for key in ordered_keys(self._excluded): + reversed_list.append(key) + for item in reversed(reversed_list): + exclude_node = ET.SubElement(root, "exclude") + exclude_node.set("regex", str(item)) + exclude_node.set("marked", ("y" if self.is_marked(item) else "n")) + tree = ET.ElementTree(root) + with FileOrPath(outfile, "wb") as fp: + tree.write(fp, encoding="utf-8") + + +def ordered_keys(_dict): + """Returns an iterator over the keys of dictionary sorted by "index" key""" + if not len(_dict): + return + list_of_items = [] + for item in _dict.items(): + list_of_items.append(item) + list_of_items.sort(key=lambda x: x[1].get("index")) + for item in list_of_items: + yield item[0] diff --git a/core/gui/exclude_list_dialog.py b/core/gui/exclude_list_dialog.py index 1d258033..e35d4c9f 100644 --- a/core/gui/exclude_list_dialog.py +++ b/core/gui/exclude_list_dialog.py @@ -8,8 +8,6 @@ # from hscommon.trans import tr from .exclude_list_table import ExcludeListTable -default_regexes = [".*thumbs", "\.DS.Store", "\.Trash", "Trash-Bin"] - class ExcludeListDialogCore: # --- View interface @@ -22,13 +20,7 @@ class ExcludeListDialogCore: self.exclude_list_table = ExcludeListTable(self, app) # GUITable, this is the "model" def restore_defaults(self): - for _, regex in self.exclude_list: - if regex not in default_regexes: - self.exclude_list.unmark(regex) - for default_regex in default_regexes: - if not self.exclude_list.isExcluded(default_regex): - self.exclude_list.add(default_regex) - self.exclude_list.mark(default_regex) + self.exclude_list.restore_defaults() self.refresh() def refresh(self): @@ -55,9 +47,11 @@ class ExcludeListDialogCore: return False def add(self, regex): - self.exclude_list.add(regex) + try: + self.exclude_list.add(regex) + except Exception as e: + raise(e) self.exclude_list.mark(regex) - # TODO make checks here before adding to GUI self.exclude_list_table.add(regex) def show(self): diff --git a/core/gui/exclude_list_table.py b/core/gui/exclude_list_table.py index 0d495a86..6a0294f7 100644 --- a/core/gui/exclude_list_table.py +++ b/core/gui/exclude_list_table.py @@ -12,21 +12,18 @@ tr = trget("ui") class ExcludeListTable(GUITable, DupeGuruGUIObject): COLUMNS = [ Column("marked", ""), - Column("regex", tr("Regex")) + Column("regex", tr("Regular Expressions")) ] def __init__(self, exclude_list_dialog, app): GUITable.__init__(self) DupeGuruGUIObject.__init__(self, app) - # self.columns = Columns(self, prefaccess=app, savename="ExcludeTable") self.columns = Columns(self) self.dialog = exclude_list_dialog def rename_selected(self, newname): row = self.selected_row if row is None: - # There's all kinds of way the current row can be swept off during rename. When it - # happens, selected_row will be None. return False row._data = None return self.dialog.rename_selected(newname) @@ -95,7 +92,7 @@ class ExcludeListRow(Row): @property def markable(self): - return True + return self._app.exclude_list.is_markable(self.regex) @property def marked(self): @@ -108,10 +105,18 @@ class ExcludeListRow(Row): else: self._app.exclude_list.unmark(self.regex) + @property + def error(self): + # This assumes error() returns an Exception() + message = self._app.exclude_list.error(self.regex) + if hasattr(message, "msg"): + return self._app.exclude_list.error(self.regex).msg + else: + return message # Exception object # @property # def regex(self): # return self.regex # @regex.setter # def regex(self, value): - # self._app.exclude_list.add(self._regex, value) \ No newline at end of file + # self._app.exclude_list.add(self._regex, value) diff --git a/images/dialog-error.png b/images/dialog-error.png new file mode 100644 index 0000000000000000000000000000000000000000..625c7ff83899dd179075d2c0f0e38a172f424e87 GIT binary patch literal 1398 zcmV-+1&R8JP)B6i9Q-Ev1`6sDvauxc&06ryRm*JrTPZf9^rdcgitKCAW6otNT zc+I90bh~20Uqbkb2|sw6>ZR)f@HLSq34CyHdb&Ju!y8s=3kyaPj(Q#%1c+rJg#xiE z?)meqoIBU9A3v_iwoiNTuIH+5Tou4KMQ)<-gYwkW4R3n4yHy4%6@q%5Xk~?{)dE07 z$Yv1R#u^*LOl*|Pocq_myz~3^`5MQsdWPy3`vK&ueh?x*xaqy`T^_&T4NdRVDZ=ID z?XY}!erhHF&W&zFXc+AK#V=ak=H_RP!Rz0l8utbGipaxk$N9l+9{SKy(Q)V=JcvdS zi1Y=>PwnYx?D=_){QB3e^$Qn%{4~|4k(0rW_>#!Y4UzBOHVH3;A)S5uw!-_V2Sl*A zh=1e=w|Mx&%jL1L$9+}gjXeRrEn?d6^IIfsFPJ9Y@#COMUb*V~a0!v?5}Z8AZ65Wg za-mp!+t)?zby)x(oDNS_unV9JPa0WRunU$&kkIal;y8uihIhJC zMc{3_01SBM*xXz-SX$zLO7+UN<<_USa&>kVO!Ic%78wCB(&suN_Zymi`$LP6rGlKbHLSCO7H(yzlvB%;q(JN7y!g zvx&ed0LzC9X18lc?KZ*YCSsb9SW;V7I_>$8k)0|*1hOgHE#IxE(E!V$<+^bMCjhMH zsxEy^q}5(sEha;ez3X!_j4dGB-m+z3IL^*;3aPp~IJ@oQPA7GGUNi`>CnwohTWc%) z5x`soh+{M{K|nt#y=4px5J5TwQh?tA7ytraO{t7p4K)lDuGn#A z?d`4VmzP7}hfh+CwgH}|`n$mM_BS>+V{3Hus=~F8rBHxsmHOhMAHiGmuVVyWacOzE zBZGs;z`%8?I?|uwOI{pBUwoG8sXPErRsClihTm9PTx_PT!m_Tu>YVMVDO#SV zn=31B2yg58N!Emywc71NE9>ii3W053ebrgz_QV9CZL@gsVrK;&|7_LOo&YZa>ud0W zRo7jrZ)|k!@o}VBysE0PES#irPejk2JLmmF^)0W4MG-0Y{3;fa5nyBv=H3YJA9bAD zPS$DzMjWSaF|_|}h!_TDwTe-xu+eBl>#p1RCcN|K@MmBZSW(q_FMt{_0*qRTUja|K z6Fjw4ELtO_l5L8ho<|%6JI@A@G!(I{^gc&yn^vczYmG*DE{^wo7CwIhPG#Y1zC;|hS3z=;o1_Hp_F>RoussVlUUkb7oV@J=PasU7T07*qoM6N<$ Eg8I^+NdN!< literal 0 HcmV?d00001 diff --git a/qt/dg.qrc b/qt/dg.qrc index 760f2a85..7b2846bf 100644 --- a/qt/dg.qrc +++ b/qt/dg.qrc @@ -10,5 +10,6 @@ ../images/old_zoom_out.png ../images/old_zoom_original.png ../images/old_zoom_best_fit.png + ../images/dialog-error.png diff --git a/qt/exclude_list_dialog.py b/qt/exclude_list_dialog.py index 3c8a1872..96389568 100644 --- a/qt/exclude_list_dialog.py +++ b/qt/exclude_list_dialog.py @@ -9,6 +9,7 @@ from PyQt5.QtWidgets import ( ) from .exclude_list_table import ExcludeListTable, ExcludeView +from core.exclude import AlreadyThereException from hscommon.trans import trget tr = trget("ui") @@ -17,16 +18,18 @@ class ExcludeListDialog(QDialog): def __init__(self, app, parent, model, **kwargs): flags = Qt.CustomizeWindowHint | Qt.WindowTitleHint | Qt.WindowSystemMenuHint super().__init__(parent, flags, **kwargs) + self.app = app self.specific_actions = frozenset() self._setupUI() self.model = model # ExcludeListDialogCore self.model.view = self - self.table = ExcludeListTable(app, view=self.tableView) + self.table = ExcludeListTable(app, view=self.tableView) # Qt ExcludeListTable - self.buttonAdd.clicked.connect(self.addItem) - self.buttonRemove.clicked.connect(self.removeItem) + self.buttonAdd.clicked.connect(self.addStringFromLineEdit) + self.buttonRemove.clicked.connect(self.removeSelected) self.buttonRestore.clicked.connect(self.restoreDefaults) self.buttonClose.clicked.connect(self.accept) + self.buttonHelp.clicked.connect(self.display_help_message) def _setupUI(self): layout = QVBoxLayout(self) @@ -35,6 +38,7 @@ class ExcludeListDialog(QDialog): self.buttonRemove = QPushButton(tr("Remove Selected")) self.buttonRestore = QPushButton(tr("Restore defaults")) self.buttonClose = QPushButton(tr("Close")) + self.buttonHelp = QPushButton(tr("Help")) self.linedit = QLineEdit() self.tableView = ExcludeView() triggers = ( @@ -43,25 +47,26 @@ class ExcludeListDialog(QDialog): | QAbstractItemView.SelectedClicked ) self.tableView.setEditTriggers(triggers) - self.tableView.horizontalHeader().setVisible(True) self.tableView.setSelectionMode(QTableView.ExtendedSelection) self.tableView.setSelectionBehavior(QTableView.SelectRows) - # vheader = self.tableView.verticalHeader() - # vheader.setSectionsMovable(True) - # vheader.setVisible(True) - # vheader.setDefaultSectionSize(50) + self.tableView.setShowGrid(False) + vheader = self.tableView.verticalHeader() + vheader.setSectionsMovable(True) + vheader.setVisible(False) hheader = self.tableView.horizontalHeader() hheader.setSectionsMovable(False) hheader.setSectionResizeMode(QHeaderView.Fixed) hheader.setStretchLastSection(True) hheader.setHighlightSections(False) + hheader.setVisible(True) gridlayout.addWidget(self.linedit, 0, 0) gridlayout.addWidget(self.buttonAdd, 0, 1, Qt.AlignLeft) gridlayout.addWidget(self.buttonRemove, 1, 1, Qt.AlignLeft) gridlayout.addWidget(self.buttonRestore, 2, 1, Qt.AlignLeft) - gridlayout.addWidget(self.tableView, 1, 0, 4, 1) - gridlayout.addItem(QSpacerItem(0, 0, QSizePolicy.Minimum, QSizePolicy.Expanding), 3, 1) - gridlayout.addWidget(self.buttonClose, 4, 1) + gridlayout.addWidget(self.buttonHelp, 3, 1, Qt.AlignLeft) + gridlayout.addWidget(self.tableView, 1, 0, 5, 1) + gridlayout.addItem(QSpacerItem(0, 0, QSizePolicy.Minimum, QSizePolicy.Expanding), 4, 1) + gridlayout.addWidget(self.buttonClose, 5, 1) layout.addLayout(gridlayout) # --- model --> view @@ -69,15 +74,28 @@ class ExcludeListDialog(QDialog): super().show() @pyqtSlot() - def addItem(self): + def addStringFromLineEdit(self): text = self.linedit.text() if not text: return - self.model.add(text) + try: + self.model.add(text) + except AlreadyThereException: + self.app.show_message("Expression already in the list.") + return + except Exception as e: + self.app.show_message(f"Expression is invalid: {e}") + return self.linedit.clear() - def removeItem(self): + def removeSelected(self): self.model.remove_selected() def restoreDefaults(self): self.model.restore_defaults() + + def display_help_message(self): + self.app.show_message("""\ +These python regular expressions will filter out files and directory paths \ +specified here.\nDuring directory selection, paths filtered here will be added as \ +"Skipped" by default, but regular files will be ignored altogether during scans.""") diff --git a/qt/exclude_list_table.py b/qt/exclude_list_table.py index d6f57003..5f729dfa 100644 --- a/qt/exclude_list_table.py +++ b/qt/exclude_list_table.py @@ -2,8 +2,8 @@ # which should be included with this package. The terms are also available at # http://www.gnu.org/licenses/gpl-3.0.html -from PyQt5.QtCore import Qt, QModelIndex, pyqtSignal -from PyQt5.QtGui import QBrush, QFont, QFontMetrics, QColor +from PyQt5.QtCore import Qt, QModelIndex +from PyQt5.QtGui import QFont, QFontMetrics, QIcon from PyQt5.QtWidgets import QTableView from qtlib.column import Column @@ -20,7 +20,7 @@ class ExcludeListTable(Table): def __init__(self, app, view, **kwargs): model = app.model.exclude_list_dialog.exclude_list_table # pointer to GUITable super().__init__(model, view, **kwargs) - view.horizontalHeader().setSortIndicator(1, Qt.AscendingOrder) + # view.horizontalHeader().setSortIndicator(1, Qt.AscendingOrder) font = view.font() font.setPointSize(app.prefs.tableFontSize) view.setFont(font) @@ -32,6 +32,10 @@ class ExcludeListTable(Table): if column.name == "marked": if role == Qt.CheckStateRole and row.markable: return Qt.Checked if row.marked else Qt.Unchecked + if role == Qt.ToolTipRole and not row.markable: + return "Compilation error: " + row.get_cell_value("error") + if role == Qt.DecorationRole and not row.markable: + return QIcon.fromTheme("dialog-error", QIcon(":/error")) return None if role == Qt.DisplayRole: return row.data[column.name] @@ -43,12 +47,12 @@ class ExcludeListTable(Table): return None def _getFlags(self, row, column): - flags = Qt.ItemIsEnabled | Qt.ItemIsSelectable + flags = Qt.ItemIsEnabled if column.name == "marked": if row.markable: flags |= Qt.ItemIsUserCheckable elif column.name == "regex": - flags |= Qt.ItemIsEditable + flags |= Qt.ItemIsEditable | Qt.ItemIsSelectable | Qt.ItemIsDragEnabled | Qt.ItemIsDropEnabled return flags def _setData(self, row, column, value, role): From 9f223f3964fca3d0471d2994b1636f5b0b3f1433 Mon Sep 17 00:00:00 2001 From: glubsy Date: Thu, 20 Aug 2020 02:46:06 +0200 Subject: [PATCH 04/21] Concatenate regexes prio to compilation * Concatenating regexes into one Pattern might yield better performance under (un)certain conditions. * Filenames are tested against regexes with no os.sep in them. This may or may not be what we want to do. And alternative would be to test against the whole (absolute) path of each file, which would filter more agressively. --- core/app.py | 2 +- core/directories.py | 70 ++++++++++++++++------------- core/exclude.py | 94 +++++++++++++++++++++++++-------------- qt/exclude_list_dialog.py | 3 ++ 4 files changed, 102 insertions(+), 67 deletions(-) diff --git a/core/app.py b/core/app.py index 46cff0a8..ee31a114 100644 --- a/core/app.py +++ b/core/app.py @@ -140,7 +140,7 @@ class DupeGuru(Broadcaster): self.app_mode = AppMode.Standard self.discarded_file_count = 0 self.exclude_list = ExcludeList() - self.directories = directories.Directories() + self.directories = directories.Directories(self.exclude_list) self.results = results.Results(self) self.ignore_list = IgnoreList() # In addition to "app-level" options, this dictionary also holds options that will be diff --git a/core/directories.py b/core/directories.py index 5f465818..781ced90 100644 --- a/core/directories.py +++ b/core/directories.py @@ -13,7 +13,6 @@ from hscommon.path import Path from hscommon.util import FileOrPath from . import fs -from .exclude import ExcludeList __all__ = [ "Directories", @@ -53,17 +52,15 @@ class Directories: Then, when the user starts the scan, :meth:`get_files` is called to retrieve all files (wrapped in :mod:`core.fs`) that have to be scanned according to the chosen folders/states. """ - # FIXME: if there is zero item in these sets, the for each loops will yield NOTHING - deny_list_str = set() - deny_list_re = set() - deny_list_re_files = set() # ---Override - def __init__(self, excluded=ExcludeList()): + def __init__(self, exclude_list=None): self._dirs = [] # {path: state} self.states = {} - self._excluded = excluded + self._exclude_list = exclude_list + if exclude_list is not None: + exclude_list._combined_regex = False # TODO make a setter def __contains__(self, path): for p in self._dirs: @@ -81,49 +78,58 @@ class Directories: return len(self._dirs) # ---Private - def _default_state_for_path(self, path, deny_list_re=deny_list_re): + def _default_state_for_path(self, path): + # New logic with regex filters + if self._exclude_list is not None and len(self._exclude_list) > 0: + # We iterate even if we only have one item here + for denied_path_re in self._exclude_list.compiled_combined: + if denied_path_re.match(str(path)): + return DirectoryState.Excluded + return None + # Old default logic, still used during initialization of DirectoryTree: # Override this in subclasses to specify the state of some special folders. - # if path.name.startswith("."): # hidden - # return DirectoryState.Excluded - for denied_path_re in deny_list_re: - if denied_path_re.match(str(path)): - return DirectoryState.Excluded + if path.name.startswith("."): + return DirectoryState.Excluded - def _get_files(self, from_path, fileclasses, j, deny_list_re=deny_list_re_files): + def _get_files(self, from_path, fileclasses, j): for root, dirs, files in os.walk(str(from_path)): j.check_if_cancelled() - root = Path(root) + rootPath = Path(root) state = self.get_state(root) if state == DirectoryState.Excluded: # Recursively get files from folders with lots of subfolder is expensive. However, there # might be a subfolder in this path that is not excluded. What we want to do is to skim # through self.states and see if we must continue, or we can stop right here to save time - if not any(p[: len(root)] == root for p in self.states): + if not any(p[: len(rootPath)] == rootPath for p in self.states): del dirs[:] try: if state != DirectoryState.Excluded: - found_files = [] - for f in files: - found = False - for expr in deny_list_re: - found = expr.match(f) - if found: - break - if not found: - found_files.append(fs.get_file(root + f, fileclasses=fileclasses)) + # Old logic + if self._exclude_list is None or not len(self._exclude_list): + found_files = [fs.get_file(rootPath + f, fileclasses=fileclasses) for f in files] + else: + found_files = [] + for f in files: + found = False + for expr in self._exclude_list.compiled_files_combined: + found = expr.match(f) + if found: + break + if not found: + found_files.append(fs.get_file(rootPath + f, fileclasses=fileclasses)) found_files = [f for f in found_files if f is not None] # In some cases, directories can be considered as files by dupeGuru, which is # why we have this line below. In fact, there only one case: Bundle files under # OS X... In other situations, this forloop will do nothing. for d in dirs[:]: - f = fs.get_file(root + d, fileclasses=fileclasses) + f = fs.get_file(rootPath + d, fileclasses=fileclasses) if f is not None: found_files.append(f) dirs.remove(d) logging.debug( "Collected %d files in folder %s", len(found_files), - str(root), + str(rootPath), ) for file in found_files: file.is_ref = state == DirectoryState.Reference @@ -131,7 +137,7 @@ class Directories: except (EnvironmentError, fs.InvalidPath): pass - def _get_folders(self, from_folder, j, deny_list_re=deny_list_re): + def _get_folders(self, from_folder, j): j.check_if_cancelled() try: for subfolder in from_folder.subfolders: @@ -177,7 +183,7 @@ class Directories: except EnvironmentError: return [] - def get_files(self, fileclasses=None, j=job.nulljob, deny_list_re=deny_list_re_files): + def get_files(self, fileclasses=None, j=job.nulljob): """Returns a list of all files that are not excluded. Returned files also have their ``is_ref`` attr set if applicable. @@ -185,7 +191,7 @@ class Directories: if fileclasses is None: fileclasses = [fs.File] for path in self._dirs: - for file in self._get_files(path, fileclasses=fileclasses, j=j, deny_list_re=deny_list_re): + for file in self._get_files(path, fileclasses=fileclasses, j=j): yield file def get_folders(self, folderclass=None, j=job.nulljob): @@ -200,7 +206,7 @@ class Directories: for folder in self._get_folders(from_folder, j): yield folder - def get_state(self, path, deny_list_re=deny_list_re): + def get_state(self, path): """Returns the state of ``path``. :rtype: :class:`DirectoryState` @@ -208,7 +214,7 @@ class Directories: # direct match? easy result. if path in self.states: return self.states[path] - state = self._default_state_for_path(path, deny_list_re) or DirectoryState.Normal + state = self._default_state_for_path(path) or DirectoryState.Normal prevlen = 0 # we loop through the states to find the longest matching prefix for p, s in self.states.items(): diff --git a/core/exclude.py b/core/exclude.py index 74c44c2d..29bab540 100644 --- a/core/exclude.py +++ b/core/exclude.py @@ -4,6 +4,8 @@ from .markable import Markable from xml.etree import ElementTree as ET +# TODO: perhaps use regex module for better Unicode support? https://pypi.org/project/regex/ +# or perhaps also https://pypi.org/project/re2/ import re from os import sep import logging @@ -50,14 +52,18 @@ class ExcludeList(Markable): The downside is we have to compare strings every time we look for an item in the list since we use regex strings as keys. [regex:str, compilable:bool, error:Exception, compiled:Pattern]) + If combined_regex is True, the compiled regexes will be combined into one Pattern + instead of returned as separate Patterns. """ # ---Override - def __init__(self): + def __init__(self, combined_regex=False): Markable.__init__(self) + self._combined_regex = combined_regex self._excluded = [] self._count = 0 self._excluded_compiled = set() + self._dirty = True def __debug_test(self): self.test_regexes = [ @@ -81,30 +87,38 @@ class ExcludeList(Markable): yield self.is_marked(regex), regex def __len__(self): - return self._count + """Returns the number of marked regexes.""" + return len([x for marked, x in self if marked]) def is_markable(self, regex): return self._is_markable(regex) def _is_markable(self, regex): """Return the cached result of "compilable" property""" - # FIXME save result of compilation via memoization - # return self._excluded.get(regex)[0] for item in self._excluded: if item[0] == regex: return item[1] - return False # FIXME should not be needed + return False # should not be needed def _did_mark(self, regex): + self._add_compiled(regex) + + def _did_unmark(self, regex): + self._remove_compiled(regex) + + def _add_compiled(self, regex): + if self._combined_regex: + self._dirty = True + return for item in self._excluded: if item[0] == regex: # no need to test if already present since it's a set() self._excluded_compiled.add(item[3]) - def _did_unmark(self, regex): - self._remove_compiled(regex) - def _remove_compiled(self, regex): + if self._combined_regex: + self._dirty = True + return for item in self._excluded_compiled: if regex in item.pattern: self._excluded_compiled.remove(item) @@ -137,13 +151,41 @@ class ExcludeList(Markable): @property def compiled(self): """Should be used by other classes to retrieve the up-to-date list of patterns.""" - return self._excluded_compiled + if not self._combined_regex: + return self._excluded_compiled + else: + return self.compiled_combined @property def compiled_files(self): """Should be used by other classes to retrieve the up-to-date list of patterns for files only.""" - return [compiled_pattern for compiled_pattern in self.compiled if sep not in compiled_pattern.pattern] + if not self._combined_regex: + # Return each compiled element separately + # return [compiled_pattern for compiled_pattern in self.compiled if sep not in compiled_pattern.pattern] + for compiled in self.compiled: + if sep not in compiled.pattern: + yield compiled + else: + return self.compiled_files_combined + + @property + def compiled_combined(self): + if self._dirty: + self._cached_compiled_combined =\ + re.compile('|'.join(x for marked, x in self if marked)) + # Must compute the filtered out version as well + self._cached_compiled_combined_files =\ + re.compile('|'.join(x for marked, x in self + if marked and sep not in x)) + self._dirty = False + # returned as a tuple to get a free iterator and to avoid subclassing + return (self._cached_compiled_combined,) + + @property + def compiled_files_combined(self): + # returned as a tuple to get a free iterator and to avoid subclassing + return (self._cached_compiled_combined_files,) # ---Public def add(self, regex, forced=False): @@ -164,7 +206,7 @@ class ExcludeList(Markable): def _do_add(self, regex, iscompilable, exception, compiled): # We need to insert at the top self._excluded.insert(0, [regex, iscompilable, exception, compiled]) - self._count = len(self._excluded) + # self._count = len(self._excluded) def isExcluded(self, regex): for item in self._excluded: @@ -174,7 +216,6 @@ class ExcludeList(Markable): def clear(self): self._excluded = [] - self._count = 0 def remove(self, regex): for item in self._excluded: @@ -286,9 +327,6 @@ class ExcludeDict(ExcludeList): for regex in ordered_keys(self._excluded): yield self.is_marked(regex), regex - def __len__(self): - return self._count - def is_markable(self, regex): return self._is_markable(regex) @@ -299,17 +337,16 @@ class ExcludeDict(ExcludeList): return exists.get("compilable") return False - def _did_mark(self, regex): - # self._excluded[regex][0] = True # is compilable + def _add_compiled(self, regex): + if self._combined_regex: + self._dirty = True + return try: self._excluded_compiled.add(self._excluded[regex]["compiled"]) except Exception as e: print(f"Exception while adding regex {regex} to compiled set: {e}") return - def _did_unmark(self, regex): - self._remove_compiled(regex) - def is_compilable(self, regex): """Returns the cached "compilable" value""" return self._excluded[regex]["compilable"] @@ -318,24 +355,13 @@ class ExcludeDict(ExcludeList): """Return the compilation error message for regex string""" return self._excluded.get(regex).get("error") - @property - def compiled(self): - """Should be used by other classes to retrieve the up-to-date list of patterns.""" - return self._excluded_compiled - - @property - def compiled_files(self): - """Should be used by other classes to retrieve the up-to-date list of patterns - for files only.""" - return [compiled_pattern for compiled_pattern in self.compiled if sep not in compiled_pattern.pattern] - # ---Public def _do_add(self, regex, iscompilable, exception, compiled): # We always insert at the top, so index should be 0 and other indices should be pushed by one for value in self._excluded.values(): value["index"] += 1 self._excluded[regex] = {"index": 0, "compilable": iscompilable, "error": exception, "compiled": compiled} - self._count = len(self._excluded) + # self._count = len(self._excluded) def isExcluded(self, regex): if regex in self._excluded.keys(): @@ -344,13 +370,13 @@ class ExcludeDict(ExcludeList): def clear(self): self._excluded = {} - self._count = 0 def remove(self, regex): old_value = self._excluded.pop(regex) # Bring down all indices which where above it index = old_value["index"] - if index == len(self._excluded): + if index == len(self._excluded) - 1: # we start at 0... + # Old index was at the end, no need to update other indices self._remove_compiled(regex) return diff --git a/qt/exclude_list_dialog.py b/qt/exclude_list_dialog.py index 96389568..f251d1e2 100644 --- a/qt/exclude_list_dialog.py +++ b/qt/exclude_list_dialog.py @@ -68,10 +68,13 @@ class ExcludeListDialog(QDialog): gridlayout.addItem(QSpacerItem(0, 0, QSizePolicy.Minimum, QSizePolicy.Expanding), 4, 1) gridlayout.addWidget(self.buttonClose, 5, 1) layout.addLayout(gridlayout) + self.linedit.setPlaceholderText("Type a regular expression here...") + self.linedit.setFocus() # --- model --> view def show(self): super().show() + self.linedit.setFocus() @pyqtSlot() def addStringFromLineEdit(self): From 3382bd5e5b5da1424b6e6d9b0eed256b503867fb Mon Sep 17 00:00:00 2001 From: glubsy Date: Thu, 20 Aug 2020 17:12:39 +0200 Subject: [PATCH 05/21] Fix crash when recreating Results window/tab * We need to set the Details Dialog's previous instance to None when recreating a new Results window otherwise Qt crashes since we are probably dereferencing a dangling reference. * Also fixes Results tab not showing up when selecting it from the View menu. --- qt/app.py | 24 +++++++++++++++++------- qt/tabbed_window.py | 9 ++++++--- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/qt/app.py b/qt/app.py index 2c14b6a2..5e091a70 100644 --- a/qt/app.py +++ b/qt/app.py @@ -87,7 +87,6 @@ class DupeGuru(QObject): "IgnoreListDialog", parent=self.main_window, model=self.model.ignore_list_dialog) - self.ignoreListDialog.accepted.connect(self.main_window.onDialogAccepted) self.excludeListDialog = self.main_window.createPage( "ExcludeListDialog", @@ -97,7 +96,8 @@ class DupeGuru(QObject): else: self.ignoreListDialog = IgnoreListDialog( parent=parent_window, model=self.model.ignore_list_dialog) - self.excludeDialog = ExcludeListDialog(parent=parent_window) + self.excludeDialog = ExcludeListDialog( + app=self, parent=parent_window, model=self.model.exclude_list_dialog) self.deletionOptions = DeletionOptions( parent=parent_window, @@ -231,6 +231,10 @@ class DupeGuru(QObject): def showResultsWindow(self): if self.resultWindow is not None: if self.use_tabs: + if self.main_window.indexOfWidget(self.resultWindow) < 0: + self.main_window.addTab( + self.resultWindow, "Results", switch=True) + return self.main_window.showTab(self.resultWindow) else: self.resultWindow.show() @@ -281,18 +285,25 @@ class DupeGuru(QObject): # we have not instantiated and populated it in their internal list yet index = self.main_window.addTab( self.ignoreListDialog, "Ignore List", switch=True) - # if not self.main_window.tabWidget.isTabVisible(index): + elif not self.ignoreListDialog.isVisible() and not self.main_window.isTabVisible(index): + index = self.main_window.addTab( + self.ignoreListDialog, "Ignore List", switch=True) + # self.main_window.showTab(self.ignoreListDialog) self.main_window.setTabVisible(index, True) self.main_window.setCurrentIndex(index) else: self.model.ignore_list_dialog.show() def excludeListTriggered(self): - if self.main_window: + if self.use_tabs: index = self.main_window.indexOfWidget(self.excludeListDialog) if index < 0: index = self.main_window.addTab( self.excludeListDialog, "Exclude List", switch=True) + elif not self.excludeListDialog.isVisible() and not self.main_window.isTabVisible(index): + index = self.main_window.addTab( + self.excludeListDialog, "Exclude List", switch=True) + # self.main_window.showTab(self.excludeListDialog) self.main_window.setTabVisible(index, True) self.main_window.setCurrentIndex(index) else: @@ -362,15 +373,14 @@ class DupeGuru(QObject): # or simply delete it on close which is probably cleaner: self.details_dialog.setAttribute(Qt.WA_DeleteOnClose) self.details_dialog.close() - # self.details_dialog.setParent(None) # seems unnecessary + # if we don't do the following, Qt will crash when we recreate the Results dialog + self.details_dialog.setParent(None) if self.resultWindow is not None: self.resultWindow.close() self.resultWindow.setParent(None) if self.use_tabs: self.resultWindow = self.main_window.createPage( "ResultWindow", parent=self.main_window, app=self) - self.main_window.addTab( - self.resultWindow, "Results", switch=False) else: # We don't use a tab widget, regular floating QMainWindow self.resultWindow = ResultWindow(self.directories_dialog, self) self.directories_dialog._updateActionsState() diff --git a/qt/tabbed_window.py b/qt/tabbed_window.py index 36a92e1a..0f030ac4 100644 --- a/qt/tabbed_window.py +++ b/qt/tabbed_window.py @@ -26,7 +26,7 @@ class TabWindow(QMainWindow): def __init__(self, app, **kwargs): super().__init__(None, **kwargs) self.app = app - self.pages = {} + self.pages = {} # This is currently not used anywhere self.menubar = None self.menuList = set() self.last_index = -1 @@ -142,6 +142,7 @@ class TabWindow(QMainWindow): and not page_type == "IgnoreListDialog" else False) self.app.actionDirectoriesWindow.setEnabled( False if page_type == "DirectoriesDialog" else True) + # FIXME add ExcludeListDialog here too self.previous_widget_actions = active_widget.specific_actions self.last_index = current_index @@ -158,12 +159,14 @@ class TabWindow(QMainWindow): parent = kwargs.get("parent", self) model = kwargs.get("model") page = IgnoreListDialog(parent, model) + page.accepted.connect(self.onDialogAccepted) elif cls == "ExcludeListDialog": app = kwargs.get("app", app) parent = kwargs.get("parent", self) model = kwargs.get("model") page = ExcludeListDialog(app, parent, model) - self.pages[cls] = page + page.accepted.connect(self.onDialogAccepted) + self.pages[cls] = page # Not used, might remove return page def addTab(self, page, title, switch=False): @@ -208,7 +211,7 @@ class TabWindow(QMainWindow): # --- Events def appWillSavePrefs(self): - # Right now this is useless since the first spawn dialog inside the + # Right now this is useless since the first spawned dialog inside the # QTabWidget will assign its geometry after restoring it prefs = self.app.prefs prefs.mainWindowIsMaximized = self.isMaximized() From 26d18945b1c169b00f07d117456ef311c745556c Mon Sep 17 00:00:00 2001 From: glubsy Date: Sun, 23 Aug 2020 16:49:43 +0200 Subject: [PATCH 06/21] Fix tab indices not aligned with stackwidget's * The custom QStackWidget+QTabBar class did not manage the tabs properly because the indices in the stackwidget were not aligned with the ones in the tab bar. * Properly disable exclude list action when it is the currently displayed widget. * Merge action callbacks for triggering ignore list or exclude list to avoid repeating code and remove unused checks for tab visibility. * Remove unused SetTabVisible() function. --- qt/app.py | 39 ++++++++++------------------ qt/tabbed_window.py | 62 ++++++++++++++++++++++----------------------- 2 files changed, 45 insertions(+), 56 deletions(-) diff --git a/qt/app.py b/qt/app.py index 5e091a70..c2032388 100644 --- a/qt/app.py +++ b/qt/app.py @@ -279,35 +279,24 @@ class DupeGuru(QObject): def ignoreListTriggered(self): if self.use_tabs: - # Fetch the index in the TabWidget or the StackWidget (depends on class): - index = self.main_window.indexOfWidget(self.ignoreListDialog) - if index < 0: - # we have not instantiated and populated it in their internal list yet - index = self.main_window.addTab( - self.ignoreListDialog, "Ignore List", switch=True) - elif not self.ignoreListDialog.isVisible() and not self.main_window.isTabVisible(index): - index = self.main_window.addTab( - self.ignoreListDialog, "Ignore List", switch=True) - # self.main_window.showTab(self.ignoreListDialog) - self.main_window.setTabVisible(index, True) - self.main_window.setCurrentIndex(index) - else: + self.showTriggeredTabbedDialog(self.ignoreListDialog, "Ignore List") + else: # floating windows self.model.ignore_list_dialog.show() def excludeListTriggered(self): if self.use_tabs: - index = self.main_window.indexOfWidget(self.excludeListDialog) - if index < 0: - index = self.main_window.addTab( - self.excludeListDialog, "Exclude List", switch=True) - elif not self.excludeListDialog.isVisible() and not self.main_window.isTabVisible(index): - index = self.main_window.addTab( - self.excludeListDialog, "Exclude List", switch=True) - # self.main_window.showTab(self.excludeListDialog) - self.main_window.setTabVisible(index, True) - self.main_window.setCurrentIndex(index) - else: - self.excludeListDialog.show() + self.showTriggeredTabbedDialog(self.excludeListDialog, "Exclude List") + else: # floating windows + self.model.exclude_list_dialog.show() + + def showTriggeredTabbedDialog(self, dialog, desc_string): + """Add tab for dialog, name the tab with desc_string, then show it.""" + index = self.main_window.indexOfWidget(dialog) + # Create the tab if it doesn't exist already + if index < 0: # or (not dialog.isVisible() and not self.main_window.isTabVisible(index)): + index = self.main_window.addTab(dialog, desc_string, switch=True) + # Show the tab for that widget + self.main_window.setCurrentIndex(index) def openDebugLogTriggered(self): debugLogPath = op.join(self.model.appdata, "debug.log") diff --git a/qt/tabbed_window.py b/qt/tabbed_window.py index 0f030ac4..193323ca 100644 --- a/qt/tabbed_window.py +++ b/qt/tabbed_window.py @@ -109,7 +109,7 @@ class TabWindow(QMainWindow): self.menuList.add(self.menuHelp) @pyqtSlot(int) - def updateMenuBar(self, page_index=None): + def updateMenuBar(self, page_index=-1): if page_index < 0: return current_index = self.getCurrentIndex() @@ -142,7 +142,9 @@ class TabWindow(QMainWindow): and not page_type == "IgnoreListDialog" else False) self.app.actionDirectoriesWindow.setEnabled( False if page_type == "DirectoriesDialog" else True) - # FIXME add ExcludeListDialog here too + self.app.actionExcludeList.setEnabled( + True if self.app.excludeListDialog is not None + and not page_type == "ExcludeListDialog" else False) self.previous_widget_actions = active_widget.specific_actions self.last_index = current_index @@ -182,7 +184,6 @@ class TabWindow(QMainWindow): def showTab(self, page): index = self.indexOfWidget(page) - self.setTabVisible(index, True) self.setCurrentIndex(index) def indexOfWidget(self, widget): @@ -191,9 +192,6 @@ class TabWindow(QMainWindow): def setCurrentIndex(self, index): return self.tabWidget.setCurrentIndex(index) - def setTabVisible(self, index, value): - return self.tabWidget.setTabVisible(index, value) - def removeTab(self, index): return self.tabWidget.removeTab(index) @@ -232,14 +230,13 @@ class TabWindow(QMainWindow): # menu or shortcut. But this is useless if we don't have a button # set up to make a close request anyway. This check could be removed. return - current_widget.close() - self.setTabVisible(index, False) + # current_widget.close() # seems unnecessary # self.tabWidget.widget(index).hide() self.removeTab(index) @pyqtSlot() def onDialogAccepted(self): - """Remove tabbed dialog when Accepted/Done.""" + """Remove tabbed dialog when Accepted/Done (close button clicked).""" widget = self.sender() index = self.indexOfWidget(widget) if index > -1: @@ -277,7 +274,7 @@ class TabBarWindow(TabWindow): self.verticalLayout.addLayout(self.horizontalLayout) self.verticalLayout.addWidget(self.stackedWidget) - self.tabBar.currentChanged.connect(self.showWidget) + self.tabBar.currentChanged.connect(self.showTabIndex) self.tabBar.tabCloseRequested.connect(self.onTabCloseRequested) self.stackedWidget.currentChanged.connect(self.updateMenuBar) @@ -287,50 +284,48 @@ class TabBarWindow(TabWindow): self.restoreGeometry() def addTab(self, page, title, switch=True): - stack_index = self.stackedWidget.insertWidget(-1, page) - tab_index = self.tabBar.addTab(title) + stack_index = self.stackedWidget.addWidget(page) + self.tabBar.insertTab(stack_index, title) if isinstance(page, DirectoriesDialog): self.tabBar.setTabButton( - tab_index, QTabBar.RightSide, None) + stack_index, QTabBar.RightSide, None) if switch: # switch to the added tab immediately upon creation - self.setTabIndex(tab_index) - self.stackedWidget.setCurrentWidget(page) + self.setTabIndex(stack_index) return stack_index @pyqtSlot(int) - def showWidget(self, index): - if index >= 0 and index <= self.stackedWidget.count() - 1: + def showTabIndex(self, index): + # The tab bar's indices should be aligned with the stackwidget's + if index >= 0 and index <= self.stackedWidget.count(): self.stackedWidget.setCurrentIndex(index) - # if not self.tabBar.isTabVisible(index): - self.setTabVisible(index, True) def indexOfWidget(self, widget): # Warning: this may return -1 if widget is not a child of stackedwidget return self.stackedWidget.indexOf(widget) def setCurrentIndex(self, tab_index): - # The signal will handle switching the stackwidget's widget self.setTabIndex(tab_index) + # The signal will handle switching the stackwidget's widget # self.stackedWidget.setCurrentWidget(self.stackedWidget.widget(tab_index)) + def setCurrentWidget(self, widget): + """Sets the current Tab on TabBar for this widget.""" + self.tabBar.setCurrentIndex(self.indexOfWidget(widget)) + @pyqtSlot(int) def setTabIndex(self, index): if index is None: return self.tabBar.setCurrentIndex(index) - def setTabVisible(self, index, value): - return self.tabBar.setTabVisible(index, value) - @pyqtSlot(int) def onRemovedWidget(self, index): self.removeTab(index) @pyqtSlot(int) def removeTab(self, index): - # No need to remove the widget here: - # self.stackedWidget.removeWidget(self.stackedWidget.widget(index)) + """Remove the tab, but not the widget (it should already be removed)""" return self.tabBar.removeTab(index) @pyqtSlot(int) @@ -357,13 +352,18 @@ class TabBarWindow(TabWindow): @pyqtSlot(int) def onTabCloseRequested(self, index): - current_widget = self.getWidgetAtIndex(index) - if isinstance(current_widget, DirectoriesDialog): + target_widget = self.getWidgetAtIndex(index) + if isinstance(target_widget, DirectoriesDialog): # On MacOS, the tab has a close button even though we explicitely # set it to None in order to hide it. This should prevent # the "Directories" tab from closing by mistake. return - current_widget.close() - self.stackedWidget.removeWidget(current_widget) - # In this case the signal will take care of the tab itself after removing the widget - # self.removeTab(index) + # target_widget.close() # seems unnecessary + # Removing the widget should trigger tab removal via the signal + self.removeWidget(self.getWidgetAtIndex(index)) + + @pyqtSlot() + def onDialogAccepted(self): + """Remove tabbed dialog when Accepted/Done (close button clicked).""" + widget = self.sender() + self.removeWidget(widget) From 4a1641e39d66afd1c25ef5a621278f90353a3081 Mon Sep 17 00:00:00 2001 From: glubsy Date: Sat, 29 Aug 2020 03:57:00 +0200 Subject: [PATCH 07/21] Add test suite, fix bugs --- core/app.py | 2 +- core/directories.py | 31 ++-- core/exclude.py | 197 ++++++++++++++--------- core/fs.py | 11 +- core/tests/directories_test.py | 236 ++++++++++++++++++++++------ core/tests/exclude_test.py | 277 +++++++++++++++++++++++++++++++++ tox.ini | 2 +- 7 files changed, 613 insertions(+), 143 deletions(-) create mode 100644 core/tests/exclude_test.py diff --git a/core/app.py b/core/app.py index ee31a114..3f4c3266 100644 --- a/core/app.py +++ b/core/app.py @@ -26,7 +26,7 @@ from .pe.photo import get_delta_dimensions from .util import cmp_value, fix_surrogate_encoding from . import directories, results, export, fs, prioritize from .ignore import IgnoreList -from .exclude import ExcludeList as ExcludeList +from .exclude import ExcludeDict as ExcludeList from .scanner import ScanType from .gui.deletion_options import DeletionOptions from .gui.details_panel import DetailsPanel diff --git a/core/directories.py b/core/directories.py index 781ced90..aa6298d4 100644 --- a/core/directories.py +++ b/core/directories.py @@ -80,13 +80,12 @@ class Directories: # ---Private def _default_state_for_path(self, path): # New logic with regex filters - if self._exclude_list is not None and len(self._exclude_list) > 0: + if self._exclude_list is not None and self._exclude_list.mark_count > 0: # We iterate even if we only have one item here - for denied_path_re in self._exclude_list.compiled_combined: - if denied_path_re.match(str(path)): + for denied_path_re in self._exclude_list.compiled: + if denied_path_re.match(str(path.name)): return DirectoryState.Excluded - return None - # Old default logic, still used during initialization of DirectoryTree: + # return # We still use the old logic to force state on hidden dirs # Override this in subclasses to specify the state of some special folders. if path.name.startswith("."): return DirectoryState.Excluded @@ -95,7 +94,7 @@ class Directories: for root, dirs, files in os.walk(str(from_path)): j.check_if_cancelled() rootPath = Path(root) - state = self.get_state(root) + state = self.get_state(rootPath) if state == DirectoryState.Excluded: # Recursively get files from folders with lots of subfolder is expensive. However, there # might be a subfolder in this path that is not excluded. What we want to do is to skim @@ -105,16 +104,22 @@ class Directories: try: if state != DirectoryState.Excluded: # Old logic - if self._exclude_list is None or not len(self._exclude_list): + if self._exclude_list is None or not self._exclude_list.mark_count: found_files = [fs.get_file(rootPath + f, fileclasses=fileclasses) for f in files] else: found_files = [] + # print(f"len of files: {len(files)} {files}") for f in files: found = False - for expr in self._exclude_list.compiled_files_combined: - found = expr.match(f) - if found: + for expr in self._exclude_list.compiled_files: + if expr.match(f): + found = True break + if not found: + for expr in self._exclude_list.compiled_paths: + if expr.match(root + os.sep + f): + found = True + break if not found: found_files.append(fs.get_file(rootPath + f, fileclasses=fileclasses)) found_files = [f for f in found_files if f is not None] @@ -215,8 +220,14 @@ class Directories: if path in self.states: return self.states[path] state = self._default_state_for_path(path) or DirectoryState.Normal + # Save non-default states in cache, necessary for _get_files() + if state != DirectoryState.Normal: + self.states[path] = state + return state + prevlen = 0 # we loop through the states to find the longest matching prefix + # if the parent has a state in cache, return that state for p, s in self.states.items(): if p.is_parent_of(path) and len(p) > prevlen: prevlen = len(p) diff --git a/core/exclude.py b/core/exclude.py index 29bab540..e0e8a901 100644 --- a/core/exclude.py +++ b/core/exclude.py @@ -5,7 +5,8 @@ from .markable import Markable from xml.etree import ElementTree as ET # TODO: perhaps use regex module for better Unicode support? https://pypi.org/project/regex/ -# or perhaps also https://pypi.org/project/re2/ +# also https://pypi.org/project/re2/ +# TODO update the Result list with newly added regexes if possible import re from os import sep import logging @@ -13,8 +14,14 @@ import functools from hscommon.util import FileOrPath import time -default_regexes = [r".*thumbs", r"\.DS.Store", r"\.Trash", r".*Trash-Bin"] -forbidden_regexes = [r".*", r"\/.*", r".*\/.*"] +default_regexes = [r"^thumbs\.db$", # Obsolete after WindowsXP + r"^\.DS_Store$", # MacOS metadata + r"^\.Trash\-.*", # Linux trash directories + r"^\$Recycle\.Bin$", # Windows + r"^\..*" # Hidden files + ] +# These are too agressive +forbidden_regexes = [r".*", r"\/.*", r".*\/.*", r".*\..*"] def timer(func): @@ -59,36 +66,37 @@ class ExcludeList(Markable): # ---Override def __init__(self, combined_regex=False): Markable.__init__(self) - self._combined_regex = combined_regex + self._use_combined = combined_regex self._excluded = [] - self._count = 0 self._excluded_compiled = set() self._dirty = True - def __debug_test(self): - self.test_regexes = [ - r".*Recycle\.Bin$", r"denyme.*", r".*denyme", r".*/test/denyme*", - r".*/test/*denyme", r"denyme", r".*\/\..*", r"^\..*"] - for regex in self.test_regexes: - try: - self.add(regex) - except Exception as e: - print(f"Exception loading test regex {regex}: {e}") - continue - try: - self.mark(regex) - except Exception as e: - print(f"Exception marking test regex {regex}: {e}") - def __iter__(self): """Iterate in order.""" for item in self._excluded: regex = item[0] yield self.is_marked(regex), regex + def __contains__(self, item): + return self.isExcluded(item) + def __len__(self): - """Returns the number of marked regexes.""" - return len([x for marked, x in self if marked]) + """Returns the total number of regexes regardless of mark status.""" + return len(self._excluded) + + def __getitem__(self, key): + for item in self._excluded: + if item[0] == key: + return item + raise KeyError(f"Key {key} is not in exclusion list.") + + def __setitem__(self, key, value): + # TODO if necessary + pass + + def __delitem__(self, key): + # TODO if necessary + pass def is_markable(self, regex): return self._is_markable(regex) @@ -98,7 +106,7 @@ class ExcludeList(Markable): for item in self._excluded: if item[0] == regex: return item[1] - return False # should not be needed + return False # should not be necessary, regex SHOULD be in there def _did_mark(self, regex): self._add_compiled(regex) @@ -107,17 +115,19 @@ class ExcludeList(Markable): self._remove_compiled(regex) def _add_compiled(self, regex): - if self._combined_regex: - self._dirty = True + self._dirty = True + if self._use_combined: return for item in self._excluded: + # FIXME probably faster to just rebuild the set from the compiled instead of comparing strings if item[0] == regex: # no need to test if already present since it's a set() self._excluded_compiled.add(item[3]) + break def _remove_compiled(self, regex): - if self._combined_regex: - self._dirty = True + self._dirty = True + if self._use_combined: return for item in self._excluded_compiled: if regex in item.pattern: @@ -148,44 +158,65 @@ class ExcludeList(Markable): if item[0] == regex: return item[2] + def build_compiled_caches(self, combined=False): + if not combined: + self._cached_compiled_files =\ + [x for x in self._excluded_compiled if sep not in x.pattern] + self._cached_compiled_paths =\ + [x for x in self._excluded_compiled if sep in x.pattern] + return + # HACK returned as a tuple to get a free iterator to keep interface the same + # regardless of whether the client asked for combined or not + marked_count = [x for marked, x in self if marked] + # If there is no item, the compiled Pattern will be '' and match everything! + if not marked_count: + self._cached_compiled_combined_all = [] + self._cached_compiled_combined_files = [] + self._cached_compiled_combined_paths = [] + else: + self._cached_compiled_combined_all =\ + (re.compile('|'.join(marked_count)),) + files_marked = [x for x in marked_count if sep not in x] + if not files_marked: + self._cached_compiled_combined_files = tuple() + else: + self._cached_compiled_combined_files =\ + (re.compile('|'.join(files_marked)),) + paths_marked = [x for x in marked_count if sep in x] + if not paths_marked: + self._cached_compiled_combined_paths = tuple() + else: + self._cached_compiled_combined_paths =\ + (re.compile('|'.join(paths_marked)),) + @property def compiled(self): """Should be used by other classes to retrieve the up-to-date list of patterns.""" - if not self._combined_regex: - return self._excluded_compiled - else: - return self.compiled_combined + if self._use_combined: + if self._dirty: + self.build_compiled_caches(True) + self._dirty = False + return self._cached_compiled_combined_all + return self._excluded_compiled @property def compiled_files(self): - """Should be used by other classes to retrieve the up-to-date list of patterns - for files only.""" - if not self._combined_regex: - # Return each compiled element separately - # return [compiled_pattern for compiled_pattern in self.compiled if sep not in compiled_pattern.pattern] - for compiled in self.compiled: - if sep not in compiled.pattern: - yield compiled - else: - return self.compiled_files_combined - - @property - def compiled_combined(self): + """When matching against filenames only, we probably won't be seeing any + directory separator, so we filter out regexes with os.sep in them. + The interface should be expected to be a generator, even if it returns only + one item (one Pattern in the combined case).""" if self._dirty: - self._cached_compiled_combined =\ - re.compile('|'.join(x for marked, x in self if marked)) - # Must compute the filtered out version as well - self._cached_compiled_combined_files =\ - re.compile('|'.join(x for marked, x in self - if marked and sep not in x)) + self.build_compiled_caches(True if self._use_combined else False) self._dirty = False - # returned as a tuple to get a free iterator and to avoid subclassing - return (self._cached_compiled_combined,) + return self._cached_compiled_combined_files if self._use_combined else self._cached_compiled_files @property - def compiled_files_combined(self): - # returned as a tuple to get a free iterator and to avoid subclassing - return (self._cached_compiled_combined_files,) + def compiled_paths(self): + """Returns patterns with only separators in them, for more precise filtering.""" + if self._dirty: + self.build_compiled_caches(True if self._use_combined else False) + self._dirty = False + return self._cached_compiled_combined_paths if self._use_combined else self._cached_compiled_paths # ---Public def add(self, regex, forced=False): @@ -206,7 +237,11 @@ class ExcludeList(Markable): def _do_add(self, regex, iscompilable, exception, compiled): # We need to insert at the top self._excluded.insert(0, [regex, iscompilable, exception, compiled]) - # self._count = len(self._excluded) + + @property + def marked_count(self): + """Returns the number of marked regexes only.""" + return len([x for marked, x in self if marked]) def isExcluded(self, regex): for item in self._excluded: @@ -215,6 +250,7 @@ class ExcludeList(Markable): return False def clear(self): + """Not used and needs refactoring""" self._excluded = [] def remove(self, regex): @@ -224,25 +260,24 @@ class ExcludeList(Markable): self._remove_compiled(regex) def rename(self, regex, newregex): - # if regex not in self._excluded or regex == newregex: - # return + # if regex not in self._excluded: return if regex == newregex: return found = False - for item in self._excluded: - if regex == item[0]: - found = True - break - if not found: - return - - was_marked = self.is_marked(regex) - is_compilable, exception, compiled = self.compile_re(newregex) + was_marked = False + is_compilable = False for item in self._excluded: if item[0] == regex: + found = True + was_marked = self.is_marked(regex) + is_compilable, exception, compiled = self.compile_re(newregex) # We overwrite the found entry self._excluded[self._excluded.index(item)] =\ [newregex, is_compilable, exception, compiled] + self._remove_compiled(regex) + break + if not found: + return if is_compilable and was_marked: # Not marked by default when added, add it back self.mark(newregex) @@ -271,7 +306,6 @@ class ExcludeList(Markable): except Exception as e: logging.warning(f"Error while loading {infile}: {e}") self.restore_defaults() - self.__debug_test() return e marked = set() @@ -291,7 +325,6 @@ class ExcludeList(Markable): for item in marked: self.mark(item) - self.__debug_test() def save_to_xml(self, outfile): """Create a XML file that can be used by load_from_xml. @@ -314,13 +347,14 @@ class ExcludeDict(ExcludeList): to keep the index of each string-key as its sub-element and keep it updated whenever insert/remove is done.""" - def __init__(self): + def __init__(self, combined_regex=False): Markable.__init__(self) + self._use_combined = combined_regex # { "regex": { "index": int, "compilable": bool, "error": str, "compiled": Pattern or None}} # Note: "compilable" key should only be updated on add / rename self._excluded = {} - self._count = 0 self._excluded_compiled = set() + self._dirty = True def __iter__(self): """Iterate in order.""" @@ -338,8 +372,8 @@ class ExcludeDict(ExcludeList): return False def _add_compiled(self, regex): - if self._combined_regex: - self._dirty = True + self._dirty = True + if self._use_combined: return try: self._excluded_compiled.add(self._excluded[regex]["compiled"]) @@ -360,8 +394,12 @@ class ExcludeDict(ExcludeList): # We always insert at the top, so index should be 0 and other indices should be pushed by one for value in self._excluded.values(): value["index"] += 1 - self._excluded[regex] = {"index": 0, "compilable": iscompilable, "error": exception, "compiled": compiled} - # self._count = len(self._excluded) + self._excluded[regex] = { + "index": 0, + "compilable": iscompilable, + "error": exception, + "compiled": compiled + } def isExcluded(self, regex): if regex in self._excluded.keys(): @@ -369,6 +407,7 @@ class ExcludeDict(ExcludeList): return False def clear(self): + """Not used, need refactoring""" self._excluded = {} def remove(self, regex): @@ -391,7 +430,13 @@ class ExcludeDict(ExcludeList): was_marked = self.is_marked(regex) previous = self._excluded.pop(regex) iscompilable, error, compiled = self.compile_re(newregex) - self._excluded[newregex] = {"index": previous["index"], "compilable": iscompilable, "error": error, "compiled": compiled} + self._excluded[newregex] = { + "index": previous["index"], + "compilable": iscompilable, + "error": error, + "compiled": compiled + } + self._remove_compiled(regex) if was_marked and iscompilable: self.mark(newregex) diff --git a/core/fs.py b/core/fs.py index 90f400d9..f18186ae 100644 --- a/core/fs.py +++ b/core/fs.py @@ -245,7 +245,7 @@ class Folder(File): return not path.islink() and path.isdir() -def get_file(path, fileclasses=[File], deny_list_re=set()): +def get_file(path, fileclasses=[File]): """Wraps ``path`` around its appropriate :class:`File` class. Whether a class is "appropriate" is decided by :meth:`File.can_handle` @@ -255,15 +255,10 @@ def get_file(path, fileclasses=[File], deny_list_re=set()): """ for fileclass in fileclasses: if fileclass.can_handle(path): - # print(f"returning {path}") - # for expr in deny_list_re: - # if expr.match(str(path.name)): - # print(f"FOUND {repr(expr)} in {str(path.name)}") - # return return fileclass(path) -def get_files(path, fileclasses=[File], deny_list_re=set()): +def get_files(path, fileclasses=[File]): """Returns a list of :class:`File` for each file contained in ``path``. :param Path path: path to scan @@ -273,7 +268,7 @@ def get_files(path, fileclasses=[File], deny_list_re=set()): try: result = [] for path in path.listdir(): - file = get_file(path, fileclasses=fileclasses, deny_list_re=deny_list_re) + file = get_file(path, fileclasses=fileclasses) if file is not None: result.append(file) return result diff --git a/core/tests/directories_test.py b/core/tests/directories_test.py index 7273b566..1ce84fb4 100644 --- a/core/tests/directories_test.py +++ b/core/tests/directories_test.py @@ -20,6 +20,7 @@ from ..directories import ( AlreadyThereError, InvalidPathError, ) +from ..exclude import ExcludeList, ExcludeDict def create_fake_fs(rootpath): @@ -323,7 +324,7 @@ def test_get_state_returns_excluded_by_default_for_hidden_directories(tmpdir): def test_default_path_state_override(tmpdir): # It's possible for a subclass to override the default state of a path class MyDirectories(Directories): - def _default_state_for_path(self, path, denylist): + def _default_state_for_path(self, path): if "foobar" in path: return DirectoryState.Excluded @@ -343,52 +344,193 @@ def test_default_path_state_override(tmpdir): eq_(len(list(d.get_files())), 2) -def test_exclude_list_regular_expressions(tmpdir): - d = Directories() - d.deny_list_str.clear() - d.deny_list_re.clear() - d.deny_list_re_files.clear() - # This should only exlude the directory, but not the contained files if - # its status is set to normal after loading it in the directory tree - d.deny_list_str.add(r".*Recycle\.Bin$") - d.deny_list_str.add(r"denyme.*") - # d.deny_list_str.add(r".*denymetoo") - # d.deny_list_str.add(r"denyme") - d.deny_list_str.add(r".*\/\..*") - d.deny_list_str.add(r"^\..*") - d.compile_re() - p1 = Path(str(tmpdir)) - # Should be ignored on Windows only (by default) - p1["Recycle.Bin"].mkdir() - p1["Recycle.Bin/somerecycledfile"].open("w").close() +class TestExcludeList(): + def setup_method(self, method): + self.d = Directories(exclude_list=ExcludeList(combined_regex=False)) - p1["denyme_blah.txt"].open("w").close() - p1["blah_denymetoo"].open("w").close() - p1["blah_denyme"].open("w").close() + def get_files_and_expect_num_result(self, num_result): + """Calls get_files(), get the filenames only, print for debugging. + num_result is how many files are expected as a result.""" + print(f"EXCLUDED REGEX: paths {self.d._exclude_list.compiled_paths} \ +files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled}") + files = list(self.d.get_files()) + files = [file.name for file in files] + print(f"FINAL FILES {files}") + eq_(len(files), num_result) + return files - p1[".hidden_file"].open("w").close() - p1[".hidden_dir"].mkdir() - p1[".hidden_dir/somenormalfile1"].open("w").close() - p1[".hidden_dir/somenormalfile2_denyme"].open("w").close() + def test_exclude_recycle_bin_by_default(self, tmpdir): + regex = r"^.*Recycle\.Bin$" + self.d._exclude_list.add(regex) + self.d._exclude_list.mark(regex) + p1 = Path(str(tmpdir)) + p1["$Recycle.Bin"].mkdir() + p1["$Recycle.Bin"]["subdir"].mkdir() + self.d.add_path(p1) + eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded) + # By default, subdirs should be excluded too, but this can be overriden separately + eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded) + self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal) + eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal) - p1["foobar"].mkdir() - p1["foobar/somefile"].open("w").close() - d.add_path(p1) - eq_(d.get_state(p1["Recycle.Bin"]), DirectoryState.Excluded) - eq_(d.get_state(p1["foobar"]), DirectoryState.Normal) - files = list(d.get_files()) - files = [file.name for file in files] - print(f"first files: {files}") - assert "somerecycledfile" not in files - assert "denyme_blah.txt" not in files - assert ".hidden_file" not in files - assert "somefile1" not in files - assert "somefile2_denyme" not in files - # Overriding the default state from the Directory Tree - d.set_state(p1["Recycle.Bin"], DirectoryState.Normal) - d.set_state(p1[".hidden_dir"], DirectoryState.Normal) - files = list(d.get_files()) - files = [file.name for file in files] - print(f"second files: {files}") - assert "somerecycledfile" in files - assert "somenormalfile1" in files + def test_exclude_refined(self, tmpdir): + regex1 = r"^\$Recycle\.Bin$" + self.d._exclude_list.add(regex1) + self.d._exclude_list.mark(regex1) + p1 = Path(str(tmpdir)) + p1["$Recycle.Bin"].mkdir() + p1["$Recycle.Bin"]["somefile.png"].open("w").close() + p1["$Recycle.Bin"]["some_unwanted_file.jpg"].open("w").close() + p1["$Recycle.Bin"]["subdir"].mkdir() + p1["$Recycle.Bin"]["subdir"]["somesubdirfile.png"].open("w").close() + p1["$Recycle.Bin"]["subdir"]["unwanted_subdirfile.gif"].open("w").close() + p1["$Recycle.Bin"]["subdar"].mkdir() + p1["$Recycle.Bin"]["subdar"]["somesubdarfile.jpeg"].open("w").close() + p1["$Recycle.Bin"]["subdar"]["unwanted_subdarfile.png"].open("w").close() + self.d.add_path(p1["$Recycle.Bin"]) + + # Filter should set the default state to Excluded + eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded) + # The subdir should inherit its parent state + eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded) + eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Excluded) + # Override a child path's state + self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal) + eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal) + # Parent should keep its default state, and the other child too + eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded) + eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Excluded) + # print(f"get_folders(): {[x for x in self.d.get_folders()]}") + + # only the 2 files directly under the Normal directory + files = self.get_files_and_expect_num_result(2) + assert "somefile.png" not in files + assert "some_unwanted_file.jpg" not in files + assert "somesubdarfile.jpeg" not in files + assert "unwanted_subdarfile.png" not in files + assert "somesubdirfile.png" in files + assert "unwanted_subdirfile.gif" in files + # Overriding the parent should enable all children + self.d.set_state(p1["$Recycle.Bin"], DirectoryState.Normal) + eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Normal) + # all files there + files = self.get_files_and_expect_num_result(6) + assert "somefile.png" in files + assert "some_unwanted_file.jpg" in files + + # This should still filter out files under directory, despite the Normal state + regex2 = r".*unwanted.*" + self.d._exclude_list.add(regex2) + self.d._exclude_list.mark(regex2) + files = self.get_files_and_expect_num_result(3) + assert "somefile.png" in files + assert "some_unwanted_file.jpg" not in files + assert "unwanted_subdirfile.gif" not in files + assert "unwanted_subdarfile.png" not in files + + regex3 = r".*Recycle\.Bin\/.*unwanted.*subdirfile.*" + self.d._exclude_list.rename(regex2, regex3) + assert self.d._exclude_list.error(regex3) is None + # print(f"get_folders(): {[x for x in self.d.get_folders()]}") + # Directory shouldn't change its state here, unless explicitely done by user + eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal) + files = self.get_files_and_expect_num_result(5) + assert "unwanted_subdirfile.gif" not in files + assert "unwanted_subdarfile.png" in files + + # using end of line character should only filter the directory, or file ending with subdir + regex4 = r".*subdir$" + self.d._exclude_list.rename(regex3, regex4) + assert self.d._exclude_list.error(regex4) is None + p1["$Recycle.Bin"]["subdar"]["file_ending_with_subdir"].open("w").close() + eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded) + files = self.get_files_and_expect_num_result(4) + assert "file_ending_with_subdir" not in files + assert "somesubdarfile.jpeg" in files + assert "somesubdirfile.png" not in files + assert "unwanted_subdirfile.gif" not in files + self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal) + eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal) + # print(f"get_folders(): {[x for x in self.d.get_folders()]}") + files = self.get_files_and_expect_num_result(6) + assert "file_ending_with_subdir" not in files + assert "somesubdirfile.png" in files + assert "unwanted_subdirfile.gif" in files + + regex5 = r".*subdir.*" + self.d._exclude_list.rename(regex4, regex5) + # Files containing substring should be filtered + eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal) + # The path should not match, only the filename, the "subdir" in the directory name shouldn't matter + p1["$Recycle.Bin"]["subdir"]["file_which_shouldnt_match"].open("w").close() + files = self.get_files_and_expect_num_result(5) + assert "somesubdirfile.png" not in files + assert "unwanted_subdirfile.gif" not in files + assert "file_ending_with_subdir" not in files + assert "file_which_shouldnt_match" in files + + def test_japanese_unicode(self, tmpdir): + p1 = Path(str(tmpdir)) + p1["$Recycle.Bin"].mkdir() + p1["$Recycle.Bin"]["somerecycledfile.png"].open("w").close() + p1["$Recycle.Bin"]["some_unwanted_file.jpg"].open("w").close() + p1["$Recycle.Bin"]["subdir"].mkdir() + p1["$Recycle.Bin"]["subdir"]["過去白濁物語~]_カラー.jpg"].open("w").close() + p1["$Recycle.Bin"]["思叫物語"].mkdir() + p1["$Recycle.Bin"]["思叫物語"]["なししろ会う前"].open("w").close() + p1["$Recycle.Bin"]["思叫物語"]["堂~ロ"].open("w").close() + self.d.add_path(p1["$Recycle.Bin"]) + regex3 = r".*物語.*" + self.d._exclude_list.add(regex3) + self.d._exclude_list.mark(regex3) + # print(f"get_folders(): {[x for x in self.d.get_folders()]}") + eq_(self.d.get_state(p1["$Recycle.Bin"]["思叫物語"]), DirectoryState.Excluded) + files = self.get_files_and_expect_num_result(2) + assert "過去白濁物語~]_カラー.jpg" not in files + assert "なししろ会う前" not in files + assert "堂~ロ" not in files + # using end of line character should only filter that directory, not affecting its files + regex4 = r".*物語$" + self.d._exclude_list.rename(regex3, regex4) + assert self.d._exclude_list.error(regex4) is None + self.d.set_state(p1["$Recycle.Bin"]["思叫物語"], DirectoryState.Normal) + files = self.get_files_and_expect_num_result(5) + assert "過去白濁物語~]_カラー.jpg" in files + assert "なししろ会う前" in files + assert "堂~ロ" in files + + def test_get_state_returns_excluded_for_hidden_directories_and_files(self, tmpdir): + # This regex only work for files, not paths + regex = r"^\..*$" + self.d._exclude_list.add(regex) + self.d._exclude_list.mark(regex) + p1 = Path(str(tmpdir)) + p1["foobar"].mkdir() + p1["foobar"][".hidden_file.txt"].open("w").close() + p1["foobar"][".hidden_dir"].mkdir() + p1["foobar"][".hidden_dir"]["foobar.jpg"].open("w").close() + p1["foobar"][".hidden_dir"][".hidden_subfile.png"].open("w").close() + self.d.add_path(p1["foobar"]) + # It should not inherit its parent's state originally + eq_(self.d.get_state(p1["foobar"][".hidden_dir"]), DirectoryState.Excluded) + self.d.set_state(p1["foobar"][".hidden_dir"], DirectoryState.Normal) + # The files should still be filtered + files = self.get_files_and_expect_num_result(1) + assert ".hidden_file.txt" not in files + assert ".hidden_subfile.png" not in files + assert "foobar.jpg" in files + + +class TestExcludeDict(TestExcludeList): + def setup_method(self, method): + self.d = Directories(exclude_list=ExcludeDict(combined_regex=False)) + + +class TestExcludeListCombined(TestExcludeList): + def setup_method(self, method): + self.d = Directories(exclude_list=ExcludeList(combined_regex=True)) + + +class TestExcludeDictCombined(TestExcludeList): + def setup_method(self, method): + self.d = Directories(exclude_list=ExcludeDict(combined_regex=True)) diff --git a/core/tests/exclude_test.py b/core/tests/exclude_test.py new file mode 100644 index 00000000..0dc4a033 --- /dev/null +++ b/core/tests/exclude_test.py @@ -0,0 +1,277 @@ +# Copyright 2016 Hardcoded Software (http://www.hardcoded.net) +# +# This software is licensed under the "GPLv3" License as described in the "LICENSE" file, +# which should be included with this package. The terms are also available at +# http://www.gnu.org/licenses/gpl-3.0.html + +import io +# import os.path as op + +from xml.etree import ElementTree as ET + +# from pytest import raises +from hscommon.testutil import eq_ + +from .base import DupeGuru +from ..exclude import ExcludeList, ExcludeDict, default_regexes, AlreadyThereException + +from re import error + + +# Two slightly different implementations here, one around a list of lists, +# and another around a dictionary. + + +class TestCaseListXMLLoading: + def setup_method(self, method): + self.exclude_list = ExcludeList() + + def test_load_non_existant_file(self): + # Loads the pre-defined regexes + self.exclude_list.load_from_xml("non_existant.xml") + eq_(len(default_regexes), len(self.exclude_list)) + # they should also be marked by default + eq_(len(default_regexes), self.exclude_list.marked_count) + + def test_save_to_xml(self): + f = io.BytesIO() + self.exclude_list.save_to_xml(f) + f.seek(0) + doc = ET.parse(f) + root = doc.getroot() + eq_("exclude_list", root.tag) + + def test_save_and_load(self, tmpdir): + e1 = ExcludeList() + e2 = ExcludeList() + eq_(len(e1), 0) + e1.add(r"one") + e1.mark(r"one") + e1.add(r"two") + tmpxml = str(tmpdir.join("exclude_testunit.xml")) + e1.save_to_xml(tmpxml) + e2.load_from_xml(tmpxml) + # We should have the default regexes + assert r"one" in e2 + assert r"two" in e2 + eq_(len(e2), 2) + eq_(e2.marked_count, 1) + + def test_load_xml_with_garbage_and_missing_elements(self): + root = ET.Element("foobar") # The root element shouldn't matter + exclude_node = ET.SubElement(root, "bogus") + exclude_node.set("regex", "None") + exclude_node.set("marked", "y") + + exclude_node = ET.SubElement(root, "exclude") + exclude_node.set("regex", "one") + # marked field invalid + exclude_node.set("markedddd", "y") + + exclude_node = ET.SubElement(root, "exclude") + exclude_node.set("regex", "two") + # missing marked field + + exclude_node = ET.SubElement(root, "exclude") + exclude_node.set("regex", "three") + exclude_node.set("markedddd", "pazjbjepo") + + f = io.BytesIO() + tree = ET.ElementTree(root) + tree.write(f, encoding="utf-8") + f.seek(0) + self.exclude_list.load_from_xml(f) + print(f"{[x for x in self.exclude_list]}") + # only the two "exclude" nodes should be added, + eq_(3, len(self.exclude_list)) + # None should be marked + eq_(0, self.exclude_list.marked_count) + + +class TestCaseDictXMLLoading(TestCaseListXMLLoading): + def setup_method(self, method): + self.exclude_list = ExcludeDict() + + +class TestCaseListEmpty: + def setup_method(self, method): + self.app = DupeGuru() + self.app.exclude_list = ExcludeList() + self.exclude_list = self.app.exclude_list + + def test_add_mark_and_remove_regex(self): + regex1 = r"one" + regex2 = r"two" + self.exclude_list.add(regex1) + assert(regex1 in self.exclude_list) + self.exclude_list.add(regex2) + self.exclude_list.mark(regex1) + self.exclude_list.mark(regex2) + eq_(len(self.exclude_list), 2) + eq_(len(self.exclude_list.compiled), 2) + compiled_files = [x for x in self.exclude_list.compiled_files] + eq_(len(compiled_files), 2) + self.exclude_list.remove(regex2) + assert(regex2 not in self.exclude_list) + eq_(len(self.exclude_list), 1) + + def test_add_duplicate(self): + self.exclude_list.add(r"one") + eq_(1 , len(self.exclude_list)) + try: + self.exclude_list.add(r"one") + except Exception: + pass + eq_(1 , len(self.exclude_list)) + + def test_add_not_compilable(self): + # Trying to add a non-valid regex should not work and raise exception + regex = r"one))" + try: + self.exclude_list.add(regex) + except Exception as e: + # Make sure we raise a re.error so that the interface can process it + eq_(type(e), error) + added = self.exclude_list.mark(regex) + eq_(added, False) + eq_(len(self.exclude_list), 0) + eq_(len(self.exclude_list.compiled), 0) + compiled_files = [x for x in self.exclude_list.compiled_files] + eq_(len(compiled_files), 0) + + def test_force_add_not_compilable(self): + """Used when loading from XML for example""" + regex = r"one))" + try: + self.exclude_list.add(regex, forced=True) + except Exception as e: + # Should not get an exception here unless it's a duplicate regex + raise e + marked = self.exclude_list.mark(regex) + eq_(marked, False) # can't be marked since not compilable + eq_(len(self.exclude_list), 1) + eq_(len(self.exclude_list.compiled), 0) + compiled_files = [x for x in self.exclude_list.compiled_files] + eq_(len(compiled_files), 0) + # adding a duplicate + regex = r"one))" + try: + self.exclude_list.add(regex, forced=True) + except Exception as e: + # we should have this exception, and it shouldn't be added + assert type(e) is AlreadyThereException + eq_(len(self.exclude_list), 1) + eq_(len(self.exclude_list.compiled), 0) + + def test_rename_regex(self): + regex = r"one" + self.exclude_list.add(regex) + self.exclude_list.mark(regex) + regex_renamed = r"one))" + # Not compilable, can't be marked + self.exclude_list.rename(regex, regex_renamed) + assert regex not in self.exclude_list + assert regex_renamed in self.exclude_list + eq_(self.exclude_list.is_marked(regex_renamed), False) + self.exclude_list.mark(regex_renamed) + eq_(self.exclude_list.is_marked(regex_renamed), False) + regex_renamed_compilable = r"two" + self.exclude_list.rename(regex_renamed, regex_renamed_compilable) + assert regex_renamed_compilable in self.exclude_list + eq_(self.exclude_list.is_marked(regex_renamed), False) + self.exclude_list.mark(regex_renamed_compilable) + eq_(self.exclude_list.is_marked(regex_renamed_compilable), True) + eq_(len(self.exclude_list), 1) + # Should still be marked after rename + regex_compilable = r"three" + self.exclude_list.rename(regex_renamed_compilable, regex_compilable) + eq_(self.exclude_list.is_marked(regex_compilable), True) + + def test_restore_default(self): + """Only unmark previously added regexes and mark the pre-defined ones""" + regex = r"one" + self.exclude_list.add(regex) + self.exclude_list.mark(regex) + self.exclude_list.restore_defaults() + eq_(len(default_regexes), self.exclude_list.marked_count) + # added regex shouldn't be marked + eq_(self.exclude_list.is_marked(regex), False) + # added regex shouldn't be in compiled list either + compiled = [x for x in self.exclude_list.compiled] + assert regex not in compiled + # Only default regexes marked and in compiled list + for re in default_regexes: + assert self.exclude_list.is_marked(re) + found = False + for compiled_re in compiled: + if compiled_re.pattern == re: + found = True + if not found: + raise(Exception(f"Default RE {re} not found in compiled list.")) + continue + eq_(len(default_regexes), len(self.exclude_list.compiled)) + + +class TestCaseDictEmpty(TestCaseListEmpty): + """Same, but with dictionary implementation""" + def setup_method(self, method): + self.app = DupeGuru() + self.app.exclude_list = ExcludeDict() + self.exclude_list = self.app.exclude_list + + +def split_combined(pattern_object): + """Returns list of strings for each combined pattern""" + return [x for x in pattern_object.pattern.split("|")] + + +class TestCaseCompiledList(): + """Test consistency between combined or not""" + def setup_method(self, method): + self.e_separate = ExcludeList(combined_regex=False) + self.e_separate.restore_defaults() + self.e_combined = ExcludeList(combined_regex=True) + self.e_combined.restore_defaults() + + def test_same_number_of_expressions(self): + # We only get one combined Pattern item in a tuple, which is made of however many parts + eq_(len(split_combined(self.e_combined.compiled[0])), len(default_regexes)) + # We get as many as there are marked items + eq_(len(self.e_separate.compiled), len(default_regexes)) + exprs = split_combined(self.e_combined.compiled[0]) + # We should have the same number and the same expressions + eq_(len(exprs), len(self.e_separate.compiled)) + for expr in self.e_separate.compiled: + assert expr.pattern in exprs + + def test_compiled_files(self): + # test is separator is indeed checked properly to yield the output + regex1 = r"test/one/sub" + self.e_separate.add(regex1) + self.e_separate.mark(regex1) + self.e_combined.add(regex1) + self.e_combined.mark(regex1) + separate_compiled_dirs = self.e_separate.compiled + separate_compiled_files = [x for x in self.e_separate.compiled_files] + # HACK we need to call compiled property FIRST to generate the cache + combined_compiled_dirs = self.e_combined.compiled + # print(f"type: {type(self.e_combined.compiled_files[0])}") + # A generator returning only one item... ugh + combined_compiled_files = [x for x in self.e_combined.compiled_files][0] + print(f"compiled files: {combined_compiled_files}") + # Separate should give several plus the one added + eq_(len(separate_compiled_dirs), len(default_regexes) + 1) + # regex1 shouldn't be in the "files" version + eq_(len(separate_compiled_files), len(default_regexes)) + # Only one Pattern returned, which when split should be however many + 1 + eq_(len(split_combined(combined_compiled_dirs[0])), len(default_regexes) + 1) + # regex1 shouldn't be here either + eq_(len(split_combined(combined_compiled_files)), len(default_regexes)) + + +class TestCaseCompiledDict(TestCaseCompiledList): + def setup_method(self, method): + self.e_separate = ExcludeDict(combined_regex=False) + self.e_separate.restore_defaults() + self.e_combined = ExcludeDict(combined_regex=True) + self.e_combined.restore_defaults() diff --git a/tox.ini b/tox.ini index 33d32846..6a8b14be 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ setenv = PYTHON="{envpython}" commands = make modules - {posargs:py.test} core hscommon + {posargs:py.test core hscommon} deps = -r{toxinidir}/requirements.txt -r{toxinidir}/requirements-extra.txt From 584e9c92d9a28af6b9f627c40e78cbcc58bac455 Mon Sep 17 00:00:00 2001 From: glubsy Date: Mon, 31 Aug 2020 21:17:08 +0200 Subject: [PATCH 08/21] Fix duplicate items in menubar * When recreating the Results window, the menubar had duplicate items added each time. * Removing the underlying C++ object is apparently enough to fix the issue. * SetParent(None) can still be used in case of floating windows --- qt/app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qt/app.py b/qt/app.py index c2032388..12f0cbdf 100644 --- a/qt/app.py +++ b/qt/app.py @@ -366,7 +366,8 @@ class DupeGuru(QObject): self.details_dialog.setParent(None) if self.resultWindow is not None: self.resultWindow.close() - self.resultWindow.setParent(None) + # This is better for tabs, as it takes care of duplicate items in menu bar + self.resultWindow.deleteLater() if self.use_tabs else self.resultWindow.setParent(None) if self.use_tabs: self.resultWindow = self.main_window.createPage( "ResultWindow", parent=self.main_window, app=self) From ea11a566af4e351760b9fc2aca1fa1fa9cc73366 Mon Sep 17 00:00:00 2001 From: glubsy Date: Tue, 1 Sep 2020 23:02:58 +0200 Subject: [PATCH 09/21] Highlight rows when testing regex string * Add testing feature to Exclusion dialog to allow users to test regexes against an arbitrary string. * Fixed test suites. * Improve comments and help dialog box. --- core/directories.py | 2 - core/exclude.py | 138 ++++++++++++++++++-------------- core/gui/exclude_list_dialog.py | 22 +++-- core/gui/exclude_list_table.py | 30 +------ core/tests/directories_test.py | 12 +-- core/tests/exclude_test.py | 45 ++++++----- qt/app.py | 4 +- qt/exclude_list_dialog.py | 90 +++++++++++++++++---- qt/exclude_list_table.py | 37 +++------ 9 files changed, 216 insertions(+), 164 deletions(-) diff --git a/core/directories.py b/core/directories.py index aa6298d4..7cd103fc 100644 --- a/core/directories.py +++ b/core/directories.py @@ -59,8 +59,6 @@ class Directories: # {path: state} self.states = {} self._exclude_list = exclude_list - if exclude_list is not None: - exclude_list._combined_regex = False # TODO make a setter def __contains__(self, path): for p in self._dirs: diff --git a/core/exclude.py b/core/exclude.py index e0e8a901..d0807b86 100644 --- a/core/exclude.py +++ b/core/exclude.py @@ -20,7 +20,7 @@ default_regexes = [r"^thumbs\.db$", # Obsolete after WindowsXP r"^\$Recycle\.Bin$", # Windows r"^\..*" # Hidden files ] -# These are too agressive +# These are too broad forbidden_regexes = [r".*", r"\/.*", r".*\/.*", r".*\..*"] @@ -53,20 +53,21 @@ class AlreadyThereException(Exception): class ExcludeList(Markable): - """Exclude list of regular expression strings to filter out directories - and files that we want to avoid scanning. - The list() class allows us to preserve item order without too much hassle. - The downside is we have to compare strings every time we look for an item in the list - since we use regex strings as keys. - [regex:str, compilable:bool, error:Exception, compiled:Pattern]) - If combined_regex is True, the compiled regexes will be combined into one Pattern - instead of returned as separate Patterns. - """ + """A list of lists holding regular expression strings and the compiled re.Pattern""" + + # Used to filter out directories and files that we would rather avoid scanning. + # The list() class allows us to preserve item order without too much hassle. + # The downside is we have to compare strings every time we look for an item in the list + # since we use regex strings as keys. + # If _use_union is True, the compiled regexes will be combined into one single + # Pattern instead of separate Patterns which may or may not give better + # performance compared to looping through each Pattern individually. # ---Override - def __init__(self, combined_regex=False): + def __init__(self, union_regex=True): Markable.__init__(self) - self._use_combined = combined_regex + self._use_union = union_regex + # list([str regex, bool iscompilable, re.error exception, Pattern compiled], ...) self._excluded = [] self._excluded_compiled = set() self._dirty = True @@ -85,6 +86,7 @@ class ExcludeList(Markable): return len(self._excluded) def __getitem__(self, key): + """Returns the list item corresponding to key.""" for item in self._excluded: if item[0] == key: return item @@ -98,6 +100,10 @@ class ExcludeList(Markable): # TODO if necessary pass + def get_compiled(self, key): + """Returns the (precompiled) Pattern for key""" + return self.__getitem__(key)[3] + def is_markable(self, regex): return self._is_markable(regex) @@ -106,7 +112,7 @@ class ExcludeList(Markable): for item in self._excluded: if item[0] == regex: return item[1] - return False # should not be necessary, regex SHOULD be in there + return False # should not be necessary, the regex SHOULD be in there def _did_mark(self, regex): self._add_compiled(regex) @@ -116,7 +122,7 @@ class ExcludeList(Markable): def _add_compiled(self, regex): self._dirty = True - if self._use_combined: + if self._use_union: return for item in self._excluded: # FIXME probably faster to just rebuild the set from the compiled instead of comparing strings @@ -127,7 +133,7 @@ class ExcludeList(Markable): def _remove_compiled(self, regex): self._dirty = True - if self._use_combined: + if self._use_union: return for item in self._excluded_compiled: if regex in item.pattern: @@ -153,50 +159,51 @@ class ExcludeList(Markable): return True, None, compiled def error(self, regex): - """Return the compilation error Exception for regex. It should have a "msg" attr.""" + """Return the compilation error Exception for regex. + It should have a "msg" attr.""" for item in self._excluded: if item[0] == regex: return item[2] - def build_compiled_caches(self, combined=False): - if not combined: + def build_compiled_caches(self, union=False): + if not union: self._cached_compiled_files =\ [x for x in self._excluded_compiled if sep not in x.pattern] self._cached_compiled_paths =\ [x for x in self._excluded_compiled if sep in x.pattern] return - # HACK returned as a tuple to get a free iterator to keep interface the same - # regardless of whether the client asked for combined or not marked_count = [x for marked, x in self if marked] # If there is no item, the compiled Pattern will be '' and match everything! if not marked_count: - self._cached_compiled_combined_all = [] - self._cached_compiled_combined_files = [] - self._cached_compiled_combined_paths = [] + self._cached_compiled_union_all = [] + self._cached_compiled_union_files = [] + self._cached_compiled_union_paths = [] else: - self._cached_compiled_combined_all =\ + # HACK returned as a tuple to get a free iterator and keep interface + # the same regardless of whether the client asked for union or not + self._cached_compiled_union_all =\ (re.compile('|'.join(marked_count)),) files_marked = [x for x in marked_count if sep not in x] if not files_marked: - self._cached_compiled_combined_files = tuple() + self._cached_compiled_union_files = tuple() else: - self._cached_compiled_combined_files =\ + self._cached_compiled_union_files =\ (re.compile('|'.join(files_marked)),) paths_marked = [x for x in marked_count if sep in x] if not paths_marked: - self._cached_compiled_combined_paths = tuple() + self._cached_compiled_union_paths = tuple() else: - self._cached_compiled_combined_paths =\ + self._cached_compiled_union_paths =\ (re.compile('|'.join(paths_marked)),) @property def compiled(self): """Should be used by other classes to retrieve the up-to-date list of patterns.""" - if self._use_combined: + if self._use_union: if self._dirty: self.build_compiled_caches(True) self._dirty = False - return self._cached_compiled_combined_all + return self._cached_compiled_union_all return self._excluded_compiled @property @@ -204,23 +211,26 @@ class ExcludeList(Markable): """When matching against filenames only, we probably won't be seeing any directory separator, so we filter out regexes with os.sep in them. The interface should be expected to be a generator, even if it returns only - one item (one Pattern in the combined case).""" + one item (one Pattern in the union case).""" if self._dirty: - self.build_compiled_caches(True if self._use_combined else False) + self.build_compiled_caches(True if self._use_union else False) self._dirty = False - return self._cached_compiled_combined_files if self._use_combined else self._cached_compiled_files + return self._cached_compiled_union_files if self._use_union\ + else self._cached_compiled_files @property def compiled_paths(self): """Returns patterns with only separators in them, for more precise filtering.""" if self._dirty: - self.build_compiled_caches(True if self._use_combined else False) + self.build_compiled_caches(True if self._use_union else False) self._dirty = False - return self._cached_compiled_combined_paths if self._use_combined else self._cached_compiled_paths + return self._cached_compiled_union_paths if self._use_union\ + else self._cached_compiled_paths # ---Public def add(self, regex, forced=False): - """This interface should throw exceptions if there is an error during regex compilation""" + """This interface should throw exceptions if there is an error during + regex compilation""" if self.isExcluded(regex): # This exception should never be ignored raise AlreadyThereException() @@ -229,7 +239,8 @@ class ExcludeList(Markable): iscompilable, exception, compiled = self.compile_re(regex) if not iscompilable and not forced: - # This exception can be ignored, but taken into account to avoid adding to compiled set + # This exception can be ignored, but taken into account + # to avoid adding to compiled set raise exception else: self._do_add(regex, iscompilable, exception, compiled) @@ -249,10 +260,6 @@ class ExcludeList(Markable): return True return False - def clear(self): - """Not used and needs refactoring""" - self._excluded = [] - def remove(self, regex): for item in self._excluded: if item[0] == regex: @@ -260,7 +267,6 @@ class ExcludeList(Markable): self._remove_compiled(regex) def rename(self, regex, newregex): - # if regex not in self._excluded: return if regex == newregex: return found = False @@ -318,7 +324,8 @@ class ExcludeList(Markable): # "forced" avoids compilation exceptions and adds anyway self.add(regex_string, forced=True) except AlreadyThereException: - logging.error(f"Regex \"{regex_string}\" loaded from XML was already present in the list.") + logging.error(f"Regex \"{regex_string}\" \ +loaded from XML was already present in the list.") continue if exclude_item.get("marked") == "y": marked.add(regex_string) @@ -328,9 +335,7 @@ class ExcludeList(Markable): def save_to_xml(self, outfile): """Create a XML file that can be used by load_from_xml. - - outfile can be a file object or a filename. - """ + outfile can be a file object or a filename.""" root = ET.Element("exclude_list") # reversed in order to keep order of entries when reloading from xml later for item in reversed(self._excluded): @@ -343,15 +348,23 @@ class ExcludeList(Markable): class ExcludeDict(ExcludeList): - """Version implemented around a dictionary instead of a list, which implies - to keep the index of each string-key as its sub-element and keep it updated - whenever insert/remove is done.""" + """Exclusion list holding a set of regular expressions as keys, the compiled + Pattern, compilation error and compilable boolean as values.""" + # Implemntation around a dictionary instead of a list, which implies + # to keep the index of each string-key as its sub-element and keep it updated + # whenever insert/remove is done. - def __init__(self, combined_regex=False): + def __init__(self, union_regex=False): Markable.__init__(self) - self._use_combined = combined_regex - # { "regex": { "index": int, "compilable": bool, "error": str, "compiled": Pattern or None}} - # Note: "compilable" key should only be updated on add / rename + self._use_union = union_regex + # { "regex string": + # { + # "index": int, + # "compilable": bool, + # "error": str, + # "compiled": Pattern or None + # } + # } self._excluded = {} self._excluded_compiled = set() self._dirty = True @@ -361,6 +374,14 @@ class ExcludeDict(ExcludeList): for regex in ordered_keys(self._excluded): yield self.is_marked(regex), regex + def __getitem__(self, key): + """Returns the dict item correponding to key""" + return self._excluded.__getitem__(key) + + def get_compiled(self, key): + """Returns the compiled item for key""" + return self.__getitem__(key).get("compiled") + def is_markable(self, regex): return self._is_markable(regex) @@ -373,12 +394,12 @@ class ExcludeDict(ExcludeList): def _add_compiled(self, regex): self._dirty = True - if self._use_combined: + if self._use_union: return try: self._excluded_compiled.add(self._excluded[regex]["compiled"]) except Exception as e: - print(f"Exception while adding regex {regex} to compiled set: {e}") + logging.warning(f"Exception while adding regex {regex} to compiled set: {e}") return def is_compilable(self, regex): @@ -391,7 +412,8 @@ class ExcludeDict(ExcludeList): # ---Public def _do_add(self, regex, iscompilable, exception, compiled): - # We always insert at the top, so index should be 0 and other indices should be pushed by one + # We always insert at the top, so index should be 0 + # and other indices should be pushed by one for value in self._excluded.values(): value["index"] += 1 self._excluded[regex] = { @@ -406,10 +428,6 @@ class ExcludeDict(ExcludeList): return True return False - def clear(self): - """Not used, need refactoring""" - self._excluded = {} - def remove(self, regex): old_value = self._excluded.pop(regex) # Bring down all indices which where above it diff --git a/core/gui/exclude_list_dialog.py b/core/gui/exclude_list_dialog.py index e35d4c9f..c6409ef7 100644 --- a/core/gui/exclude_list_dialog.py +++ b/core/gui/exclude_list_dialog.py @@ -7,13 +7,10 @@ # from hscommon.trans import tr from .exclude_list_table import ExcludeListTable +import logging class ExcludeListDialogCore: - # --- View interface - # show() - # - def __init__(self, app): self.app = app self.exclude_list = self.app.exclude_list # Markable from exclude.py @@ -43,7 +40,7 @@ class ExcludeListDialogCore: self.refresh() return True except Exception as e: - print(f"dupeGuru Warning: {e}") + logging.warning(f"Error while renaming regex to {newregex}: {e}") return False def add(self, regex): @@ -54,5 +51,20 @@ class ExcludeListDialogCore: self.exclude_list.mark(regex) self.exclude_list_table.add(regex) + def test_string(self, test_string): + """Sets property on row to highlight if its regex matches test_string supplied.""" + matched = False + for row in self.exclude_list_table.rows: + if self.exclude_list.get_compiled(row.regex).match(test_string): + matched = True + row.highlight = True + else: + row.highlight = False + return matched + + def reset_rows_highlight(self): + for row in self.exclude_list_table.rows: + row.highlight = False + def show(self): self.view.show() diff --git a/core/gui/exclude_list_table.py b/core/gui/exclude_list_table.py index 6a0294f7..8875d330 100644 --- a/core/gui/exclude_list_table.py +++ b/core/gui/exclude_list_table.py @@ -31,8 +31,7 @@ class ExcludeListTable(GUITable, DupeGuruGUIObject): # --- Virtual def _do_add(self, regex): """(Virtual) Creates a new row, adds it in the table. - Returns ``(row, insert_index)``. - """ + Returns ``(row, insert_index)``.""" # Return index 0 to insert at the top return ExcludeListRow(self, self.dialog.exclude_list.is_marked(regex), regex), 0 @@ -43,30 +42,18 @@ class ExcludeListTable(GUITable, DupeGuruGUIObject): def add(self, regex): row, insert_index = self._do_add(regex) self.insert(insert_index, row) - # self.select([insert_index]) self.view.refresh() def _fill(self): for enabled, regex in self.dialog.exclude_list: self.append(ExcludeListRow(self, enabled, regex)) - # def remove(self): - # super().remove(super().selected_rows) - - # def _update_selection(self): - # # rows = self.selected_rows - # # self.dialog._select_rows(list(map(attrgetter("_dupe"), rows))) - # self.dialog.remove_selected() - def refresh(self, refresh_view=True): """Override to avoid keeping previous selection in case of multiple rows selected previously.""" self.cancel_edits() del self[:] self._fill() - # sd = self._sort_descriptor - # if sd is not None: - # super().sort_by(self, column_name=sd.column, desc=sd.desc) if refresh_view: self.view.refresh() @@ -76,18 +63,14 @@ class ExcludeListRow(Row): Row.__init__(self, table) self._app = table.app self._data = None - self.enabled_original = enabled - self.regex_original = regex self.enabled = str(enabled) self.regex = str(regex) + self.highlight = False @property def data(self): - def get_display_info(row): - return {"marked": row.enabled, "regex": row.regex} - if self._data is None: - self._data = get_display_info(self) + self._data = {"marked": self.enabled, "regex": self.regex} return self._data @property @@ -113,10 +96,3 @@ class ExcludeListRow(Row): return self._app.exclude_list.error(self.regex).msg else: return message # Exception object - # @property - # def regex(self): - # return self.regex - - # @regex.setter - # def regex(self, value): - # self._app.exclude_list.add(self._regex, value) diff --git a/core/tests/directories_test.py b/core/tests/directories_test.py index 1ce84fb4..061e1476 100644 --- a/core/tests/directories_test.py +++ b/core/tests/directories_test.py @@ -346,7 +346,7 @@ def test_default_path_state_override(tmpdir): class TestExcludeList(): def setup_method(self, method): - self.d = Directories(exclude_list=ExcludeList(combined_regex=False)) + self.d = Directories(exclude_list=ExcludeList(union_regex=False)) def get_files_and_expect_num_result(self, num_result): """Calls get_files(), get the filenames only, print for debugging. @@ -523,14 +523,14 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled class TestExcludeDict(TestExcludeList): def setup_method(self, method): - self.d = Directories(exclude_list=ExcludeDict(combined_regex=False)) + self.d = Directories(exclude_list=ExcludeDict(union_regex=False)) -class TestExcludeListCombined(TestExcludeList): +class TestExcludeListunion(TestExcludeList): def setup_method(self, method): - self.d = Directories(exclude_list=ExcludeList(combined_regex=True)) + self.d = Directories(exclude_list=ExcludeList(union_regex=True)) -class TestExcludeDictCombined(TestExcludeList): +class TestExcludeDictunion(TestExcludeList): def setup_method(self, method): - self.d = Directories(exclude_list=ExcludeDict(combined_regex=True)) + self.d = Directories(exclude_list=ExcludeDict(union_regex=True)) diff --git a/core/tests/exclude_test.py b/core/tests/exclude_test.py index 0dc4a033..3745ac21 100644 --- a/core/tests/exclude_test.py +++ b/core/tests/exclude_test.py @@ -96,7 +96,7 @@ class TestCaseDictXMLLoading(TestCaseListXMLLoading): class TestCaseListEmpty: def setup_method(self, method): self.app = DupeGuru() - self.app.exclude_list = ExcludeList() + self.app.exclude_list = ExcludeList(union_regex=False) self.exclude_list = self.app.exclude_list def test_add_mark_and_remove_regex(self): @@ -216,29 +216,29 @@ class TestCaseDictEmpty(TestCaseListEmpty): """Same, but with dictionary implementation""" def setup_method(self, method): self.app = DupeGuru() - self.app.exclude_list = ExcludeDict() + self.app.exclude_list = ExcludeDict(union_regex=False) self.exclude_list = self.app.exclude_list -def split_combined(pattern_object): - """Returns list of strings for each combined pattern""" +def split_union(pattern_object): + """Returns list of strings for each union pattern""" return [x for x in pattern_object.pattern.split("|")] class TestCaseCompiledList(): - """Test consistency between combined or not""" + """Test consistency between union or and separate versions.""" def setup_method(self, method): - self.e_separate = ExcludeList(combined_regex=False) + self.e_separate = ExcludeList(union_regex=False) self.e_separate.restore_defaults() - self.e_combined = ExcludeList(combined_regex=True) - self.e_combined.restore_defaults() + self.e_union = ExcludeList(union_regex=True) + self.e_union.restore_defaults() def test_same_number_of_expressions(self): - # We only get one combined Pattern item in a tuple, which is made of however many parts - eq_(len(split_combined(self.e_combined.compiled[0])), len(default_regexes)) + # We only get one union Pattern item in a tuple, which is made of however many parts + eq_(len(split_union(self.e_union.compiled[0])), len(default_regexes)) # We get as many as there are marked items eq_(len(self.e_separate.compiled), len(default_regexes)) - exprs = split_combined(self.e_combined.compiled[0]) + exprs = split_union(self.e_union.compiled[0]) # We should have the same number and the same expressions eq_(len(exprs), len(self.e_separate.compiled)) for expr in self.e_separate.compiled: @@ -249,29 +249,30 @@ class TestCaseCompiledList(): regex1 = r"test/one/sub" self.e_separate.add(regex1) self.e_separate.mark(regex1) - self.e_combined.add(regex1) - self.e_combined.mark(regex1) + self.e_union.add(regex1) + self.e_union.mark(regex1) separate_compiled_dirs = self.e_separate.compiled separate_compiled_files = [x for x in self.e_separate.compiled_files] # HACK we need to call compiled property FIRST to generate the cache - combined_compiled_dirs = self.e_combined.compiled - # print(f"type: {type(self.e_combined.compiled_files[0])}") + union_compiled_dirs = self.e_union.compiled + # print(f"type: {type(self.e_union.compiled_files[0])}") # A generator returning only one item... ugh - combined_compiled_files = [x for x in self.e_combined.compiled_files][0] - print(f"compiled files: {combined_compiled_files}") + union_compiled_files = [x for x in self.e_union.compiled_files][0] + print(f"compiled files: {union_compiled_files}") # Separate should give several plus the one added eq_(len(separate_compiled_dirs), len(default_regexes) + 1) # regex1 shouldn't be in the "files" version eq_(len(separate_compiled_files), len(default_regexes)) # Only one Pattern returned, which when split should be however many + 1 - eq_(len(split_combined(combined_compiled_dirs[0])), len(default_regexes) + 1) + eq_(len(split_union(union_compiled_dirs[0])), len(default_regexes) + 1) # regex1 shouldn't be here either - eq_(len(split_combined(combined_compiled_files)), len(default_regexes)) + eq_(len(split_union(union_compiled_files)), len(default_regexes)) class TestCaseCompiledDict(TestCaseCompiledList): + """Test the dictionary version""" def setup_method(self, method): - self.e_separate = ExcludeDict(combined_regex=False) + self.e_separate = ExcludeDict(union_regex=False) self.e_separate.restore_defaults() - self.e_combined = ExcludeDict(combined_regex=True) - self.e_combined.restore_defaults() + self.e_union = ExcludeDict(union_regex=True) + self.e_union.restore_defaults() diff --git a/qt/app.py b/qt/app.py index 12f0cbdf..574b6a21 100644 --- a/qt/app.py +++ b/qt/app.py @@ -137,7 +137,7 @@ class DupeGuru(QObject): tr("Clear Picture Cache"), self.clearPictureCacheTriggered, ), - ("actionExcludeList", "", "", tr("Exclude list"), self.excludeListTriggered), + ("actionExcludeList", "", "", tr("Exclusion Filters"), self.excludeListTriggered), ("actionShowHelp", "F1", "", tr("dupeGuru Help"), self.showHelpTriggered), ("actionAbout", "", "", tr("About dupeGuru"), self.showAboutBoxTriggered), ( @@ -285,7 +285,7 @@ class DupeGuru(QObject): def excludeListTriggered(self): if self.use_tabs: - self.showTriggeredTabbedDialog(self.excludeListDialog, "Exclude List") + self.showTriggeredTabbedDialog(self.excludeListDialog, "Exclusion Filters") else: # floating windows self.model.exclude_list_dialog.show() diff --git a/qt/exclude_list_dialog.py b/qt/exclude_list_dialog.py index f251d1e2..3c23b83f 100644 --- a/qt/exclude_list_dialog.py +++ b/qt/exclude_list_dialog.py @@ -2,12 +2,13 @@ # which should be included with this package. The terms are also available at # http://www.gnu.org/licenses/gpl-3.0.html +import re from PyQt5.QtCore import Qt, pyqtSlot from PyQt5.QtWidgets import ( QPushButton, QLineEdit, QVBoxLayout, QGridLayout, QDialog, QTableView, QAbstractItemView, QSpacerItem, QSizePolicy, QHeaderView ) -from .exclude_list_table import ExcludeListTable, ExcludeView +from .exclude_list_table import ExcludeListTable from core.exclude import AlreadyThereException from hscommon.trans import trget @@ -24,12 +25,18 @@ class ExcludeListDialog(QDialog): self.model = model # ExcludeListDialogCore self.model.view = self self.table = ExcludeListTable(app, view=self.tableView) # Qt ExcludeListTable + self._row_matched = False # test if at least one row matched our test string + self._input_styled = False self.buttonAdd.clicked.connect(self.addStringFromLineEdit) self.buttonRemove.clicked.connect(self.removeSelected) self.buttonRestore.clicked.connect(self.restoreDefaults) self.buttonClose.clicked.connect(self.accept) self.buttonHelp.clicked.connect(self.display_help_message) + self.buttonTestString.clicked.connect(self.onTestStringButtonClicked) + self.inputLine.textEdited.connect(self.reset_input_style) + self.testLine.textEdited.connect(self.reset_input_style) + self.testLine.textEdited.connect(self.reset_table_style) def _setupUI(self): layout = QVBoxLayout(self) @@ -37,10 +44,12 @@ class ExcludeListDialog(QDialog): self.buttonAdd = QPushButton(tr("Add")) self.buttonRemove = QPushButton(tr("Remove Selected")) self.buttonRestore = QPushButton(tr("Restore defaults")) + self.buttonTestString = QPushButton(tr("Test string")) self.buttonClose = QPushButton(tr("Close")) self.buttonHelp = QPushButton(tr("Help")) - self.linedit = QLineEdit() - self.tableView = ExcludeView() + self.inputLine = QLineEdit() + self.testLine = QLineEdit() + self.tableView = QTableView() triggers = ( QAbstractItemView.DoubleClicked | QAbstractItemView.EditKeyPressed @@ -59,26 +68,31 @@ class ExcludeListDialog(QDialog): hheader.setStretchLastSection(True) hheader.setHighlightSections(False) hheader.setVisible(True) - gridlayout.addWidget(self.linedit, 0, 0) + gridlayout.addWidget(self.inputLine, 0, 0) gridlayout.addWidget(self.buttonAdd, 0, 1, Qt.AlignLeft) gridlayout.addWidget(self.buttonRemove, 1, 1, Qt.AlignLeft) gridlayout.addWidget(self.buttonRestore, 2, 1, Qt.AlignLeft) gridlayout.addWidget(self.buttonHelp, 3, 1, Qt.AlignLeft) - gridlayout.addWidget(self.tableView, 1, 0, 5, 1) + gridlayout.addWidget(self.buttonClose, 4, 1) + gridlayout.addWidget(self.tableView, 1, 0, 6, 1) gridlayout.addItem(QSpacerItem(0, 0, QSizePolicy.Minimum, QSizePolicy.Expanding), 4, 1) - gridlayout.addWidget(self.buttonClose, 5, 1) + gridlayout.addWidget(self.buttonTestString, 6, 1) + gridlayout.addWidget(self.testLine, 6, 0) + layout.addLayout(gridlayout) - self.linedit.setPlaceholderText("Type a regular expression here...") - self.linedit.setFocus() + self.inputLine.setPlaceholderText("Type a python regular expression here...") + self.inputLine.setFocus() + self.testLine.setPlaceholderText("Type a file system path or filename here...") + self.testLine.setClearButtonEnabled(True) # --- model --> view def show(self): super().show() - self.linedit.setFocus() + self.inputLine.setFocus() @pyqtSlot() def addStringFromLineEdit(self): - text = self.linedit.text() + text = self.inputLine.text() if not text: return try: @@ -89,7 +103,7 @@ class ExcludeListDialog(QDialog): except Exception as e: self.app.show_message(f"Expression is invalid: {e}") return - self.linedit.clear() + self.inputLine.clear() def removeSelected(self): self.model.remove_selected() @@ -97,8 +111,54 @@ class ExcludeListDialog(QDialog): def restoreDefaults(self): self.model.restore_defaults() + def onTestStringButtonClicked(self): + input_text = self.testLine.text() + if not input_text: + self.reset_input_style() + return + # if at least one row matched, we know whether table is highlighted or not + self._row_matched = self.model.test_string(input_text) + self.tableView.update() + + input_regex = self.inputLine.text() + if not input_regex: + self.reset_input_style() + return + try: + compiled = re.compile(input_regex) + except re.error: + self.reset_input_style() + return + match = compiled.match(input_text) + if match: + self._input_styled = True + self.inputLine.setStyleSheet("background-color: rgb(10, 120, 10);") + else: + self.reset_input_style() + + def reset_input_style(self): + """Reset regex input line background""" + if self._input_styled: + self._input_styled = False + self.inputLine.setStyleSheet(self.styleSheet()) + + def reset_table_style(self): + if self._row_matched: + self._row_matched = False + self.model.reset_rows_highlight() + self.tableView.update() + def display_help_message(self): - self.app.show_message("""\ -These python regular expressions will filter out files and directory paths \ -specified here.\nDuring directory selection, paths filtered here will be added as \ -"Skipped" by default, but regular files will be ignored altogether during scans.""") + self.app.show_message(tr("""\ +These (case sensitive) python regular expressions will filter out files during scans.
\ +Directores will also have their default state set to Excluded \ +in the Directories tab if their name happen to match one of the regular expressions.
\ +For each file collected two tests are perfomed on each of them to determine whether or not to filter them out:
\ +
  • 1. Regular expressions with no path separator in them will be compared to the file name only.
  • +
  • 2. Regular expressions with no path separator in them will be compared to the full path to the file.

  • +Example: if you want to filter out .PNG files from the "My Pictures" directory only:
    \ +.*My\\sPictures\\\\.*\\.png

    \ +You can test the regular expression with the test string feature by pasting a fake path in it:
    \ +C:\\\\User\\My Pictures\\test.png

    +Matching regular expressions will be highlighted.

    +Directories and files starting with a period '.' are filtered out by default.

    """)) diff --git a/qt/exclude_list_table.py b/qt/exclude_list_table.py index 5f729dfa..7c6a3b63 100644 --- a/qt/exclude_list_table.py +++ b/qt/exclude_list_table.py @@ -2,9 +2,8 @@ # which should be included with this package. The terms are also available at # http://www.gnu.org/licenses/gpl-3.0.html -from PyQt5.QtCore import Qt, QModelIndex -from PyQt5.QtGui import QFont, QFontMetrics, QIcon -from PyQt5.QtWidgets import QTableView +from PyQt5.QtCore import Qt +from PyQt5.QtGui import QFont, QFontMetrics, QIcon, QColor from qtlib.column import Column from qtlib.table import Table @@ -20,13 +19,12 @@ class ExcludeListTable(Table): def __init__(self, app, view, **kwargs): model = app.model.exclude_list_dialog.exclude_list_table # pointer to GUITable super().__init__(model, view, **kwargs) - # view.horizontalHeader().setSortIndicator(1, Qt.AscendingOrder) font = view.font() font.setPointSize(app.prefs.tableFontSize) view.setFont(font) fm = QFontMetrics(font) view.verticalHeader().setDefaultSectionSize(fm.height() + 2) - app.willSavePrefs.connect(self.appWillSavePrefs) + # app.willSavePrefs.connect(self.appWillSavePrefs) def _getData(self, row, column, role): if column.name == "marked": @@ -41,6 +39,9 @@ class ExcludeListTable(Table): return row.data[column.name] elif role == Qt.FontRole: return QFont(self.view.font()) + elif role == Qt.BackgroundRole and column.name == "regex": + if row.highlight: + return QColor(10, 120, 10) # green elif role == Qt.EditRole: if column.name == "regex": return row.data[column.name] @@ -65,24 +66,10 @@ class ExcludeListTable(Table): return self.model.rename_selected(value) return False - def sort(self, column, order): - column = self.model.COLUMNS[column] - self.model.sort(column.name, order == Qt.AscendingOrder) + # def sort(self, column, order): + # column = self.model.COLUMNS[column] + # self.model.sort(column.name, order == Qt.AscendingOrder) - # --- Events - def appWillSavePrefs(self): - self.model.columns.save_columns() - - # --- model --> view - def invalidate_markings(self): - # redraw view - # HACK. this is the only way I found to update the widget without reseting everything - self.view.scroll(0, 1) - self.view.scroll(0, -1) - - -class ExcludeView(QTableView): - def mouseDoubleClickEvent(self, event): - # FIXME this doesn't seem to do anything relevant - self.doubleClicked.emit(QModelIndex()) - # We don't call the superclass' method because the default behavior is to rename the cell. + # # --- Events + # def appWillSavePrefs(self): + # self.model.columns.save_columns() From 18c933b4bf7fca844436e3152a3479dde532d36e Mon Sep 17 00:00:00 2001 From: glubsy Date: Wed, 2 Sep 2020 20:26:23 +0200 Subject: [PATCH 10/21] Prevent widget from stretching in layout * In some themes, the color picker widgets get stretched, while the color picker for the details dialog group doesn't. This should keep them a bit more consistent across themes. --- qt/preferences_dialog.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/qt/preferences_dialog.py b/qt/preferences_dialog.py index bae22e4e..3897a77e 100644 --- a/qt/preferences_dialog.py +++ b/qt/preferences_dialog.py @@ -161,25 +161,28 @@ On MacOS, the tab bar will fill up the window's width instead.")) self.ui_groupbox.setLayout(layout) self.displayVLayout.addWidget(self.ui_groupbox) - gridlayout = QFormLayout() + gridlayout = QGridLayout() + gridlayout.setColumnStretch(2, 2) + formlayout = QFormLayout() result_groupbox = QGroupBox("&Result Table") self.fontSizeSpinBox = QSpinBox() self.fontSizeSpinBox.setMinimum(5) - gridlayout.addRow(tr("Font size:"), self.fontSizeSpinBox) + formlayout.addRow(tr("Font size:"), self.fontSizeSpinBox) self._setupAddCheckbox("reference_bold_font", tr("Use bold font for references")) - gridlayout.addRow(self.reference_bold_font) + formlayout.addRow(self.reference_bold_font) self.result_table_ref_foreground_color = ColorPickerButton(self) - gridlayout.addRow(tr("Reference foreground color:"), + formlayout.addRow(tr("Reference foreground color:"), self.result_table_ref_foreground_color) self.result_table_delta_foreground_color = ColorPickerButton(self) - gridlayout.addRow(tr("Delta foreground color:"), + formlayout.addRow(tr("Delta foreground color:"), self.result_table_delta_foreground_color) - gridlayout.setLabelAlignment(Qt.AlignLeft) + formlayout.setLabelAlignment(Qt.AlignLeft) # Keep same vertical spacing as parent layout for consistency - gridlayout.setVerticalSpacing(self.displayVLayout.spacing()) + formlayout.setVerticalSpacing(self.displayVLayout.spacing()) + gridlayout.addLayout(formlayout, 0, 0) result_groupbox.setLayout(gridlayout) self.displayVLayout.addWidget(result_groupbox) @@ -202,12 +205,13 @@ use the modifier key to drag the floating window around") if ISLINUX else self.details_dialog_titlebar_enabled.stateChanged.connect( self.details_dialog_vertical_titlebar.setEnabled) gridlayout = QGridLayout() - self.details_table_delta_foreground_color_label = QLabel(tr("Delta foreground color:")) - gridlayout.addWidget(self.details_table_delta_foreground_color_label, 4, 0) + formlayout = QFormLayout() self.details_table_delta_foreground_color = ColorPickerButton(self) - gridlayout.addWidget(self.details_table_delta_foreground_color, 4, 2, 1, 1, Qt.AlignLeft) + # Padding on the right side and space between label and widget to keep it somewhat consistent across themes gridlayout.setColumnStretch(1, 1) - gridlayout.setColumnStretch(3, 4) + formlayout.setHorizontalSpacing(50) + formlayout.addRow(tr("Delta foreground color:"), self.details_table_delta_foreground_color) + gridlayout.addLayout(formlayout, 0, 0) self.details_groupbox_layout.addLayout(gridlayout) details_groupbox.setLayout(self.details_groupbox_layout) self.displayVLayout.addWidget(details_groupbox) From a55e02b36d338564a5a6655842ae5e2d7e9d169b Mon Sep 17 00:00:00 2001 From: glubsy Date: Wed, 2 Sep 2020 23:45:31 +0200 Subject: [PATCH 11/21] Fix table maximum size being off by a few pixels * Sometimes, the splitter doesn't fully reach the table maximum height, and the scrollbar is still displayed on the right because a few pixels are still hidden. * It seems the splitter handle counts towards the total height of the widget (the table), so we add it to the maximum height of the table * The scrollbar disappears when we reach just above the actual table's height --- qt/pe/details_dialog.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/qt/pe/details_dialog.py b/qt/pe/details_dialog.py index c83ee754..324a1307 100644 --- a/qt/pe/details_dialog.py +++ b/qt/pe/details_dialog.py @@ -9,7 +9,6 @@ from PyQt5.QtWidgets import ( QAbstractItemView, QSizePolicy, QGridLayout, QSplitter, QFrame) from PyQt5.QtGui import QResizeEvent from hscommon.trans import trget -from hscommon.plat import ISWINDOWS from ..details_dialog import DetailsDialog as DetailsDialogBase from ..details_table import DetailsTable from .image_viewer import ( @@ -102,14 +101,14 @@ class DetailsDialog(DetailsDialogBase): self.vController.updateBothImages() def show(self): - # Compute the maximum size the table view can reach - # Assuming all rows below headers have the same height + # Give the splitter a maximum height to reach. This is assuming that + # all rows below their headers have the same height self.tableView.setMaximumHeight( self.tableView.rowHeight(1) * self.tableModel.model.row_count() + self.tableView.verticalHeader().sectionSize(0) - # Windows seems to add a few pixels more to the table somehow - + (5 if ISWINDOWS else 0)) + # looks like the handle is taken into account by the splitter + + self.splitter.handle(1).size().height()) DetailsDialogBase.show(self) self.ensure_same_sizes() self._update() From 424d34a7ed83d6c8281e44a03095ee5bce02d5ba Mon Sep 17 00:00:00 2001 From: glubsy Date: Thu, 3 Sep 2020 02:12:20 +0200 Subject: [PATCH 12/21] Add desktop.ini to filter list --- core/exclude.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/exclude.py b/core/exclude.py index d0807b86..eb8ffc08 100644 --- a/core/exclude.py +++ b/core/exclude.py @@ -15,10 +15,11 @@ from hscommon.util import FileOrPath import time default_regexes = [r"^thumbs\.db$", # Obsolete after WindowsXP + r"^desktop\.ini$", # Windows metadata r"^\.DS_Store$", # MacOS metadata r"^\.Trash\-.*", # Linux trash directories r"^\$Recycle\.Bin$", # Windows - r"^\..*" # Hidden files + r"^\..*", # Hidden files ] # These are too broad forbidden_regexes = [r".*", r"\/.*", r".*\/.*", r".*\..*"] From 7f19647e4bd440140722017ad292adb660f77abf Mon Sep 17 00:00:00 2001 From: glubsy Date: Tue, 29 Dec 2020 00:56:25 +0100 Subject: [PATCH 13/21] Remove unused lines --- qt/tabbed_window.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/qt/tabbed_window.py b/qt/tabbed_window.py index 193323ca..2912ddcb 100644 --- a/qt/tabbed_window.py +++ b/qt/tabbed_window.py @@ -230,8 +230,6 @@ class TabWindow(QMainWindow): # menu or shortcut. But this is useless if we don't have a button # set up to make a close request anyway. This check could be removed. return - # current_widget.close() # seems unnecessary - # self.tabWidget.widget(index).hide() self.removeTab(index) @pyqtSlot() From 07eba09ec2e451a66fb3174925639bf7b73da1fa Mon Sep 17 00:00:00 2001 From: glubsy Date: Tue, 29 Dec 2020 01:01:26 +0100 Subject: [PATCH 14/21] Fix error after merging branches --- qt/preferences_dialog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qt/preferences_dialog.py b/qt/preferences_dialog.py index 70eeae36..c041b7d0 100644 --- a/qt/preferences_dialog.py +++ b/qt/preferences_dialog.py @@ -176,7 +176,7 @@ On MacOS, the tab bar will fill up the window's width instead.")) formlayout.addRow(tr("Reference foreground color:"), self.result_table_ref_foreground_color) self.result_table_ref_background_color = ColorPickerButton(self) - gridlayout.addRow(tr("Reference background color:"), + formlayout.addRow(tr("Reference background color:"), self.result_table_ref_background_color) self.result_table_delta_foreground_color = ColorPickerButton(self) formlayout.addRow(tr("Delta foreground color:"), From 4b4cc04e879c650e956b16f985447ce77c9cd40f Mon Sep 17 00:00:00 2001 From: glubsy Date: Tue, 29 Dec 2020 05:35:30 +0100 Subject: [PATCH 15/21] Fix directories tests on Windows Regexes did not match properly because the separator for Windows is '\\' --- core/exclude.py | 31 +++++++++++++++++++++++-------- core/tests/directories_test.py | 8 +++++++- core/tests/exclude_test.py | 2 +- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/core/exclude.py b/core/exclude.py index eb8ffc08..557ac823 100644 --- a/core/exclude.py +++ b/core/exclude.py @@ -12,6 +12,7 @@ from os import sep import logging import functools from hscommon.util import FileOrPath +from hscommon.plat import ISWINDOWS import time default_regexes = [r"^thumbs\.db$", # Obsolete after WindowsXP @@ -19,10 +20,10 @@ default_regexes = [r"^thumbs\.db$", # Obsolete after WindowsXP r"^\.DS_Store$", # MacOS metadata r"^\.Trash\-.*", # Linux trash directories r"^\$Recycle\.Bin$", # Windows - r"^\..*", # Hidden files + r"^\..*", # Hidden files on Unix-like ] # These are too broad -forbidden_regexes = [r".*", r"\/.*", r".*\/.*", r".*\..*"] +forbidden_regexes = [r".*", r"\/.*", r".*\/.*", r".*\\\\.*", r".*\..*"] def timer(func): @@ -168,10 +169,16 @@ class ExcludeList(Markable): def build_compiled_caches(self, union=False): if not union: - self._cached_compiled_files =\ - [x for x in self._excluded_compiled if sep not in x.pattern] - self._cached_compiled_paths =\ - [x for x in self._excluded_compiled if sep in x.pattern] + if not ISWINDOWS: + self._cached_compiled_files =\ + [x for x in self._excluded_compiled if not has_sep(x.pattern)] + self._cached_compiled_paths =\ + [x for x in self._excluded_compiled if has_sep(x.pattern)] + else: + self._cached_compiled_files =\ + [x for x in self._excluded_compiled if not has_sep(x.pattern)] + self._cached_compiled_paths =\ + [x for x in self._excluded_compiled if has_sep(x.pattern)] return marked_count = [x for marked, x in self if marked] # If there is no item, the compiled Pattern will be '' and match everything! @@ -184,13 +191,13 @@ class ExcludeList(Markable): # the same regardless of whether the client asked for union or not self._cached_compiled_union_all =\ (re.compile('|'.join(marked_count)),) - files_marked = [x for x in marked_count if sep not in x] + files_marked = [x for x in marked_count if not has_sep(x)] if not files_marked: self._cached_compiled_union_files = tuple() else: self._cached_compiled_union_files =\ (re.compile('|'.join(files_marked)),) - paths_marked = [x for x in marked_count if sep in x] + paths_marked = [x for x in marked_count if has_sep(x)] if not paths_marked: self._cached_compiled_union_paths = tuple() else: @@ -488,3 +495,11 @@ def ordered_keys(_dict): list_of_items.sort(key=lambda x: x[1].get("index")) for item in list_of_items: yield item[0] + + +if ISWINDOWS: + def has_sep(x): + return '\\' + sep in x +else: + def has_sep(x): + return sep in x diff --git a/core/tests/directories_test.py b/core/tests/directories_test.py index 061e1476..8a5ddcdb 100644 --- a/core/tests/directories_test.py +++ b/core/tests/directories_test.py @@ -12,6 +12,7 @@ import shutil from pytest import raises from hscommon.path import Path from hscommon.testutil import eq_ +from hscommon.plat import ISWINDOWS from ..fs import File from ..directories import ( @@ -428,7 +429,10 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled assert "unwanted_subdirfile.gif" not in files assert "unwanted_subdarfile.png" not in files - regex3 = r".*Recycle\.Bin\/.*unwanted.*subdirfile.*" + if ISWINDOWS: + regex3 = r".*Recycle\.Bin\\.*unwanted.*subdirfile.*" + else: + regex3 = r".*Recycle\.Bin\/.*unwanted.*subdirfile.*" self.d._exclude_list.rename(regex2, regex3) assert self.d._exclude_list.error(regex3) is None # print(f"get_folders(): {[x for x in self.d.get_folders()]}") @@ -516,6 +520,8 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled self.d.set_state(p1["foobar"][".hidden_dir"], DirectoryState.Normal) # The files should still be filtered files = self.get_files_and_expect_num_result(1) + eq_(len(self.d._exclude_list.compiled_paths), 0) + eq_(len(self.d._exclude_list.compiled_files), 1) assert ".hidden_file.txt" not in files assert ".hidden_subfile.png" not in files assert "foobar.jpg" in files diff --git a/core/tests/exclude_test.py b/core/tests/exclude_test.py index 3745ac21..8bfc8cc7 100644 --- a/core/tests/exclude_test.py +++ b/core/tests/exclude_test.py @@ -245,7 +245,7 @@ class TestCaseCompiledList(): assert expr.pattern in exprs def test_compiled_files(self): - # test is separator is indeed checked properly to yield the output + # is path separator checked properly to yield the output regex1 = r"test/one/sub" self.e_separate.add(regex1) self.e_separate.mark(regex1) From e533a396fbb0cd7856db432973c068b4d6c67415 Mon Sep 17 00:00:00 2001 From: glubsy Date: Tue, 29 Dec 2020 05:39:26 +0100 Subject: [PATCH 16/21] Remove redundant check --- core/exclude.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/core/exclude.py b/core/exclude.py index 557ac823..29b00e6b 100644 --- a/core/exclude.py +++ b/core/exclude.py @@ -169,16 +169,10 @@ class ExcludeList(Markable): def build_compiled_caches(self, union=False): if not union: - if not ISWINDOWS: - self._cached_compiled_files =\ - [x for x in self._excluded_compiled if not has_sep(x.pattern)] - self._cached_compiled_paths =\ - [x for x in self._excluded_compiled if has_sep(x.pattern)] - else: - self._cached_compiled_files =\ - [x for x in self._excluded_compiled if not has_sep(x.pattern)] - self._cached_compiled_paths =\ - [x for x in self._excluded_compiled if has_sep(x.pattern)] + self._cached_compiled_files =\ + [x for x in self._excluded_compiled if not has_sep(x.pattern)] + self._cached_compiled_paths =\ + [x for x in self._excluded_compiled if has_sep(x.pattern)] return marked_count = [x for marked, x in self if marked] # If there is no item, the compiled Pattern will be '' and match everything! From f0d3dec517a2c3db81b6c9344ddf89b80d6ab385 Mon Sep 17 00:00:00 2001 From: glubsy Date: Tue, 29 Dec 2020 16:07:55 +0100 Subject: [PATCH 17/21] Fix exclude tests --- core/tests/exclude_test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/tests/exclude_test.py b/core/tests/exclude_test.py index 8bfc8cc7..de5e46c5 100644 --- a/core/tests/exclude_test.py +++ b/core/tests/exclude_test.py @@ -11,6 +11,7 @@ from xml.etree import ElementTree as ET # from pytest import raises from hscommon.testutil import eq_ +from hscommon.plat import ISWINDOWS from .base import DupeGuru from ..exclude import ExcludeList, ExcludeDict, default_regexes, AlreadyThereException @@ -246,7 +247,10 @@ class TestCaseCompiledList(): def test_compiled_files(self): # is path separator checked properly to yield the output - regex1 = r"test/one/sub" + if ISWINDOWS: + regex1 = r"test\\one\\sub" + else: + regex1 = r"test/one/sub" self.e_separate.add(regex1) self.e_separate.mark(regex1) self.e_union.add(regex1) From b5f59d27c92cbaac41d2c421aab9295cf3321f57 Mon Sep 17 00:00:00 2001 From: glubsy Date: Tue, 29 Dec 2020 16:31:03 +0100 Subject: [PATCH 18/21] Brighten up validation color Dark green lacks contrast against black foreground font --- qt/exclude_list_dialog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qt/exclude_list_dialog.py b/qt/exclude_list_dialog.py index 3c23b83f..a4aa4af4 100644 --- a/qt/exclude_list_dialog.py +++ b/qt/exclude_list_dialog.py @@ -132,7 +132,7 @@ class ExcludeListDialog(QDialog): match = compiled.match(input_text) if match: self._input_styled = True - self.inputLine.setStyleSheet("background-color: rgb(10, 120, 10);") + self.inputLine.setStyleSheet("background-color: rgb(10, 220, 10);") else: self.reset_input_style() From b76e86686a75897db912bfd4d79e46824bc874e0 Mon Sep 17 00:00:00 2001 From: glubsy Date: Tue, 29 Dec 2020 16:41:34 +0100 Subject: [PATCH 19/21] Tweak green color on exclude table --- qt/exclude_list_dialog.py | 2 +- qt/exclude_list_table.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/qt/exclude_list_dialog.py b/qt/exclude_list_dialog.py index a4aa4af4..dd7d2002 100644 --- a/qt/exclude_list_dialog.py +++ b/qt/exclude_list_dialog.py @@ -132,7 +132,7 @@ class ExcludeListDialog(QDialog): match = compiled.match(input_text) if match: self._input_styled = True - self.inputLine.setStyleSheet("background-color: rgb(10, 220, 10);") + self.inputLine.setStyleSheet("background-color: rgb(10, 200, 10);") else: self.reset_input_style() diff --git a/qt/exclude_list_table.py b/qt/exclude_list_table.py index 7c6a3b63..3521c7a1 100644 --- a/qt/exclude_list_table.py +++ b/qt/exclude_list_table.py @@ -41,7 +41,7 @@ class ExcludeListTable(Table): return QFont(self.view.font()) elif role == Qt.BackgroundRole and column.name == "regex": if row.highlight: - return QColor(10, 120, 10) # green + return QColor(10, 200, 10) # green elif role == Qt.EditRole: if column.name == "regex": return row.data[column.name] From 39d353d073b03e06b003c23bc52b53d6c104c484 Mon Sep 17 00:00:00 2001 From: glubsy Date: Tue, 29 Dec 2020 18:28:30 +0100 Subject: [PATCH 20/21] Add comment about Win7 bug * For some reason the table view doesn't update properly after the test string button is clicked nor when the input field is edited * The table rows only get repainted the rows properly after receiving a mouse click event * This doesn't happen on Linux --- qt/exclude_list_dialog.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/qt/exclude_list_dialog.py b/qt/exclude_list_dialog.py index dd7d2002..f5ecf5d3 100644 --- a/qt/exclude_list_dialog.py +++ b/qt/exclude_list_dialog.py @@ -118,6 +118,8 @@ class ExcludeListDialog(QDialog): return # if at least one row matched, we know whether table is highlighted or not self._row_matched = self.model.test_string(input_text) + # FIXME There is a bug on Windows (7) where the table rows don't get + # repainted until the table receives a mouse click event. self.tableView.update() input_regex = self.inputLine.text() @@ -160,5 +162,6 @@ Example: if you want to filter out .PNG files from the "My Pictures" directory o .*My\\sPictures\\\\.*\\.png

    \ You can test the regular expression with the test string feature by pasting a fake path in it:
    \ C:\\\\User\\My Pictures\\test.png

    -Matching regular expressions will be highlighted.

    +Matching regular expressions will be highlighted.
    \ +If there is at least one highlight, the path tested will be ignored during scans.

    \ Directories and files starting with a period '.' are filtered out by default.

    """)) From a93bd3aeee4f05718bca7ab4f70f5aa797216996 Mon Sep 17 00:00:00 2001 From: glubsy Date: Tue, 29 Dec 2020 18:52:22 +0100 Subject: [PATCH 21/21] Add missing translation hooks --- qt/exclude_list_dialog.py | 4 ++-- qt/exclude_list_table.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/qt/exclude_list_dialog.py b/qt/exclude_list_dialog.py index f5ecf5d3..425d4eba 100644 --- a/qt/exclude_list_dialog.py +++ b/qt/exclude_list_dialog.py @@ -80,9 +80,9 @@ class ExcludeListDialog(QDialog): gridlayout.addWidget(self.testLine, 6, 0) layout.addLayout(gridlayout) - self.inputLine.setPlaceholderText("Type a python regular expression here...") + self.inputLine.setPlaceholderText(tr("Type a python regular expression here...")) self.inputLine.setFocus() - self.testLine.setPlaceholderText("Type a file system path or filename here...") + self.testLine.setPlaceholderText(tr("Type a file system path or filename here...")) self.testLine.setClearButtonEnabled(True) # --- model --> view diff --git a/qt/exclude_list_table.py b/qt/exclude_list_table.py index 3521c7a1..b58e2579 100644 --- a/qt/exclude_list_table.py +++ b/qt/exclude_list_table.py @@ -7,6 +7,8 @@ from PyQt5.QtGui import QFont, QFontMetrics, QIcon, QColor from qtlib.column import Column from qtlib.table import Table +from hscommon.trans import trget +tr = trget("ui") class ExcludeListTable(Table): @@ -31,7 +33,7 @@ class ExcludeListTable(Table): if role == Qt.CheckStateRole and row.markable: return Qt.Checked if row.marked else Qt.Unchecked if role == Qt.ToolTipRole and not row.markable: - return "Compilation error: " + row.get_cell_value("error") + return tr("Compilation error: ") + row.get_cell_value("error") if role == Qt.DecorationRole and not row.markable: return QIcon.fromTheme("dialog-error", QIcon(":/error")) return None