1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-22 14:41:39 +00:00

Highlight rows when testing regex string

* Add testing feature to Exclusion dialog to allow users to test regexes against an arbitrary string.
* Fixed test suites.
* Improve comments and help dialog box.
This commit is contained in:
glubsy
2020-09-01 23:02:58 +02:00
parent 584e9c92d9
commit ea11a566af
9 changed files with 216 additions and 164 deletions

View File

@@ -59,8 +59,6 @@ class Directories:
# {path: state}
self.states = {}
self._exclude_list = exclude_list
if exclude_list is not None:
exclude_list._combined_regex = False # TODO make a setter
def __contains__(self, path):
for p in self._dirs:

View File

@@ -20,7 +20,7 @@ default_regexes = [r"^thumbs\.db$", # Obsolete after WindowsXP
r"^\$Recycle\.Bin$", # Windows
r"^\..*" # Hidden files
]
# These are too agressive
# These are too broad
forbidden_regexes = [r".*", r"\/.*", r".*\/.*", r".*\..*"]
@@ -53,20 +53,21 @@ class AlreadyThereException(Exception):
class ExcludeList(Markable):
"""Exclude list of regular expression strings to filter out directories
and files that we want to avoid scanning.
The list() class allows us to preserve item order without too much hassle.
The downside is we have to compare strings every time we look for an item in the list
since we use regex strings as keys.
[regex:str, compilable:bool, error:Exception, compiled:Pattern])
If combined_regex is True, the compiled regexes will be combined into one Pattern
instead of returned as separate Patterns.
"""
"""A list of lists holding regular expression strings and the compiled re.Pattern"""
# Used to filter out directories and files that we would rather avoid scanning.
# The list() class allows us to preserve item order without too much hassle.
# The downside is we have to compare strings every time we look for an item in the list
# since we use regex strings as keys.
# If _use_union is True, the compiled regexes will be combined into one single
# Pattern instead of separate Patterns which may or may not give better
# performance compared to looping through each Pattern individually.
# ---Override
def __init__(self, combined_regex=False):
def __init__(self, union_regex=True):
Markable.__init__(self)
self._use_combined = combined_regex
self._use_union = union_regex
# list([str regex, bool iscompilable, re.error exception, Pattern compiled], ...)
self._excluded = []
self._excluded_compiled = set()
self._dirty = True
@@ -85,6 +86,7 @@ class ExcludeList(Markable):
return len(self._excluded)
def __getitem__(self, key):
"""Returns the list item corresponding to key."""
for item in self._excluded:
if item[0] == key:
return item
@@ -98,6 +100,10 @@ class ExcludeList(Markable):
# TODO if necessary
pass
def get_compiled(self, key):
"""Returns the (precompiled) Pattern for key"""
return self.__getitem__(key)[3]
def is_markable(self, regex):
return self._is_markable(regex)
@@ -106,7 +112,7 @@ class ExcludeList(Markable):
for item in self._excluded:
if item[0] == regex:
return item[1]
return False # should not be necessary, regex SHOULD be in there
return False # should not be necessary, the regex SHOULD be in there
def _did_mark(self, regex):
self._add_compiled(regex)
@@ -116,7 +122,7 @@ class ExcludeList(Markable):
def _add_compiled(self, regex):
self._dirty = True
if self._use_combined:
if self._use_union:
return
for item in self._excluded:
# FIXME probably faster to just rebuild the set from the compiled instead of comparing strings
@@ -127,7 +133,7 @@ class ExcludeList(Markable):
def _remove_compiled(self, regex):
self._dirty = True
if self._use_combined:
if self._use_union:
return
for item in self._excluded_compiled:
if regex in item.pattern:
@@ -153,50 +159,51 @@ class ExcludeList(Markable):
return True, None, compiled
def error(self, regex):
"""Return the compilation error Exception for regex. It should have a "msg" attr."""
"""Return the compilation error Exception for regex.
It should have a "msg" attr."""
for item in self._excluded:
if item[0] == regex:
return item[2]
def build_compiled_caches(self, combined=False):
if not combined:
def build_compiled_caches(self, union=False):
if not union:
self._cached_compiled_files =\
[x for x in self._excluded_compiled if sep not in x.pattern]
self._cached_compiled_paths =\
[x for x in self._excluded_compiled if sep in x.pattern]
return
# HACK returned as a tuple to get a free iterator to keep interface the same
# regardless of whether the client asked for combined or not
marked_count = [x for marked, x in self if marked]
# If there is no item, the compiled Pattern will be '' and match everything!
if not marked_count:
self._cached_compiled_combined_all = []
self._cached_compiled_combined_files = []
self._cached_compiled_combined_paths = []
self._cached_compiled_union_all = []
self._cached_compiled_union_files = []
self._cached_compiled_union_paths = []
else:
self._cached_compiled_combined_all =\
# HACK returned as a tuple to get a free iterator and keep interface
# the same regardless of whether the client asked for union or not
self._cached_compiled_union_all =\
(re.compile('|'.join(marked_count)),)
files_marked = [x for x in marked_count if sep not in x]
if not files_marked:
self._cached_compiled_combined_files = tuple()
self._cached_compiled_union_files = tuple()
else:
self._cached_compiled_combined_files =\
self._cached_compiled_union_files =\
(re.compile('|'.join(files_marked)),)
paths_marked = [x for x in marked_count if sep in x]
if not paths_marked:
self._cached_compiled_combined_paths = tuple()
self._cached_compiled_union_paths = tuple()
else:
self._cached_compiled_combined_paths =\
self._cached_compiled_union_paths =\
(re.compile('|'.join(paths_marked)),)
@property
def compiled(self):
"""Should be used by other classes to retrieve the up-to-date list of patterns."""
if self._use_combined:
if self._use_union:
if self._dirty:
self.build_compiled_caches(True)
self._dirty = False
return self._cached_compiled_combined_all
return self._cached_compiled_union_all
return self._excluded_compiled
@property
@@ -204,23 +211,26 @@ class ExcludeList(Markable):
"""When matching against filenames only, we probably won't be seeing any
directory separator, so we filter out regexes with os.sep in them.
The interface should be expected to be a generator, even if it returns only
one item (one Pattern in the combined case)."""
one item (one Pattern in the union case)."""
if self._dirty:
self.build_compiled_caches(True if self._use_combined else False)
self.build_compiled_caches(True if self._use_union else False)
self._dirty = False
return self._cached_compiled_combined_files if self._use_combined else self._cached_compiled_files
return self._cached_compiled_union_files if self._use_union\
else self._cached_compiled_files
@property
def compiled_paths(self):
"""Returns patterns with only separators in them, for more precise filtering."""
if self._dirty:
self.build_compiled_caches(True if self._use_combined else False)
self.build_compiled_caches(True if self._use_union else False)
self._dirty = False
return self._cached_compiled_combined_paths if self._use_combined else self._cached_compiled_paths
return self._cached_compiled_union_paths if self._use_union\
else self._cached_compiled_paths
# ---Public
def add(self, regex, forced=False):
"""This interface should throw exceptions if there is an error during regex compilation"""
"""This interface should throw exceptions if there is an error during
regex compilation"""
if self.isExcluded(regex):
# This exception should never be ignored
raise AlreadyThereException()
@@ -229,7 +239,8 @@ class ExcludeList(Markable):
iscompilable, exception, compiled = self.compile_re(regex)
if not iscompilable and not forced:
# This exception can be ignored, but taken into account to avoid adding to compiled set
# This exception can be ignored, but taken into account
# to avoid adding to compiled set
raise exception
else:
self._do_add(regex, iscompilable, exception, compiled)
@@ -249,10 +260,6 @@ class ExcludeList(Markable):
return True
return False
def clear(self):
"""Not used and needs refactoring"""
self._excluded = []
def remove(self, regex):
for item in self._excluded:
if item[0] == regex:
@@ -260,7 +267,6 @@ class ExcludeList(Markable):
self._remove_compiled(regex)
def rename(self, regex, newregex):
# if regex not in self._excluded: return
if regex == newregex:
return
found = False
@@ -318,7 +324,8 @@ class ExcludeList(Markable):
# "forced" avoids compilation exceptions and adds anyway
self.add(regex_string, forced=True)
except AlreadyThereException:
logging.error(f"Regex \"{regex_string}\" loaded from XML was already present in the list.")
logging.error(f"Regex \"{regex_string}\" \
loaded from XML was already present in the list.")
continue
if exclude_item.get("marked") == "y":
marked.add(regex_string)
@@ -328,9 +335,7 @@ class ExcludeList(Markable):
def save_to_xml(self, outfile):
"""Create a XML file that can be used by load_from_xml.
outfile can be a file object or a filename.
"""
outfile can be a file object or a filename."""
root = ET.Element("exclude_list")
# reversed in order to keep order of entries when reloading from xml later
for item in reversed(self._excluded):
@@ -343,15 +348,23 @@ class ExcludeList(Markable):
class ExcludeDict(ExcludeList):
"""Version implemented around a dictionary instead of a list, which implies
to keep the index of each string-key as its sub-element and keep it updated
whenever insert/remove is done."""
"""Exclusion list holding a set of regular expressions as keys, the compiled
Pattern, compilation error and compilable boolean as values."""
# Implemntation around a dictionary instead of a list, which implies
# to keep the index of each string-key as its sub-element and keep it updated
# whenever insert/remove is done.
def __init__(self, combined_regex=False):
def __init__(self, union_regex=False):
Markable.__init__(self)
self._use_combined = combined_regex
# { "regex": { "index": int, "compilable": bool, "error": str, "compiled": Pattern or None}}
# Note: "compilable" key should only be updated on add / rename
self._use_union = union_regex
# { "regex string":
# {
# "index": int,
# "compilable": bool,
# "error": str,
# "compiled": Pattern or None
# }
# }
self._excluded = {}
self._excluded_compiled = set()
self._dirty = True
@@ -361,6 +374,14 @@ class ExcludeDict(ExcludeList):
for regex in ordered_keys(self._excluded):
yield self.is_marked(regex), regex
def __getitem__(self, key):
"""Returns the dict item correponding to key"""
return self._excluded.__getitem__(key)
def get_compiled(self, key):
"""Returns the compiled item for key"""
return self.__getitem__(key).get("compiled")
def is_markable(self, regex):
return self._is_markable(regex)
@@ -373,12 +394,12 @@ class ExcludeDict(ExcludeList):
def _add_compiled(self, regex):
self._dirty = True
if self._use_combined:
if self._use_union:
return
try:
self._excluded_compiled.add(self._excluded[regex]["compiled"])
except Exception as e:
print(f"Exception while adding regex {regex} to compiled set: {e}")
logging.warning(f"Exception while adding regex {regex} to compiled set: {e}")
return
def is_compilable(self, regex):
@@ -391,7 +412,8 @@ class ExcludeDict(ExcludeList):
# ---Public
def _do_add(self, regex, iscompilable, exception, compiled):
# We always insert at the top, so index should be 0 and other indices should be pushed by one
# We always insert at the top, so index should be 0
# and other indices should be pushed by one
for value in self._excluded.values():
value["index"] += 1
self._excluded[regex] = {
@@ -406,10 +428,6 @@ class ExcludeDict(ExcludeList):
return True
return False
def clear(self):
"""Not used, need refactoring"""
self._excluded = {}
def remove(self, regex):
old_value = self._excluded.pop(regex)
# Bring down all indices which where above it

View File

@@ -7,13 +7,10 @@
# from hscommon.trans import tr
from .exclude_list_table import ExcludeListTable
import logging
class ExcludeListDialogCore:
# --- View interface
# show()
#
def __init__(self, app):
self.app = app
self.exclude_list = self.app.exclude_list # Markable from exclude.py
@@ -43,7 +40,7 @@ class ExcludeListDialogCore:
self.refresh()
return True
except Exception as e:
print(f"dupeGuru Warning: {e}")
logging.warning(f"Error while renaming regex to {newregex}: {e}")
return False
def add(self, regex):
@@ -54,5 +51,20 @@ class ExcludeListDialogCore:
self.exclude_list.mark(regex)
self.exclude_list_table.add(regex)
def test_string(self, test_string):
"""Sets property on row to highlight if its regex matches test_string supplied."""
matched = False
for row in self.exclude_list_table.rows:
if self.exclude_list.get_compiled(row.regex).match(test_string):
matched = True
row.highlight = True
else:
row.highlight = False
return matched
def reset_rows_highlight(self):
for row in self.exclude_list_table.rows:
row.highlight = False
def show(self):
self.view.show()

View File

@@ -31,8 +31,7 @@ class ExcludeListTable(GUITable, DupeGuruGUIObject):
# --- Virtual
def _do_add(self, regex):
"""(Virtual) Creates a new row, adds it in the table.
Returns ``(row, insert_index)``.
"""
Returns ``(row, insert_index)``."""
# Return index 0 to insert at the top
return ExcludeListRow(self, self.dialog.exclude_list.is_marked(regex), regex), 0
@@ -43,30 +42,18 @@ class ExcludeListTable(GUITable, DupeGuruGUIObject):
def add(self, regex):
row, insert_index = self._do_add(regex)
self.insert(insert_index, row)
# self.select([insert_index])
self.view.refresh()
def _fill(self):
for enabled, regex in self.dialog.exclude_list:
self.append(ExcludeListRow(self, enabled, regex))
# def remove(self):
# super().remove(super().selected_rows)
# def _update_selection(self):
# # rows = self.selected_rows
# # self.dialog._select_rows(list(map(attrgetter("_dupe"), rows)))
# self.dialog.remove_selected()
def refresh(self, refresh_view=True):
"""Override to avoid keeping previous selection in case of multiple rows
selected previously."""
self.cancel_edits()
del self[:]
self._fill()
# sd = self._sort_descriptor
# if sd is not None:
# super().sort_by(self, column_name=sd.column, desc=sd.desc)
if refresh_view:
self.view.refresh()
@@ -76,18 +63,14 @@ class ExcludeListRow(Row):
Row.__init__(self, table)
self._app = table.app
self._data = None
self.enabled_original = enabled
self.regex_original = regex
self.enabled = str(enabled)
self.regex = str(regex)
self.highlight = False
@property
def data(self):
def get_display_info(row):
return {"marked": row.enabled, "regex": row.regex}
if self._data is None:
self._data = get_display_info(self)
self._data = {"marked": self.enabled, "regex": self.regex}
return self._data
@property
@@ -113,10 +96,3 @@ class ExcludeListRow(Row):
return self._app.exclude_list.error(self.regex).msg
else:
return message # Exception object
# @property
# def regex(self):
# return self.regex
# @regex.setter
# def regex(self, value):
# self._app.exclude_list.add(self._regex, value)

View File

@@ -346,7 +346,7 @@ def test_default_path_state_override(tmpdir):
class TestExcludeList():
def setup_method(self, method):
self.d = Directories(exclude_list=ExcludeList(combined_regex=False))
self.d = Directories(exclude_list=ExcludeList(union_regex=False))
def get_files_and_expect_num_result(self, num_result):
"""Calls get_files(), get the filenames only, print for debugging.
@@ -523,14 +523,14 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
class TestExcludeDict(TestExcludeList):
def setup_method(self, method):
self.d = Directories(exclude_list=ExcludeDict(combined_regex=False))
self.d = Directories(exclude_list=ExcludeDict(union_regex=False))
class TestExcludeListCombined(TestExcludeList):
class TestExcludeListunion(TestExcludeList):
def setup_method(self, method):
self.d = Directories(exclude_list=ExcludeList(combined_regex=True))
self.d = Directories(exclude_list=ExcludeList(union_regex=True))
class TestExcludeDictCombined(TestExcludeList):
class TestExcludeDictunion(TestExcludeList):
def setup_method(self, method):
self.d = Directories(exclude_list=ExcludeDict(combined_regex=True))
self.d = Directories(exclude_list=ExcludeDict(union_regex=True))

View File

@@ -96,7 +96,7 @@ class TestCaseDictXMLLoading(TestCaseListXMLLoading):
class TestCaseListEmpty:
def setup_method(self, method):
self.app = DupeGuru()
self.app.exclude_list = ExcludeList()
self.app.exclude_list = ExcludeList(union_regex=False)
self.exclude_list = self.app.exclude_list
def test_add_mark_and_remove_regex(self):
@@ -216,29 +216,29 @@ class TestCaseDictEmpty(TestCaseListEmpty):
"""Same, but with dictionary implementation"""
def setup_method(self, method):
self.app = DupeGuru()
self.app.exclude_list = ExcludeDict()
self.app.exclude_list = ExcludeDict(union_regex=False)
self.exclude_list = self.app.exclude_list
def split_combined(pattern_object):
"""Returns list of strings for each combined pattern"""
def split_union(pattern_object):
"""Returns list of strings for each union pattern"""
return [x for x in pattern_object.pattern.split("|")]
class TestCaseCompiledList():
"""Test consistency between combined or not"""
"""Test consistency between union or and separate versions."""
def setup_method(self, method):
self.e_separate = ExcludeList(combined_regex=False)
self.e_separate = ExcludeList(union_regex=False)
self.e_separate.restore_defaults()
self.e_combined = ExcludeList(combined_regex=True)
self.e_combined.restore_defaults()
self.e_union = ExcludeList(union_regex=True)
self.e_union.restore_defaults()
def test_same_number_of_expressions(self):
# We only get one combined Pattern item in a tuple, which is made of however many parts
eq_(len(split_combined(self.e_combined.compiled[0])), len(default_regexes))
# We only get one union Pattern item in a tuple, which is made of however many parts
eq_(len(split_union(self.e_union.compiled[0])), len(default_regexes))
# We get as many as there are marked items
eq_(len(self.e_separate.compiled), len(default_regexes))
exprs = split_combined(self.e_combined.compiled[0])
exprs = split_union(self.e_union.compiled[0])
# We should have the same number and the same expressions
eq_(len(exprs), len(self.e_separate.compiled))
for expr in self.e_separate.compiled:
@@ -249,29 +249,30 @@ class TestCaseCompiledList():
regex1 = r"test/one/sub"
self.e_separate.add(regex1)
self.e_separate.mark(regex1)
self.e_combined.add(regex1)
self.e_combined.mark(regex1)
self.e_union.add(regex1)
self.e_union.mark(regex1)
separate_compiled_dirs = self.e_separate.compiled
separate_compiled_files = [x for x in self.e_separate.compiled_files]
# HACK we need to call compiled property FIRST to generate the cache
combined_compiled_dirs = self.e_combined.compiled
# print(f"type: {type(self.e_combined.compiled_files[0])}")
union_compiled_dirs = self.e_union.compiled
# print(f"type: {type(self.e_union.compiled_files[0])}")
# A generator returning only one item... ugh
combined_compiled_files = [x for x in self.e_combined.compiled_files][0]
print(f"compiled files: {combined_compiled_files}")
union_compiled_files = [x for x in self.e_union.compiled_files][0]
print(f"compiled files: {union_compiled_files}")
# Separate should give several plus the one added
eq_(len(separate_compiled_dirs), len(default_regexes) + 1)
# regex1 shouldn't be in the "files" version
eq_(len(separate_compiled_files), len(default_regexes))
# Only one Pattern returned, which when split should be however many + 1
eq_(len(split_combined(combined_compiled_dirs[0])), len(default_regexes) + 1)
eq_(len(split_union(union_compiled_dirs[0])), len(default_regexes) + 1)
# regex1 shouldn't be here either
eq_(len(split_combined(combined_compiled_files)), len(default_regexes))
eq_(len(split_union(union_compiled_files)), len(default_regexes))
class TestCaseCompiledDict(TestCaseCompiledList):
"""Test the dictionary version"""
def setup_method(self, method):
self.e_separate = ExcludeDict(combined_regex=False)
self.e_separate = ExcludeDict(union_regex=False)
self.e_separate.restore_defaults()
self.e_combined = ExcludeDict(combined_regex=True)
self.e_combined.restore_defaults()
self.e_union = ExcludeDict(union_regex=True)
self.e_union.restore_defaults()