1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2024-12-21 10:59:03 +00:00

Fix missing regexp after rename

* Doing a full match should be safer to avoid partial results which would result in overly aggressive filtering.
* Add new tests to test suite to cover this issue.
* Fixes #903.
This commit is contained in:
glubsy 2021-06-19 01:52:31 +02:00
parent ab8750eedb
commit a6f83ad3d7
7 changed files with 254 additions and 51 deletions

View File

@ -108,24 +108,9 @@ class Directories:
found_files = []
# print(f"len of files: {len(files)} {files}")
for f in files:
matched = False
for expr in self._exclude_list.compiled_files:
if expr.fullmatch(f):
logging.debug(f"{expr} matched {f}.")
matched = True
break
if not matched:
logging.debug(f"path {root + os.sep + f}")
for expr in self._exclude_list.compiled_paths:
if expr.fullmatch(root + os.sep + f):
print(f"{expr} matched {root}{os.sep}{f}.")
matched = True
break
if not matched:
logging.debug(f"Not filtering: {f}.")
found_files.append(fs.get_file(rootPath + f, fileclasses=fileclasses))
else:
logging.debug(f"Filtering: {f}")
if not self._exclude_list.is_excluded(root, f):
found_files.append(fs.get_file(rootPath + f,
fileclasses=fileclasses))
found_files = [f for f in found_files if f is not None]
# In some cases, directories can be considered as files by dupeGuru, which is
# why we have this line below. In fact, there only one case: Bundle files under

View File

@ -81,7 +81,7 @@ class ExcludeList(Markable):
yield self.is_marked(regex), regex
def __contains__(self, item):
return self.isExcluded(item)
return self.has_entry(item)
def __len__(self):
"""Returns the total number of regexes regardless of mark status."""
@ -173,7 +173,9 @@ class ExcludeList(Markable):
[x for x in self._excluded_compiled if not has_sep(x.pattern)]
self._cached_compiled_paths =\
[x for x in self._excluded_compiled if has_sep(x.pattern)]
self._dirty = False
return
marked_count = [x for marked, x in self if marked]
# If there is no item, the compiled Pattern will be '' and match everything!
if not marked_count:
@ -197,14 +199,14 @@ class ExcludeList(Markable):
else:
self._cached_compiled_union_paths =\
(re.compile('|'.join(paths_marked)),)
self._dirty = False
@property
def compiled(self):
"""Should be used by other classes to retrieve the up-to-date list of patterns."""
if self._use_union:
if self._dirty:
self.build_compiled_caches(True)
self._dirty = False
self.build_compiled_caches(self._use_union)
return self._cached_compiled_union_all
return self._excluded_compiled
@ -215,8 +217,7 @@ class ExcludeList(Markable):
The interface should be expected to be a generator, even if it returns only
one item (one Pattern in the union case)."""
if self._dirty:
self.build_compiled_caches(True if self._use_union else False)
self._dirty = False
self.build_compiled_caches(self._use_union)
return self._cached_compiled_union_files if self._use_union\
else self._cached_compiled_files
@ -224,8 +225,7 @@ class ExcludeList(Markable):
def compiled_paths(self):
"""Returns patterns with only separators in them, for more precise filtering."""
if self._dirty:
self.build_compiled_caches(True if self._use_union else False)
self._dirty = False
self.build_compiled_caches(self._use_union)
return self._cached_compiled_union_paths if self._use_union\
else self._cached_compiled_paths
@ -233,7 +233,7 @@ class ExcludeList(Markable):
def add(self, regex, forced=False):
"""This interface should throw exceptions if there is an error during
regex compilation"""
if self.isExcluded(regex):
if self.has_entry(regex):
# This exception should never be ignored
raise AlreadyThereException()
if regex in forbidden_regexes:
@ -256,12 +256,27 @@ class ExcludeList(Markable):
"""Returns the number of marked regexes only."""
return len([x for marked, x in self if marked])
def isExcluded(self, regex):
def has_entry(self, regex):
for item in self._excluded:
if regex == item[0]:
return True
return False
def is_excluded(self, dirname, filename):
"""Return True if the file or the absolute path to file is supposed to be
filtered out, False otherwise."""
matched = False
for expr in self.compiled_files:
if expr.fullmatch(filename):
matched = True
break
if not matched:
for expr in self.compiled_paths:
if expr.fullmatch(dirname + sep + filename):
matched = True
break
return matched
def remove(self, regex):
for item in self._excluded:
if item[0] == regex:
@ -286,9 +301,11 @@ class ExcludeList(Markable):
break
if not found:
return
if is_compilable and was_marked:
# Not marked by default when added, add it back
self.mark(newregex)
if is_compilable:
self._add_compiled(newregex)
if was_marked:
# Not marked by default when added, add it back
self.mark(newregex)
# def change_index(self, regex, new_index):
# """Internal list must be a list, not dict."""
@ -300,7 +317,7 @@ class ExcludeList(Markable):
if regex not in default_regexes:
self.unmark(regex)
for default_regex in default_regexes:
if not self.isExcluded(default_regex):
if not self.has_entry(default_regex):
self.add(default_regex)
self.mark(default_regex)
@ -399,9 +416,9 @@ class ExcludeDict(ExcludeList):
if self._use_union:
return
try:
self._excluded_compiled.add(self._excluded[regex]["compiled"])
self._excluded_compiled.add(self._excluded.get(regex).get("compiled"))
except Exception as e:
logging.warning(f"Exception while adding regex {regex} to compiled set: {e}")
logging.error(f"Exception while adding regex {regex} to compiled set: {e}")
return
def is_compilable(self, regex):
@ -425,7 +442,7 @@ class ExcludeDict(ExcludeList):
"compiled": compiled
}
def isExcluded(self, regex):
def has_entry(self, regex):
if regex in self._excluded.keys():
return True
return False
@ -451,14 +468,16 @@ class ExcludeDict(ExcludeList):
previous = self._excluded.pop(regex)
iscompilable, error, compiled = self.compile_re(newregex)
self._excluded[newregex] = {
"index": previous["index"],
"index": previous.get('index'),
"compilable": iscompilable,
"error": error,
"compiled": compiled
}
self._remove_compiled(regex)
if was_marked and iscompilable:
self.mark(newregex)
if iscompilable:
self._add_compiled(newregex)
if was_marked:
self.mark(newregex)
def save_to_xml(self, outfile):
"""Create a XML file that can be used by load_from_xml.
@ -492,8 +511,8 @@ def ordered_keys(_dict):
if ISWINDOWS:
def has_sep(x):
return '\\' + sep in x
def has_sep(regexp):
return '\\' + sep in regexp
else:
def has_sep(x):
return sep in x
def has_sep(regexp):
return sep in regexp

View File

@ -7,6 +7,8 @@
# from hscommon.trans import tr
from .exclude_list_table import ExcludeListTable
from core.exclude import has_sep
from os import sep
import logging
@ -30,9 +32,10 @@ class ExcludeListDialogCore:
self.refresh()
def rename_selected(self, newregex):
"""Renames the selected regex to ``newregex``.
If there's more than one selected row, the first one is used.
"""Rename the selected regex to ``newregex``.
If there is more than one selected row, the first one is used.
:param str newregex: The regex to rename the row's regex to.
:return bool: true if success, false if error.
"""
try:
r = self.exclude_list_table.selected_rows[0]
@ -52,17 +55,37 @@ class ExcludeListDialogCore:
self.exclude_list_table.add(regex)
def test_string(self, test_string):
"""Sets property on row to highlight if its regex matches test_string supplied."""
"""Set the highlight property on each row when its regex matches the
test_string supplied. Return True if any row matched."""
matched = False
for row in self.exclude_list_table.rows:
compiled_regex = self.exclude_list.get_compiled(row.regex)
if compiled_regex and compiled_regex.fullmatch(test_string):
matched = True
if self.is_match(test_string, compiled_regex):
row.highlight = True
matched = True
else:
row.highlight = False
return matched
def is_match(self, test_string, compiled_regex):
# This method is like an inverted version of ExcludeList.is_excluded()
if not compiled_regex:
return False
matched = False
# Test only the filename portion of the path
if not has_sep(compiled_regex.pattern) and sep in test_string:
filename = test_string.rsplit(sep, 1)[1]
if compiled_regex.fullmatch(filename):
matched = True
return matched
# Test the entire path + filename
if compiled_regex.fullmatch(test_string):
matched = True
return matched
def reset_rows_highlight(self):
for row in self.exclude_list_table.rows:
row.highlight = False

View File

@ -36,7 +36,7 @@ class ExcludeListTable(GUITable, DupeGuruGUIObject):
return ExcludeListRow(self, self.dialog.exclude_list.is_marked(regex), regex), 0
def _do_delete(self):
self.dalog.exclude_list.remove(self.selected_row.regex)
self.dialog.exclude_list.remove(self.selected_row.regex)
# --- Override
def add(self, regex):

View File

@ -473,6 +473,24 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
assert "file_ending_with_subdir" not in files
assert "file_which_shouldnt_match" in files
# This should match the directory only
regex6 = r".*/subdir.*"
if ISWINDOWS:
regex6 = r".*\\.*subdir.*"
self.d._exclude_list.rename(regex5, regex6)
self.d._exclude_list.remove(regex1)
assert regex1 not in self.d._exclude_list
assert regex5 not in self.d._exclude_list
assert self.d._exclude_list.error(regex6) is None
# This still should not be affected
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
files = self.get_files_and_expect_num_result(5)
# These files are under the "/subdir" directory
assert "somesubdirfile.png" not in files
assert "unwanted_subdirfile.gif" not in files
# This file under "subdar" directory should not be filtered out
assert "file_ending_with_subdir" in files
def test_japanese_unicode(self, tmpdir):
p1 = Path(str(tmpdir))
p1["$Recycle.Bin"].mkdir()

View File

@ -188,6 +188,28 @@ class TestCaseListEmpty:
self.exclude_list.rename(regex_renamed_compilable, regex_compilable)
eq_(self.exclude_list.is_marked(regex_compilable), True)
def test_rename_regex_file_to_path(self):
regex = r".*/one.*"
if ISWINDOWS:
regex = r".*\\one.*"
regex2 = r".*one.*"
self.exclude_list.add(regex)
self.exclude_list.mark(regex)
compiled_re = [x.pattern for x in self.exclude_list._excluded_compiled]
files_re = [x.pattern for x in self.exclude_list.compiled_files]
paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
assert regex in compiled_re
assert regex not in files_re
assert regex in paths_re
self.exclude_list.rename(regex, regex2)
compiled_re = [x.pattern for x in self.exclude_list._excluded_compiled]
files_re = [x.pattern for x in self.exclude_list.compiled_files]
paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
assert regex not in compiled_re
assert regex2 in compiled_re
assert regex2 in files_re
assert regex2 not in paths_re
def test_restore_default(self):
"""Only unmark previously added regexes and mark the pre-defined ones"""
regex = r"one"
@ -213,6 +235,73 @@ class TestCaseListEmpty:
eq_(len(default_regexes), len(self.exclude_list.compiled))
class TestCaseListEmptyUnion(TestCaseListEmpty):
"""Same but with union regex"""
def setup_method(self, method):
self.app = DupeGuru()
self.app.exclude_list = ExcludeList(union_regex=True)
self.exclude_list = self.app.exclude_list
def test_add_mark_and_remove_regex(self):
regex1 = r"one"
regex2 = r"two"
self.exclude_list.add(regex1)
assert(regex1 in self.exclude_list)
self.exclude_list.add(regex2)
self.exclude_list.mark(regex1)
self.exclude_list.mark(regex2)
eq_(len(self.exclude_list), 2)
eq_(len(self.exclude_list.compiled), 1)
compiled_files = [x for x in self.exclude_list.compiled_files]
eq_(len(compiled_files), 1) # Two patterns joined together into one
assert "|" in compiled_files[0].pattern
self.exclude_list.remove(regex2)
assert(regex2 not in self.exclude_list)
eq_(len(self.exclude_list), 1)
def test_rename_regex_file_to_path(self):
regex = r".*/one.*"
if ISWINDOWS:
regex = r".*\\one.*"
regex2 = r".*one.*"
self.exclude_list.add(regex)
self.exclude_list.mark(regex)
eq_(len([x for x in self.exclude_list]), 1)
compiled_re = [x.pattern for x in self.exclude_list.compiled]
files_re = [x.pattern for x in self.exclude_list.compiled_files]
paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
assert regex in compiled_re
assert regex not in files_re
assert regex in paths_re
self.exclude_list.rename(regex, regex2)
eq_(len([x for x in self.exclude_list]), 1)
compiled_re = [x.pattern for x in self.exclude_list.compiled]
files_re = [x.pattern for x in self.exclude_list.compiled_files]
paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
assert regex not in compiled_re
assert regex2 in compiled_re
assert regex2 in files_re
assert regex2 not in paths_re
def test_restore_default(self):
"""Only unmark previously added regexes and mark the pre-defined ones"""
regex = r"one"
self.exclude_list.add(regex)
self.exclude_list.mark(regex)
self.exclude_list.restore_defaults()
eq_(len(default_regexes), self.exclude_list.marked_count)
# added regex shouldn't be marked
eq_(self.exclude_list.is_marked(regex), False)
# added regex shouldn't be in compiled list either
compiled = [x for x in self.exclude_list.compiled]
assert regex not in compiled
# Need to escape both to get the same strings after compilation
compiled_escaped = set([x.encode('unicode-escape').decode() for x in compiled[0].pattern.split("|")])
default_escaped = set([x.encode('unicode-escape').decode() for x in default_regexes])
assert compiled_escaped == default_escaped
eq_(len(default_regexes), len(compiled[0].pattern.split("|")))
class TestCaseDictEmpty(TestCaseListEmpty):
"""Same, but with dictionary implementation"""
def setup_method(self, method):
@ -221,6 +310,73 @@ class TestCaseDictEmpty(TestCaseListEmpty):
self.exclude_list = self.app.exclude_list
class TestCaseDictEmptyUnion(TestCaseDictEmpty):
"""Same, but with union regex"""
def setup_method(self, method):
self.app = DupeGuru()
self.app.exclude_list = ExcludeDict(union_regex=True)
self.exclude_list = self.app.exclude_list
def test_add_mark_and_remove_regex(self):
regex1 = r"one"
regex2 = r"two"
self.exclude_list.add(regex1)
assert(regex1 in self.exclude_list)
self.exclude_list.add(regex2)
self.exclude_list.mark(regex1)
self.exclude_list.mark(regex2)
eq_(len(self.exclude_list), 2)
eq_(len(self.exclude_list.compiled), 1)
compiled_files = [x for x in self.exclude_list.compiled_files]
# two patterns joined into one
eq_(len(compiled_files), 1)
self.exclude_list.remove(regex2)
assert(regex2 not in self.exclude_list)
eq_(len(self.exclude_list), 1)
def test_rename_regex_file_to_path(self):
regex = r".*/one.*"
if ISWINDOWS:
regex = r".*\\one.*"
regex2 = r".*one.*"
self.exclude_list.add(regex)
self.exclude_list.mark(regex)
marked_re = [x for marked, x in self.exclude_list if marked]
eq_(len(marked_re), 1)
compiled_re = [x.pattern for x in self.exclude_list.compiled]
files_re = [x.pattern for x in self.exclude_list.compiled_files]
paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
assert regex in compiled_re
assert regex not in files_re
assert regex in paths_re
self.exclude_list.rename(regex, regex2)
compiled_re = [x.pattern for x in self.exclude_list.compiled]
files_re = [x.pattern for x in self.exclude_list.compiled_files]
paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
assert regex not in compiled_re
assert regex2 in compiled_re
assert regex2 in files_re
assert regex2 not in paths_re
def test_restore_default(self):
"""Only unmark previously added regexes and mark the pre-defined ones"""
regex = r"one"
self.exclude_list.add(regex)
self.exclude_list.mark(regex)
self.exclude_list.restore_defaults()
eq_(len(default_regexes), self.exclude_list.marked_count)
# added regex shouldn't be marked
eq_(self.exclude_list.is_marked(regex), False)
# added regex shouldn't be in compiled list either
compiled = [x for x in self.exclude_list.compiled]
assert regex not in compiled
# Need to escape both to get the same strings after compilation
compiled_escaped = set([x.encode('unicode-escape').decode() for x in compiled[0].pattern.split("|")])
default_escaped = set([x.encode('unicode-escape').decode() for x in default_regexes])
assert compiled_escaped == default_escaped
eq_(len(default_regexes), len(compiled[0].pattern.split("|")))
def split_union(pattern_object):
"""Returns list of strings for each union pattern"""
return [x for x in pattern_object.pattern.split("|")]

View File

@ -116,30 +116,32 @@ class ExcludeListDialog(QDialog):
if not input_text:
self.reset_input_style()
return
# if at least one row matched, we know whether table is highlighted or not
# If at least one row matched, we know whether table is highlighted or not
self._row_matched = self.model.test_string(input_text)
self.table.refresh()
# Test the string currently in the input text box as well
input_regex = self.inputLine.text()
if not input_regex:
self.reset_input_style()
return
compiled = None
try:
compiled = re.compile(input_regex)
except re.error:
self.reset_input_style()
return
if compiled.fullmatch(input_text):
self._input_styled = True
if self.model.is_match(input_text, compiled):
self.inputLine.setStyleSheet("background-color: rgb(10, 200, 10);")
self._input_styled = True
else:
self.reset_input_style()
def reset_input_style(self):
"""Reset regex input line background"""
if self._input_styled:
self._input_styled = False
self.inputLine.setStyleSheet(self.styleSheet())
self._input_styled = False
def reset_table_style(self):
if self._row_matched: