mirror of
https://github.com/arsenetar/dupeguru.git
synced 2024-12-21 10:59:03 +00:00
Fix missing regexp after rename
* Doing a full match should be safer to avoid partial results which would result in overly aggressive filtering. * Add new tests to test suite to cover this issue. * Fixes #903.
This commit is contained in:
parent
ab8750eedb
commit
a6f83ad3d7
@ -108,24 +108,9 @@ class Directories:
|
||||
found_files = []
|
||||
# print(f"len of files: {len(files)} {files}")
|
||||
for f in files:
|
||||
matched = False
|
||||
for expr in self._exclude_list.compiled_files:
|
||||
if expr.fullmatch(f):
|
||||
logging.debug(f"{expr} matched {f}.")
|
||||
matched = True
|
||||
break
|
||||
if not matched:
|
||||
logging.debug(f"path {root + os.sep + f}")
|
||||
for expr in self._exclude_list.compiled_paths:
|
||||
if expr.fullmatch(root + os.sep + f):
|
||||
print(f"{expr} matched {root}{os.sep}{f}.")
|
||||
matched = True
|
||||
break
|
||||
if not matched:
|
||||
logging.debug(f"Not filtering: {f}.")
|
||||
found_files.append(fs.get_file(rootPath + f, fileclasses=fileclasses))
|
||||
else:
|
||||
logging.debug(f"Filtering: {f}")
|
||||
if not self._exclude_list.is_excluded(root, f):
|
||||
found_files.append(fs.get_file(rootPath + f,
|
||||
fileclasses=fileclasses))
|
||||
found_files = [f for f in found_files if f is not None]
|
||||
# In some cases, directories can be considered as files by dupeGuru, which is
|
||||
# why we have this line below. In fact, there only one case: Bundle files under
|
||||
|
@ -81,7 +81,7 @@ class ExcludeList(Markable):
|
||||
yield self.is_marked(regex), regex
|
||||
|
||||
def __contains__(self, item):
|
||||
return self.isExcluded(item)
|
||||
return self.has_entry(item)
|
||||
|
||||
def __len__(self):
|
||||
"""Returns the total number of regexes regardless of mark status."""
|
||||
@ -173,7 +173,9 @@ class ExcludeList(Markable):
|
||||
[x for x in self._excluded_compiled if not has_sep(x.pattern)]
|
||||
self._cached_compiled_paths =\
|
||||
[x for x in self._excluded_compiled if has_sep(x.pattern)]
|
||||
self._dirty = False
|
||||
return
|
||||
|
||||
marked_count = [x for marked, x in self if marked]
|
||||
# If there is no item, the compiled Pattern will be '' and match everything!
|
||||
if not marked_count:
|
||||
@ -197,14 +199,14 @@ class ExcludeList(Markable):
|
||||
else:
|
||||
self._cached_compiled_union_paths =\
|
||||
(re.compile('|'.join(paths_marked)),)
|
||||
self._dirty = False
|
||||
|
||||
@property
|
||||
def compiled(self):
|
||||
"""Should be used by other classes to retrieve the up-to-date list of patterns."""
|
||||
if self._use_union:
|
||||
if self._dirty:
|
||||
self.build_compiled_caches(True)
|
||||
self._dirty = False
|
||||
self.build_compiled_caches(self._use_union)
|
||||
return self._cached_compiled_union_all
|
||||
return self._excluded_compiled
|
||||
|
||||
@ -215,8 +217,7 @@ class ExcludeList(Markable):
|
||||
The interface should be expected to be a generator, even if it returns only
|
||||
one item (one Pattern in the union case)."""
|
||||
if self._dirty:
|
||||
self.build_compiled_caches(True if self._use_union else False)
|
||||
self._dirty = False
|
||||
self.build_compiled_caches(self._use_union)
|
||||
return self._cached_compiled_union_files if self._use_union\
|
||||
else self._cached_compiled_files
|
||||
|
||||
@ -224,8 +225,7 @@ class ExcludeList(Markable):
|
||||
def compiled_paths(self):
|
||||
"""Returns patterns with only separators in them, for more precise filtering."""
|
||||
if self._dirty:
|
||||
self.build_compiled_caches(True if self._use_union else False)
|
||||
self._dirty = False
|
||||
self.build_compiled_caches(self._use_union)
|
||||
return self._cached_compiled_union_paths if self._use_union\
|
||||
else self._cached_compiled_paths
|
||||
|
||||
@ -233,7 +233,7 @@ class ExcludeList(Markable):
|
||||
def add(self, regex, forced=False):
|
||||
"""This interface should throw exceptions if there is an error during
|
||||
regex compilation"""
|
||||
if self.isExcluded(regex):
|
||||
if self.has_entry(regex):
|
||||
# This exception should never be ignored
|
||||
raise AlreadyThereException()
|
||||
if regex in forbidden_regexes:
|
||||
@ -256,12 +256,27 @@ class ExcludeList(Markable):
|
||||
"""Returns the number of marked regexes only."""
|
||||
return len([x for marked, x in self if marked])
|
||||
|
||||
def isExcluded(self, regex):
|
||||
def has_entry(self, regex):
|
||||
for item in self._excluded:
|
||||
if regex == item[0]:
|
||||
return True
|
||||
return False
|
||||
|
||||
def is_excluded(self, dirname, filename):
|
||||
"""Return True if the file or the absolute path to file is supposed to be
|
||||
filtered out, False otherwise."""
|
||||
matched = False
|
||||
for expr in self.compiled_files:
|
||||
if expr.fullmatch(filename):
|
||||
matched = True
|
||||
break
|
||||
if not matched:
|
||||
for expr in self.compiled_paths:
|
||||
if expr.fullmatch(dirname + sep + filename):
|
||||
matched = True
|
||||
break
|
||||
return matched
|
||||
|
||||
def remove(self, regex):
|
||||
for item in self._excluded:
|
||||
if item[0] == regex:
|
||||
@ -286,9 +301,11 @@ class ExcludeList(Markable):
|
||||
break
|
||||
if not found:
|
||||
return
|
||||
if is_compilable and was_marked:
|
||||
# Not marked by default when added, add it back
|
||||
self.mark(newregex)
|
||||
if is_compilable:
|
||||
self._add_compiled(newregex)
|
||||
if was_marked:
|
||||
# Not marked by default when added, add it back
|
||||
self.mark(newregex)
|
||||
|
||||
# def change_index(self, regex, new_index):
|
||||
# """Internal list must be a list, not dict."""
|
||||
@ -300,7 +317,7 @@ class ExcludeList(Markable):
|
||||
if regex not in default_regexes:
|
||||
self.unmark(regex)
|
||||
for default_regex in default_regexes:
|
||||
if not self.isExcluded(default_regex):
|
||||
if not self.has_entry(default_regex):
|
||||
self.add(default_regex)
|
||||
self.mark(default_regex)
|
||||
|
||||
@ -399,9 +416,9 @@ class ExcludeDict(ExcludeList):
|
||||
if self._use_union:
|
||||
return
|
||||
try:
|
||||
self._excluded_compiled.add(self._excluded[regex]["compiled"])
|
||||
self._excluded_compiled.add(self._excluded.get(regex).get("compiled"))
|
||||
except Exception as e:
|
||||
logging.warning(f"Exception while adding regex {regex} to compiled set: {e}")
|
||||
logging.error(f"Exception while adding regex {regex} to compiled set: {e}")
|
||||
return
|
||||
|
||||
def is_compilable(self, regex):
|
||||
@ -425,7 +442,7 @@ class ExcludeDict(ExcludeList):
|
||||
"compiled": compiled
|
||||
}
|
||||
|
||||
def isExcluded(self, regex):
|
||||
def has_entry(self, regex):
|
||||
if regex in self._excluded.keys():
|
||||
return True
|
||||
return False
|
||||
@ -451,14 +468,16 @@ class ExcludeDict(ExcludeList):
|
||||
previous = self._excluded.pop(regex)
|
||||
iscompilable, error, compiled = self.compile_re(newregex)
|
||||
self._excluded[newregex] = {
|
||||
"index": previous["index"],
|
||||
"index": previous.get('index'),
|
||||
"compilable": iscompilable,
|
||||
"error": error,
|
||||
"compiled": compiled
|
||||
}
|
||||
self._remove_compiled(regex)
|
||||
if was_marked and iscompilable:
|
||||
self.mark(newregex)
|
||||
if iscompilable:
|
||||
self._add_compiled(newregex)
|
||||
if was_marked:
|
||||
self.mark(newregex)
|
||||
|
||||
def save_to_xml(self, outfile):
|
||||
"""Create a XML file that can be used by load_from_xml.
|
||||
@ -492,8 +511,8 @@ def ordered_keys(_dict):
|
||||
|
||||
|
||||
if ISWINDOWS:
|
||||
def has_sep(x):
|
||||
return '\\' + sep in x
|
||||
def has_sep(regexp):
|
||||
return '\\' + sep in regexp
|
||||
else:
|
||||
def has_sep(x):
|
||||
return sep in x
|
||||
def has_sep(regexp):
|
||||
return sep in regexp
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
# from hscommon.trans import tr
|
||||
from .exclude_list_table import ExcludeListTable
|
||||
from core.exclude import has_sep
|
||||
from os import sep
|
||||
import logging
|
||||
|
||||
|
||||
@ -30,9 +32,10 @@ class ExcludeListDialogCore:
|
||||
self.refresh()
|
||||
|
||||
def rename_selected(self, newregex):
|
||||
"""Renames the selected regex to ``newregex``.
|
||||
If there's more than one selected row, the first one is used.
|
||||
"""Rename the selected regex to ``newregex``.
|
||||
If there is more than one selected row, the first one is used.
|
||||
:param str newregex: The regex to rename the row's regex to.
|
||||
:return bool: true if success, false if error.
|
||||
"""
|
||||
try:
|
||||
r = self.exclude_list_table.selected_rows[0]
|
||||
@ -52,17 +55,37 @@ class ExcludeListDialogCore:
|
||||
self.exclude_list_table.add(regex)
|
||||
|
||||
def test_string(self, test_string):
|
||||
"""Sets property on row to highlight if its regex matches test_string supplied."""
|
||||
"""Set the highlight property on each row when its regex matches the
|
||||
test_string supplied. Return True if any row matched."""
|
||||
matched = False
|
||||
for row in self.exclude_list_table.rows:
|
||||
compiled_regex = self.exclude_list.get_compiled(row.regex)
|
||||
if compiled_regex and compiled_regex.fullmatch(test_string):
|
||||
matched = True
|
||||
|
||||
if self.is_match(test_string, compiled_regex):
|
||||
row.highlight = True
|
||||
matched = True
|
||||
else:
|
||||
row.highlight = False
|
||||
return matched
|
||||
|
||||
def is_match(self, test_string, compiled_regex):
|
||||
# This method is like an inverted version of ExcludeList.is_excluded()
|
||||
if not compiled_regex:
|
||||
return False
|
||||
matched = False
|
||||
|
||||
# Test only the filename portion of the path
|
||||
if not has_sep(compiled_regex.pattern) and sep in test_string:
|
||||
filename = test_string.rsplit(sep, 1)[1]
|
||||
if compiled_regex.fullmatch(filename):
|
||||
matched = True
|
||||
return matched
|
||||
|
||||
# Test the entire path + filename
|
||||
if compiled_regex.fullmatch(test_string):
|
||||
matched = True
|
||||
return matched
|
||||
|
||||
def reset_rows_highlight(self):
|
||||
for row in self.exclude_list_table.rows:
|
||||
row.highlight = False
|
||||
|
@ -36,7 +36,7 @@ class ExcludeListTable(GUITable, DupeGuruGUIObject):
|
||||
return ExcludeListRow(self, self.dialog.exclude_list.is_marked(regex), regex), 0
|
||||
|
||||
def _do_delete(self):
|
||||
self.dalog.exclude_list.remove(self.selected_row.regex)
|
||||
self.dialog.exclude_list.remove(self.selected_row.regex)
|
||||
|
||||
# --- Override
|
||||
def add(self, regex):
|
||||
|
@ -473,6 +473,24 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
|
||||
assert "file_ending_with_subdir" not in files
|
||||
assert "file_which_shouldnt_match" in files
|
||||
|
||||
# This should match the directory only
|
||||
regex6 = r".*/subdir.*"
|
||||
if ISWINDOWS:
|
||||
regex6 = r".*\\.*subdir.*"
|
||||
self.d._exclude_list.rename(regex5, regex6)
|
||||
self.d._exclude_list.remove(regex1)
|
||||
assert regex1 not in self.d._exclude_list
|
||||
assert regex5 not in self.d._exclude_list
|
||||
assert self.d._exclude_list.error(regex6) is None
|
||||
# This still should not be affected
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
|
||||
files = self.get_files_and_expect_num_result(5)
|
||||
# These files are under the "/subdir" directory
|
||||
assert "somesubdirfile.png" not in files
|
||||
assert "unwanted_subdirfile.gif" not in files
|
||||
# This file under "subdar" directory should not be filtered out
|
||||
assert "file_ending_with_subdir" in files
|
||||
|
||||
def test_japanese_unicode(self, tmpdir):
|
||||
p1 = Path(str(tmpdir))
|
||||
p1["$Recycle.Bin"].mkdir()
|
||||
|
@ -188,6 +188,28 @@ class TestCaseListEmpty:
|
||||
self.exclude_list.rename(regex_renamed_compilable, regex_compilable)
|
||||
eq_(self.exclude_list.is_marked(regex_compilable), True)
|
||||
|
||||
def test_rename_regex_file_to_path(self):
|
||||
regex = r".*/one.*"
|
||||
if ISWINDOWS:
|
||||
regex = r".*\\one.*"
|
||||
regex2 = r".*one.*"
|
||||
self.exclude_list.add(regex)
|
||||
self.exclude_list.mark(regex)
|
||||
compiled_re = [x.pattern for x in self.exclude_list._excluded_compiled]
|
||||
files_re = [x.pattern for x in self.exclude_list.compiled_files]
|
||||
paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
|
||||
assert regex in compiled_re
|
||||
assert regex not in files_re
|
||||
assert regex in paths_re
|
||||
self.exclude_list.rename(regex, regex2)
|
||||
compiled_re = [x.pattern for x in self.exclude_list._excluded_compiled]
|
||||
files_re = [x.pattern for x in self.exclude_list.compiled_files]
|
||||
paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
|
||||
assert regex not in compiled_re
|
||||
assert regex2 in compiled_re
|
||||
assert regex2 in files_re
|
||||
assert regex2 not in paths_re
|
||||
|
||||
def test_restore_default(self):
|
||||
"""Only unmark previously added regexes and mark the pre-defined ones"""
|
||||
regex = r"one"
|
||||
@ -213,6 +235,73 @@ class TestCaseListEmpty:
|
||||
eq_(len(default_regexes), len(self.exclude_list.compiled))
|
||||
|
||||
|
||||
class TestCaseListEmptyUnion(TestCaseListEmpty):
|
||||
"""Same but with union regex"""
|
||||
def setup_method(self, method):
|
||||
self.app = DupeGuru()
|
||||
self.app.exclude_list = ExcludeList(union_regex=True)
|
||||
self.exclude_list = self.app.exclude_list
|
||||
|
||||
def test_add_mark_and_remove_regex(self):
|
||||
regex1 = r"one"
|
||||
regex2 = r"two"
|
||||
self.exclude_list.add(regex1)
|
||||
assert(regex1 in self.exclude_list)
|
||||
self.exclude_list.add(regex2)
|
||||
self.exclude_list.mark(regex1)
|
||||
self.exclude_list.mark(regex2)
|
||||
eq_(len(self.exclude_list), 2)
|
||||
eq_(len(self.exclude_list.compiled), 1)
|
||||
compiled_files = [x for x in self.exclude_list.compiled_files]
|
||||
eq_(len(compiled_files), 1) # Two patterns joined together into one
|
||||
assert "|" in compiled_files[0].pattern
|
||||
self.exclude_list.remove(regex2)
|
||||
assert(regex2 not in self.exclude_list)
|
||||
eq_(len(self.exclude_list), 1)
|
||||
|
||||
def test_rename_regex_file_to_path(self):
|
||||
regex = r".*/one.*"
|
||||
if ISWINDOWS:
|
||||
regex = r".*\\one.*"
|
||||
regex2 = r".*one.*"
|
||||
self.exclude_list.add(regex)
|
||||
self.exclude_list.mark(regex)
|
||||
eq_(len([x for x in self.exclude_list]), 1)
|
||||
compiled_re = [x.pattern for x in self.exclude_list.compiled]
|
||||
files_re = [x.pattern for x in self.exclude_list.compiled_files]
|
||||
paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
|
||||
assert regex in compiled_re
|
||||
assert regex not in files_re
|
||||
assert regex in paths_re
|
||||
self.exclude_list.rename(regex, regex2)
|
||||
eq_(len([x for x in self.exclude_list]), 1)
|
||||
compiled_re = [x.pattern for x in self.exclude_list.compiled]
|
||||
files_re = [x.pattern for x in self.exclude_list.compiled_files]
|
||||
paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
|
||||
assert regex not in compiled_re
|
||||
assert regex2 in compiled_re
|
||||
assert regex2 in files_re
|
||||
assert regex2 not in paths_re
|
||||
|
||||
def test_restore_default(self):
|
||||
"""Only unmark previously added regexes and mark the pre-defined ones"""
|
||||
regex = r"one"
|
||||
self.exclude_list.add(regex)
|
||||
self.exclude_list.mark(regex)
|
||||
self.exclude_list.restore_defaults()
|
||||
eq_(len(default_regexes), self.exclude_list.marked_count)
|
||||
# added regex shouldn't be marked
|
||||
eq_(self.exclude_list.is_marked(regex), False)
|
||||
# added regex shouldn't be in compiled list either
|
||||
compiled = [x for x in self.exclude_list.compiled]
|
||||
assert regex not in compiled
|
||||
# Need to escape both to get the same strings after compilation
|
||||
compiled_escaped = set([x.encode('unicode-escape').decode() for x in compiled[0].pattern.split("|")])
|
||||
default_escaped = set([x.encode('unicode-escape').decode() for x in default_regexes])
|
||||
assert compiled_escaped == default_escaped
|
||||
eq_(len(default_regexes), len(compiled[0].pattern.split("|")))
|
||||
|
||||
|
||||
class TestCaseDictEmpty(TestCaseListEmpty):
|
||||
"""Same, but with dictionary implementation"""
|
||||
def setup_method(self, method):
|
||||
@ -221,6 +310,73 @@ class TestCaseDictEmpty(TestCaseListEmpty):
|
||||
self.exclude_list = self.app.exclude_list
|
||||
|
||||
|
||||
class TestCaseDictEmptyUnion(TestCaseDictEmpty):
|
||||
"""Same, but with union regex"""
|
||||
def setup_method(self, method):
|
||||
self.app = DupeGuru()
|
||||
self.app.exclude_list = ExcludeDict(union_regex=True)
|
||||
self.exclude_list = self.app.exclude_list
|
||||
|
||||
def test_add_mark_and_remove_regex(self):
|
||||
regex1 = r"one"
|
||||
regex2 = r"two"
|
||||
self.exclude_list.add(regex1)
|
||||
assert(regex1 in self.exclude_list)
|
||||
self.exclude_list.add(regex2)
|
||||
self.exclude_list.mark(regex1)
|
||||
self.exclude_list.mark(regex2)
|
||||
eq_(len(self.exclude_list), 2)
|
||||
eq_(len(self.exclude_list.compiled), 1)
|
||||
compiled_files = [x for x in self.exclude_list.compiled_files]
|
||||
# two patterns joined into one
|
||||
eq_(len(compiled_files), 1)
|
||||
self.exclude_list.remove(regex2)
|
||||
assert(regex2 not in self.exclude_list)
|
||||
eq_(len(self.exclude_list), 1)
|
||||
|
||||
def test_rename_regex_file_to_path(self):
|
||||
regex = r".*/one.*"
|
||||
if ISWINDOWS:
|
||||
regex = r".*\\one.*"
|
||||
regex2 = r".*one.*"
|
||||
self.exclude_list.add(regex)
|
||||
self.exclude_list.mark(regex)
|
||||
marked_re = [x for marked, x in self.exclude_list if marked]
|
||||
eq_(len(marked_re), 1)
|
||||
compiled_re = [x.pattern for x in self.exclude_list.compiled]
|
||||
files_re = [x.pattern for x in self.exclude_list.compiled_files]
|
||||
paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
|
||||
assert regex in compiled_re
|
||||
assert regex not in files_re
|
||||
assert regex in paths_re
|
||||
self.exclude_list.rename(regex, regex2)
|
||||
compiled_re = [x.pattern for x in self.exclude_list.compiled]
|
||||
files_re = [x.pattern for x in self.exclude_list.compiled_files]
|
||||
paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
|
||||
assert regex not in compiled_re
|
||||
assert regex2 in compiled_re
|
||||
assert regex2 in files_re
|
||||
assert regex2 not in paths_re
|
||||
|
||||
def test_restore_default(self):
|
||||
"""Only unmark previously added regexes and mark the pre-defined ones"""
|
||||
regex = r"one"
|
||||
self.exclude_list.add(regex)
|
||||
self.exclude_list.mark(regex)
|
||||
self.exclude_list.restore_defaults()
|
||||
eq_(len(default_regexes), self.exclude_list.marked_count)
|
||||
# added regex shouldn't be marked
|
||||
eq_(self.exclude_list.is_marked(regex), False)
|
||||
# added regex shouldn't be in compiled list either
|
||||
compiled = [x for x in self.exclude_list.compiled]
|
||||
assert regex not in compiled
|
||||
# Need to escape both to get the same strings after compilation
|
||||
compiled_escaped = set([x.encode('unicode-escape').decode() for x in compiled[0].pattern.split("|")])
|
||||
default_escaped = set([x.encode('unicode-escape').decode() for x in default_regexes])
|
||||
assert compiled_escaped == default_escaped
|
||||
eq_(len(default_regexes), len(compiled[0].pattern.split("|")))
|
||||
|
||||
|
||||
def split_union(pattern_object):
|
||||
"""Returns list of strings for each union pattern"""
|
||||
return [x for x in pattern_object.pattern.split("|")]
|
||||
|
@ -116,30 +116,32 @@ class ExcludeListDialog(QDialog):
|
||||
if not input_text:
|
||||
self.reset_input_style()
|
||||
return
|
||||
# if at least one row matched, we know whether table is highlighted or not
|
||||
# If at least one row matched, we know whether table is highlighted or not
|
||||
self._row_matched = self.model.test_string(input_text)
|
||||
self.table.refresh()
|
||||
|
||||
# Test the string currently in the input text box as well
|
||||
input_regex = self.inputLine.text()
|
||||
if not input_regex:
|
||||
self.reset_input_style()
|
||||
return
|
||||
compiled = None
|
||||
try:
|
||||
compiled = re.compile(input_regex)
|
||||
except re.error:
|
||||
self.reset_input_style()
|
||||
return
|
||||
if compiled.fullmatch(input_text):
|
||||
self._input_styled = True
|
||||
if self.model.is_match(input_text, compiled):
|
||||
self.inputLine.setStyleSheet("background-color: rgb(10, 200, 10);")
|
||||
self._input_styled = True
|
||||
else:
|
||||
self.reset_input_style()
|
||||
|
||||
def reset_input_style(self):
|
||||
"""Reset regex input line background"""
|
||||
if self._input_styled:
|
||||
self._input_styled = False
|
||||
self.inputLine.setStyleSheet(self.styleSheet())
|
||||
self._input_styled = False
|
||||
|
||||
def reset_table_style(self):
|
||||
if self._row_matched:
|
||||
|
Loading…
Reference in New Issue
Block a user