1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2024-12-21 10:59:03 +00:00

Add test suite, fix bugs

This commit is contained in:
glubsy 2020-08-29 03:57:00 +02:00
parent 26d18945b1
commit 4a1641e39d
7 changed files with 613 additions and 143 deletions

View File

@ -26,7 +26,7 @@ from .pe.photo import get_delta_dimensions
from .util import cmp_value, fix_surrogate_encoding from .util import cmp_value, fix_surrogate_encoding
from . import directories, results, export, fs, prioritize from . import directories, results, export, fs, prioritize
from .ignore import IgnoreList from .ignore import IgnoreList
from .exclude import ExcludeList as ExcludeList from .exclude import ExcludeDict as ExcludeList
from .scanner import ScanType from .scanner import ScanType
from .gui.deletion_options import DeletionOptions from .gui.deletion_options import DeletionOptions
from .gui.details_panel import DetailsPanel from .gui.details_panel import DetailsPanel

View File

@ -80,13 +80,12 @@ class Directories:
# ---Private # ---Private
def _default_state_for_path(self, path): def _default_state_for_path(self, path):
# New logic with regex filters # New logic with regex filters
if self._exclude_list is not None and len(self._exclude_list) > 0: if self._exclude_list is not None and self._exclude_list.mark_count > 0:
# We iterate even if we only have one item here # We iterate even if we only have one item here
for denied_path_re in self._exclude_list.compiled_combined: for denied_path_re in self._exclude_list.compiled:
if denied_path_re.match(str(path)): if denied_path_re.match(str(path.name)):
return DirectoryState.Excluded return DirectoryState.Excluded
return None # return # We still use the old logic to force state on hidden dirs
# Old default logic, still used during initialization of DirectoryTree:
# Override this in subclasses to specify the state of some special folders. # Override this in subclasses to specify the state of some special folders.
if path.name.startswith("."): if path.name.startswith("."):
return DirectoryState.Excluded return DirectoryState.Excluded
@ -95,7 +94,7 @@ class Directories:
for root, dirs, files in os.walk(str(from_path)): for root, dirs, files in os.walk(str(from_path)):
j.check_if_cancelled() j.check_if_cancelled()
rootPath = Path(root) rootPath = Path(root)
state = self.get_state(root) state = self.get_state(rootPath)
if state == DirectoryState.Excluded: if state == DirectoryState.Excluded:
# Recursively get files from folders with lots of subfolder is expensive. However, there # Recursively get files from folders with lots of subfolder is expensive. However, there
# might be a subfolder in this path that is not excluded. What we want to do is to skim # might be a subfolder in this path that is not excluded. What we want to do is to skim
@ -105,15 +104,21 @@ class Directories:
try: try:
if state != DirectoryState.Excluded: if state != DirectoryState.Excluded:
# Old logic # Old logic
if self._exclude_list is None or not len(self._exclude_list): if self._exclude_list is None or not self._exclude_list.mark_count:
found_files = [fs.get_file(rootPath + f, fileclasses=fileclasses) for f in files] found_files = [fs.get_file(rootPath + f, fileclasses=fileclasses) for f in files]
else: else:
found_files = [] found_files = []
# print(f"len of files: {len(files)} {files}")
for f in files: for f in files:
found = False found = False
for expr in self._exclude_list.compiled_files_combined: for expr in self._exclude_list.compiled_files:
found = expr.match(f) if expr.match(f):
if found: found = True
break
if not found:
for expr in self._exclude_list.compiled_paths:
if expr.match(root + os.sep + f):
found = True
break break
if not found: if not found:
found_files.append(fs.get_file(rootPath + f, fileclasses=fileclasses)) found_files.append(fs.get_file(rootPath + f, fileclasses=fileclasses))
@ -215,8 +220,14 @@ class Directories:
if path in self.states: if path in self.states:
return self.states[path] return self.states[path]
state = self._default_state_for_path(path) or DirectoryState.Normal state = self._default_state_for_path(path) or DirectoryState.Normal
# Save non-default states in cache, necessary for _get_files()
if state != DirectoryState.Normal:
self.states[path] = state
return state
prevlen = 0 prevlen = 0
# we loop through the states to find the longest matching prefix # we loop through the states to find the longest matching prefix
# if the parent has a state in cache, return that state
for p, s in self.states.items(): for p, s in self.states.items():
if p.is_parent_of(path) and len(p) > prevlen: if p.is_parent_of(path) and len(p) > prevlen:
prevlen = len(p) prevlen = len(p)

View File

@ -5,7 +5,8 @@
from .markable import Markable from .markable import Markable
from xml.etree import ElementTree as ET from xml.etree import ElementTree as ET
# TODO: perhaps use regex module for better Unicode support? https://pypi.org/project/regex/ # TODO: perhaps use regex module for better Unicode support? https://pypi.org/project/regex/
# or perhaps also https://pypi.org/project/re2/ # also https://pypi.org/project/re2/
# TODO update the Result list with newly added regexes if possible
import re import re
from os import sep from os import sep
import logging import logging
@ -13,8 +14,14 @@ import functools
from hscommon.util import FileOrPath from hscommon.util import FileOrPath
import time import time
default_regexes = [r".*thumbs", r"\.DS.Store", r"\.Trash", r".*Trash-Bin"] default_regexes = [r"^thumbs\.db$", # Obsolete after WindowsXP
forbidden_regexes = [r".*", r"\/.*", r".*\/.*"] r"^\.DS_Store$", # MacOS metadata
r"^\.Trash\-.*", # Linux trash directories
r"^\$Recycle\.Bin$", # Windows
r"^\..*" # Hidden files
]
# These are too aggressive
forbidden_regexes = [r".*", r"\/.*", r".*\/.*", r".*\..*"]
def timer(func): def timer(func):
@ -59,36 +66,37 @@ class ExcludeList(Markable):
# ---Override # ---Override
def __init__(self, combined_regex=False): def __init__(self, combined_regex=False):
Markable.__init__(self) Markable.__init__(self)
self._combined_regex = combined_regex self._use_combined = combined_regex
self._excluded = [] self._excluded = []
self._count = 0
self._excluded_compiled = set() self._excluded_compiled = set()
self._dirty = True self._dirty = True
def __debug_test(self):
self.test_regexes = [
r".*Recycle\.Bin$", r"denyme.*", r".*denyme", r".*/test/denyme*",
r".*/test/*denyme", r"denyme", r".*\/\..*", r"^\..*"]
for regex in self.test_regexes:
try:
self.add(regex)
except Exception as e:
print(f"Exception loading test regex {regex}: {e}")
continue
try:
self.mark(regex)
except Exception as e:
print(f"Exception marking test regex {regex}: {e}")
def __iter__(self): def __iter__(self):
"""Iterate in order.""" """Iterate in order."""
for item in self._excluded: for item in self._excluded:
regex = item[0] regex = item[0]
yield self.is_marked(regex), regex yield self.is_marked(regex), regex
def __contains__(self, item):
return self.isExcluded(item)
def __len__(self): def __len__(self):
"""Returns the number of marked regexes.""" """Returns the total number of regexes regardless of mark status."""
return len([x for marked, x in self if marked]) return len(self._excluded)
def __getitem__(self, key):
for item in self._excluded:
if item[0] == key:
return item
raise KeyError(f"Key {key} is not in exclusion list.")
def __setitem__(self, key, value):
# TODO if necessary
pass
def __delitem__(self, key):
# TODO if necessary
pass
def is_markable(self, regex): def is_markable(self, regex):
return self._is_markable(regex) return self._is_markable(regex)
@ -98,7 +106,7 @@ class ExcludeList(Markable):
for item in self._excluded: for item in self._excluded:
if item[0] == regex: if item[0] == regex:
return item[1] return item[1]
return False # should not be needed return False # should not be necessary, regex SHOULD be in there
def _did_mark(self, regex): def _did_mark(self, regex):
self._add_compiled(regex) self._add_compiled(regex)
@ -107,17 +115,19 @@ class ExcludeList(Markable):
self._remove_compiled(regex) self._remove_compiled(regex)
def _add_compiled(self, regex): def _add_compiled(self, regex):
if self._combined_regex:
self._dirty = True self._dirty = True
if self._use_combined:
return return
for item in self._excluded: for item in self._excluded:
# FIXME probably faster to just rebuild the set from the compiled instead of comparing strings
if item[0] == regex: if item[0] == regex:
# no need to test if already present since it's a set() # no need to test if already present since it's a set()
self._excluded_compiled.add(item[3]) self._excluded_compiled.add(item[3])
break
def _remove_compiled(self, regex): def _remove_compiled(self, regex):
if self._combined_regex:
self._dirty = True self._dirty = True
if self._use_combined:
return return
for item in self._excluded_compiled: for item in self._excluded_compiled:
if regex in item.pattern: if regex in item.pattern:
@ -148,44 +158,65 @@ class ExcludeList(Markable):
if item[0] == regex: if item[0] == regex:
return item[2] return item[2]
def build_compiled_caches(self, combined=False):
if not combined:
self._cached_compiled_files =\
[x for x in self._excluded_compiled if sep not in x.pattern]
self._cached_compiled_paths =\
[x for x in self._excluded_compiled if sep in x.pattern]
return
# HACK returned as a tuple to get a free iterator to keep interface the same
# regardless of whether the client asked for combined or not
marked_count = [x for marked, x in self if marked]
# If there is no item, the compiled Pattern will be '' and match everything!
if not marked_count:
self._cached_compiled_combined_all = []
self._cached_compiled_combined_files = []
self._cached_compiled_combined_paths = []
else:
self._cached_compiled_combined_all =\
(re.compile('|'.join(marked_count)),)
files_marked = [x for x in marked_count if sep not in x]
if not files_marked:
self._cached_compiled_combined_files = tuple()
else:
self._cached_compiled_combined_files =\
(re.compile('|'.join(files_marked)),)
paths_marked = [x for x in marked_count if sep in x]
if not paths_marked:
self._cached_compiled_combined_paths = tuple()
else:
self._cached_compiled_combined_paths =\
(re.compile('|'.join(paths_marked)),)
@property @property
def compiled(self): def compiled(self):
"""Should be used by other classes to retrieve the up-to-date list of patterns.""" """Should be used by other classes to retrieve the up-to-date list of patterns."""
if not self._combined_regex: if self._use_combined:
if self._dirty:
self.build_compiled_caches(True)
self._dirty = False
return self._cached_compiled_combined_all
return self._excluded_compiled return self._excluded_compiled
else:
return self.compiled_combined
@property @property
def compiled_files(self): def compiled_files(self):
"""Should be used by other classes to retrieve the up-to-date list of patterns """When matching against filenames only, we probably won't be seeing any
for files only.""" directory separator, so we filter out regexes with os.sep in them.
if not self._combined_regex: The interface should be expected to be a generator, even if it returns only
# Return each compiled element separately one item (one Pattern in the combined case)."""
# return [compiled_pattern for compiled_pattern in self.compiled if sep not in compiled_pattern.pattern]
for compiled in self.compiled:
if sep not in compiled.pattern:
yield compiled
else:
return self.compiled_files_combined
@property
def compiled_combined(self):
if self._dirty: if self._dirty:
self._cached_compiled_combined =\ self.build_compiled_caches(True if self._use_combined else False)
re.compile('|'.join(x for marked, x in self if marked))
# Must compute the filtered out version as well
self._cached_compiled_combined_files =\
re.compile('|'.join(x for marked, x in self
if marked and sep not in x))
self._dirty = False self._dirty = False
# returned as a tuple to get a free iterator and to avoid subclassing return self._cached_compiled_combined_files if self._use_combined else self._cached_compiled_files
return (self._cached_compiled_combined,)
@property @property
def compiled_files_combined(self): def compiled_paths(self):
# returned as a tuple to get a free iterator and to avoid subclassing """Returns patterns with only separators in them, for more precise filtering."""
return (self._cached_compiled_combined_files,) if self._dirty:
self.build_compiled_caches(True if self._use_combined else False)
self._dirty = False
return self._cached_compiled_combined_paths if self._use_combined else self._cached_compiled_paths
# ---Public # ---Public
def add(self, regex, forced=False): def add(self, regex, forced=False):
@ -206,7 +237,11 @@ class ExcludeList(Markable):
def _do_add(self, regex, iscompilable, exception, compiled): def _do_add(self, regex, iscompilable, exception, compiled):
# We need to insert at the top # We need to insert at the top
self._excluded.insert(0, [regex, iscompilable, exception, compiled]) self._excluded.insert(0, [regex, iscompilable, exception, compiled])
# self._count = len(self._excluded)
@property
def marked_count(self):
"""Returns the number of marked regexes only."""
return len([x for marked, x in self if marked])
def isExcluded(self, regex): def isExcluded(self, regex):
for item in self._excluded: for item in self._excluded:
@ -215,6 +250,7 @@ class ExcludeList(Markable):
return False return False
def clear(self): def clear(self):
"""Not used and needs refactoring"""
self._excluded = [] self._excluded = []
def remove(self, regex): def remove(self, regex):
@ -224,25 +260,24 @@ class ExcludeList(Markable):
self._remove_compiled(regex) self._remove_compiled(regex)
def rename(self, regex, newregex): def rename(self, regex, newregex):
# if regex not in self._excluded or regex == newregex: # if regex not in self._excluded: return
# return
if regex == newregex: if regex == newregex:
return return
found = False found = False
for item in self._excluded: was_marked = False
if regex == item[0]: is_compilable = False
found = True
break
if not found:
return
was_marked = self.is_marked(regex)
is_compilable, exception, compiled = self.compile_re(newregex)
for item in self._excluded: for item in self._excluded:
if item[0] == regex: if item[0] == regex:
found = True
was_marked = self.is_marked(regex)
is_compilable, exception, compiled = self.compile_re(newregex)
# We overwrite the found entry # We overwrite the found entry
self._excluded[self._excluded.index(item)] =\ self._excluded[self._excluded.index(item)] =\
[newregex, is_compilable, exception, compiled] [newregex, is_compilable, exception, compiled]
self._remove_compiled(regex)
break
if not found:
return
if is_compilable and was_marked: if is_compilable and was_marked:
# Not marked by default when added, add it back # Not marked by default when added, add it back
self.mark(newregex) self.mark(newregex)
@ -271,7 +306,6 @@ class ExcludeList(Markable):
except Exception as e: except Exception as e:
logging.warning(f"Error while loading {infile}: {e}") logging.warning(f"Error while loading {infile}: {e}")
self.restore_defaults() self.restore_defaults()
self.__debug_test()
return e return e
marked = set() marked = set()
@ -291,7 +325,6 @@ class ExcludeList(Markable):
for item in marked: for item in marked:
self.mark(item) self.mark(item)
self.__debug_test()
def save_to_xml(self, outfile): def save_to_xml(self, outfile):
"""Create a XML file that can be used by load_from_xml. """Create a XML file that can be used by load_from_xml.
@ -314,13 +347,14 @@ class ExcludeDict(ExcludeList):
to keep the index of each string-key as its sub-element and keep it updated to keep the index of each string-key as its sub-element and keep it updated
whenever insert/remove is done.""" whenever insert/remove is done."""
def __init__(self): def __init__(self, combined_regex=False):
Markable.__init__(self) Markable.__init__(self)
self._use_combined = combined_regex
# { "regex": { "index": int, "compilable": bool, "error": str, "compiled": Pattern or None}} # { "regex": { "index": int, "compilable": bool, "error": str, "compiled": Pattern or None}}
# Note: "compilable" key should only be updated on add / rename # Note: "compilable" key should only be updated on add / rename
self._excluded = {} self._excluded = {}
self._count = 0
self._excluded_compiled = set() self._excluded_compiled = set()
self._dirty = True
def __iter__(self): def __iter__(self):
"""Iterate in order.""" """Iterate in order."""
@ -338,8 +372,8 @@ class ExcludeDict(ExcludeList):
return False return False
def _add_compiled(self, regex): def _add_compiled(self, regex):
if self._combined_regex:
self._dirty = True self._dirty = True
if self._use_combined:
return return
try: try:
self._excluded_compiled.add(self._excluded[regex]["compiled"]) self._excluded_compiled.add(self._excluded[regex]["compiled"])
@ -360,8 +394,12 @@ class ExcludeDict(ExcludeList):
# We always insert at the top, so index should be 0 and other indices should be pushed by one # We always insert at the top, so index should be 0 and other indices should be pushed by one
for value in self._excluded.values(): for value in self._excluded.values():
value["index"] += 1 value["index"] += 1
self._excluded[regex] = {"index": 0, "compilable": iscompilable, "error": exception, "compiled": compiled} self._excluded[regex] = {
# self._count = len(self._excluded) "index": 0,
"compilable": iscompilable,
"error": exception,
"compiled": compiled
}
def isExcluded(self, regex): def isExcluded(self, regex):
if regex in self._excluded.keys(): if regex in self._excluded.keys():
@ -369,6 +407,7 @@ class ExcludeDict(ExcludeList):
return False return False
def clear(self): def clear(self):
"""Not used, need refactoring"""
self._excluded = {} self._excluded = {}
def remove(self, regex): def remove(self, regex):
@ -391,7 +430,13 @@ class ExcludeDict(ExcludeList):
was_marked = self.is_marked(regex) was_marked = self.is_marked(regex)
previous = self._excluded.pop(regex) previous = self._excluded.pop(regex)
iscompilable, error, compiled = self.compile_re(newregex) iscompilable, error, compiled = self.compile_re(newregex)
self._excluded[newregex] = {"index": previous["index"], "compilable": iscompilable, "error": error, "compiled": compiled} self._excluded[newregex] = {
"index": previous["index"],
"compilable": iscompilable,
"error": error,
"compiled": compiled
}
self._remove_compiled(regex)
if was_marked and iscompilable: if was_marked and iscompilable:
self.mark(newregex) self.mark(newregex)

View File

@ -245,7 +245,7 @@ class Folder(File):
return not path.islink() and path.isdir() return not path.islink() and path.isdir()
def get_file(path, fileclasses=[File], deny_list_re=set()): def get_file(path, fileclasses=[File]):
"""Wraps ``path`` around its appropriate :class:`File` class. """Wraps ``path`` around its appropriate :class:`File` class.
Whether a class is "appropriate" is decided by :meth:`File.can_handle` Whether a class is "appropriate" is decided by :meth:`File.can_handle`
@ -255,15 +255,10 @@ def get_file(path, fileclasses=[File], deny_list_re=set()):
""" """
for fileclass in fileclasses: for fileclass in fileclasses:
if fileclass.can_handle(path): if fileclass.can_handle(path):
# print(f"returning {path}")
# for expr in deny_list_re:
# if expr.match(str(path.name)):
# print(f"FOUND {repr(expr)} in {str(path.name)}")
# return
return fileclass(path) return fileclass(path)
def get_files(path, fileclasses=[File], deny_list_re=set()): def get_files(path, fileclasses=[File]):
"""Returns a list of :class:`File` for each file contained in ``path``. """Returns a list of :class:`File` for each file contained in ``path``.
:param Path path: path to scan :param Path path: path to scan
@ -273,7 +268,7 @@ def get_files(path, fileclasses=[File], deny_list_re=set()):
try: try:
result = [] result = []
for path in path.listdir(): for path in path.listdir():
file = get_file(path, fileclasses=fileclasses, deny_list_re=deny_list_re) file = get_file(path, fileclasses=fileclasses)
if file is not None: if file is not None:
result.append(file) result.append(file)
return result return result

View File

@ -20,6 +20,7 @@ from ..directories import (
AlreadyThereError, AlreadyThereError,
InvalidPathError, InvalidPathError,
) )
from ..exclude import ExcludeList, ExcludeDict
def create_fake_fs(rootpath): def create_fake_fs(rootpath):
@ -323,7 +324,7 @@ def test_get_state_returns_excluded_by_default_for_hidden_directories(tmpdir):
def test_default_path_state_override(tmpdir): def test_default_path_state_override(tmpdir):
# It's possible for a subclass to override the default state of a path # It's possible for a subclass to override the default state of a path
class MyDirectories(Directories): class MyDirectories(Directories):
def _default_state_for_path(self, path, denylist): def _default_state_for_path(self, path):
if "foobar" in path: if "foobar" in path:
return DirectoryState.Excluded return DirectoryState.Excluded
@ -343,52 +344,193 @@ def test_default_path_state_override(tmpdir):
eq_(len(list(d.get_files())), 2) eq_(len(list(d.get_files())), 2)
def test_exclude_list_regular_expressions(tmpdir): class TestExcludeList():
d = Directories() def setup_method(self, method):
d.deny_list_str.clear() self.d = Directories(exclude_list=ExcludeList(combined_regex=False))
d.deny_list_re.clear()
d.deny_list_re_files.clear() def get_files_and_expect_num_result(self, num_result):
# This should only exlude the directory, but not the contained files if """Calls get_files(), get the filenames only, print for debugging.
# its status is set to normal after loading it in the directory tree num_result is how many files are expected as a result."""
d.deny_list_str.add(r".*Recycle\.Bin$") print(f"EXCLUDED REGEX: paths {self.d._exclude_list.compiled_paths} \
d.deny_list_str.add(r"denyme.*") files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled}")
# d.deny_list_str.add(r".*denymetoo") files = list(self.d.get_files())
# d.deny_list_str.add(r"denyme") files = [file.name for file in files]
d.deny_list_str.add(r".*\/\..*") print(f"FINAL FILES {files}")
d.deny_list_str.add(r"^\..*") eq_(len(files), num_result)
d.compile_re() return files
def test_exclude_recycle_bin_by_default(self, tmpdir):
regex = r"^.*Recycle\.Bin$"
self.d._exclude_list.add(regex)
self.d._exclude_list.mark(regex)
p1 = Path(str(tmpdir)) p1 = Path(str(tmpdir))
# Should be ignored on Windows only (by default) p1["$Recycle.Bin"].mkdir()
p1["Recycle.Bin"].mkdir() p1["$Recycle.Bin"]["subdir"].mkdir()
p1["Recycle.Bin/somerecycledfile"].open("w").close() self.d.add_path(p1)
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
# By default, subdirs should be excluded too, but this can be overridden separately
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
p1["denyme_blah.txt"].open("w").close() def test_exclude_refined(self, tmpdir):
p1["blah_denymetoo"].open("w").close() regex1 = r"^\$Recycle\.Bin$"
p1["blah_denyme"].open("w").close() self.d._exclude_list.add(regex1)
self.d._exclude_list.mark(regex1)
p1 = Path(str(tmpdir))
p1["$Recycle.Bin"].mkdir()
p1["$Recycle.Bin"]["somefile.png"].open("w").close()
p1["$Recycle.Bin"]["some_unwanted_file.jpg"].open("w").close()
p1["$Recycle.Bin"]["subdir"].mkdir()
p1["$Recycle.Bin"]["subdir"]["somesubdirfile.png"].open("w").close()
p1["$Recycle.Bin"]["subdir"]["unwanted_subdirfile.gif"].open("w").close()
p1["$Recycle.Bin"]["subdar"].mkdir()
p1["$Recycle.Bin"]["subdar"]["somesubdarfile.jpeg"].open("w").close()
p1["$Recycle.Bin"]["subdar"]["unwanted_subdarfile.png"].open("w").close()
self.d.add_path(p1["$Recycle.Bin"])
p1[".hidden_file"].open("w").close() # Filter should set the default state to Excluded
p1[".hidden_dir"].mkdir() eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
p1[".hidden_dir/somenormalfile1"].open("w").close() # The subdir should inherit its parent state
p1[".hidden_dir/somenormalfile2_denyme"].open("w").close() eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Excluded)
# Override a child path's state
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
# Parent should keep its default state, and the other child too
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Excluded)
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
# only the 2 files directly under the Normal directory
files = self.get_files_and_expect_num_result(2)
assert "somefile.png" not in files
assert "some_unwanted_file.jpg" not in files
assert "somesubdarfile.jpeg" not in files
assert "unwanted_subdarfile.png" not in files
assert "somesubdirfile.png" in files
assert "unwanted_subdirfile.gif" in files
# Overriding the parent should enable all children
self.d.set_state(p1["$Recycle.Bin"], DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Normal)
# all files there
files = self.get_files_and_expect_num_result(6)
assert "somefile.png" in files
assert "some_unwanted_file.jpg" in files
# This should still filter out files under directory, despite the Normal state
regex2 = r".*unwanted.*"
self.d._exclude_list.add(regex2)
self.d._exclude_list.mark(regex2)
files = self.get_files_and_expect_num_result(3)
assert "somefile.png" in files
assert "some_unwanted_file.jpg" not in files
assert "unwanted_subdirfile.gif" not in files
assert "unwanted_subdarfile.png" not in files
regex3 = r".*Recycle\.Bin\/.*unwanted.*subdirfile.*"
self.d._exclude_list.rename(regex2, regex3)
assert self.d._exclude_list.error(regex3) is None
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
# Directory shouldn't change its state here, unless explicitly done by user
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
files = self.get_files_and_expect_num_result(5)
assert "unwanted_subdirfile.gif" not in files
assert "unwanted_subdarfile.png" in files
# using end of line character should only filter the directory, or file ending with subdir
regex4 = r".*subdir$"
self.d._exclude_list.rename(regex3, regex4)
assert self.d._exclude_list.error(regex4) is None
p1["$Recycle.Bin"]["subdar"]["file_ending_with_subdir"].open("w").close()
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
files = self.get_files_and_expect_num_result(4)
assert "file_ending_with_subdir" not in files
assert "somesubdarfile.jpeg" in files
assert "somesubdirfile.png" not in files
assert "unwanted_subdirfile.gif" not in files
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
files = self.get_files_and_expect_num_result(6)
assert "file_ending_with_subdir" not in files
assert "somesubdirfile.png" in files
assert "unwanted_subdirfile.gif" in files
regex5 = r".*subdir.*"
self.d._exclude_list.rename(regex4, regex5)
# Files containing substring should be filtered
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
# The path should not match, only the filename, the "subdir" in the directory name shouldn't matter
p1["$Recycle.Bin"]["subdir"]["file_which_shouldnt_match"].open("w").close()
files = self.get_files_and_expect_num_result(5)
assert "somesubdirfile.png" not in files
assert "unwanted_subdirfile.gif" not in files
assert "file_ending_with_subdir" not in files
assert "file_which_shouldnt_match" in files
def test_japanese_unicode(self, tmpdir):
p1 = Path(str(tmpdir))
p1["$Recycle.Bin"].mkdir()
p1["$Recycle.Bin"]["somerecycledfile.png"].open("w").close()
p1["$Recycle.Bin"]["some_unwanted_file.jpg"].open("w").close()
p1["$Recycle.Bin"]["subdir"].mkdir()
p1["$Recycle.Bin"]["subdir"]["過去白濁物語~]_カラー.jpg"].open("w").close()
p1["$Recycle.Bin"]["思叫物語"].mkdir()
p1["$Recycle.Bin"]["思叫物語"]["なししろ会う前"].open("w").close()
p1["$Recycle.Bin"]["思叫物語"]["堂~ロ"].open("w").close()
self.d.add_path(p1["$Recycle.Bin"])
regex3 = r".*物語.*"
self.d._exclude_list.add(regex3)
self.d._exclude_list.mark(regex3)
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
eq_(self.d.get_state(p1["$Recycle.Bin"]["思叫物語"]), DirectoryState.Excluded)
files = self.get_files_and_expect_num_result(2)
assert "過去白濁物語~]_カラー.jpg" not in files
assert "なししろ会う前" not in files
assert "堂~ロ" not in files
# using end of line character should only filter that directory, not affecting its files
regex4 = r".*物語$"
self.d._exclude_list.rename(regex3, regex4)
assert self.d._exclude_list.error(regex4) is None
self.d.set_state(p1["$Recycle.Bin"]["思叫物語"], DirectoryState.Normal)
files = self.get_files_and_expect_num_result(5)
assert "過去白濁物語~]_カラー.jpg" in files
assert "なししろ会う前" in files
assert "堂~ロ" in files
def test_get_state_returns_excluded_for_hidden_directories_and_files(self, tmpdir):
# This regex only work for files, not paths
regex = r"^\..*$"
self.d._exclude_list.add(regex)
self.d._exclude_list.mark(regex)
p1 = Path(str(tmpdir))
p1["foobar"].mkdir() p1["foobar"].mkdir()
p1["foobar/somefile"].open("w").close() p1["foobar"][".hidden_file.txt"].open("w").close()
d.add_path(p1) p1["foobar"][".hidden_dir"].mkdir()
eq_(d.get_state(p1["Recycle.Bin"]), DirectoryState.Excluded) p1["foobar"][".hidden_dir"]["foobar.jpg"].open("w").close()
eq_(d.get_state(p1["foobar"]), DirectoryState.Normal) p1["foobar"][".hidden_dir"][".hidden_subfile.png"].open("w").close()
files = list(d.get_files()) self.d.add_path(p1["foobar"])
files = [file.name for file in files] # It should not inherit its parent's state originally
print(f"first files: {files}") eq_(self.d.get_state(p1["foobar"][".hidden_dir"]), DirectoryState.Excluded)
assert "somerecycledfile" not in files self.d.set_state(p1["foobar"][".hidden_dir"], DirectoryState.Normal)
assert "denyme_blah.txt" not in files # The files should still be filtered
assert ".hidden_file" not in files files = self.get_files_and_expect_num_result(1)
assert "somefile1" not in files assert ".hidden_file.txt" not in files
assert "somefile2_denyme" not in files assert ".hidden_subfile.png" not in files
# Overriding the default state from the Directory Tree assert "foobar.jpg" in files
d.set_state(p1["Recycle.Bin"], DirectoryState.Normal)
d.set_state(p1[".hidden_dir"], DirectoryState.Normal)
files = list(d.get_files()) class TestExcludeDict(TestExcludeList):
files = [file.name for file in files] def setup_method(self, method):
print(f"second files: {files}") self.d = Directories(exclude_list=ExcludeDict(combined_regex=False))
assert "somerecycledfile" in files
assert "somenormalfile1" in files
class TestExcludeListCombined(TestExcludeList):
def setup_method(self, method):
self.d = Directories(exclude_list=ExcludeList(combined_regex=True))
class TestExcludeDictCombined(TestExcludeList):
def setup_method(self, method):
self.d = Directories(exclude_list=ExcludeDict(combined_regex=True))

277
core/tests/exclude_test.py Normal file
View File

@ -0,0 +1,277 @@
# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
import io
# import os.path as op
from xml.etree import ElementTree as ET
# from pytest import raises
from hscommon.testutil import eq_
from .base import DupeGuru
from ..exclude import ExcludeList, ExcludeDict, default_regexes, AlreadyThereException
from re import error
# Two slightly different implementations here, one around a list of lists,
# and another around a dictionary.
class TestCaseListXMLLoading:
    """XML save/load tests, run against the list-backed ``ExcludeList``.

    Subclasses may override ``setup_method`` to run the same tests against
    another implementation exposed through ``self.exclude_list``.
    """

    def setup_method(self, method):
        self.exclude_list = ExcludeList()

    def test_load_non_existant_file(self):
        """Loading a missing file falls back to the pre-defined regexes."""
        self.exclude_list.load_from_xml("non_existant.xml")
        eq_(len(default_regexes), len(self.exclude_list))
        # they should also be marked by default
        eq_(len(default_regexes), self.exclude_list.marked_count)

    def test_save_to_xml(self):
        """The saved document uses ``exclude_list`` as its root tag."""
        f = io.BytesIO()
        self.exclude_list.save_to_xml(f)
        f.seek(0)
        doc = ET.parse(f)
        root = doc.getroot()
        eq_("exclude_list", root.tag)

    def test_save_and_load(self, tmpdir):
        """Regexes and their mark status survive a save/load round trip."""
        e1 = ExcludeList()
        e2 = ExcludeList()
        eq_(len(e1), 0)
        e1.add(r"one")
        e1.mark(r"one")
        e1.add(r"two")
        tmpxml = str(tmpdir.join("exclude_testunit.xml"))
        e1.save_to_xml(tmpxml)
        e2.load_from_xml(tmpxml)
        # Exactly the regexes saved by e1 should be loaded back, nothing more
        assert r"one" in e2
        assert r"two" in e2
        eq_(len(e2), 2)
        # Only "one" was marked before saving
        eq_(e2.marked_count, 1)

    def test_load_xml_with_garbage_and_missing_elements(self):
        """Unknown nodes are skipped; a bad or missing mark leaves the regex unmarked."""
        root = ET.Element("foobar")  # The root element shouldn't matter
        exclude_node = ET.SubElement(root, "bogus")
        exclude_node.set("regex", "None")
        exclude_node.set("marked", "y")
        exclude_node = ET.SubElement(root, "exclude")
        exclude_node.set("regex", "one")
        # marked field invalid
        exclude_node.set("markedddd", "y")
        exclude_node = ET.SubElement(root, "exclude")
        exclude_node.set("regex", "two")
        # missing marked field
        exclude_node = ET.SubElement(root, "exclude")
        exclude_node.set("regex", "three")
        exclude_node.set("markedddd", "pazjbjepo")
        f = io.BytesIO()
        tree = ET.ElementTree(root)
        tree.write(f, encoding="utf-8")
        f.seek(0)
        self.exclude_list.load_from_xml(f)
        # only the three "exclude" nodes should be added, not the "bogus" one
        eq_(3, len(self.exclude_list))
        # None should be marked
        eq_(0, self.exclude_list.marked_count)
class TestCaseDictXMLLoading(TestCaseListXMLLoading):
    """Re-run the inherited XML cases with ``self.exclude_list`` set to an ExcludeDict."""

    def setup_method(self, method):
        dict_backed = ExcludeDict()
        self.exclude_list = dict_backed
class TestCaseListEmpty:
    """Add / mark / rename / remove / restore-defaults tests on an empty ExcludeList.

    ``setup_method`` attaches a fresh list to a ``DupeGuru`` app instance and
    keeps a shortcut to it in ``self.exclude_list``; subclasses override it to
    run the same cases against another implementation.
    """

    def setup_method(self, method):
        self.app = DupeGuru()
        self.app.exclude_list = ExcludeList()
        self.exclude_list = self.app.exclude_list

    def test_add_mark_and_remove_regex(self):
        regex1 = r"one"
        regex2 = r"two"
        self.exclude_list.add(regex1)
        assert regex1 in self.exclude_list
        self.exclude_list.add(regex2)
        self.exclude_list.mark(regex1)
        self.exclude_list.mark(regex2)
        eq_(len(self.exclude_list), 2)
        eq_(len(self.exclude_list.compiled), 2)
        compiled_files = list(self.exclude_list.compiled_files)
        eq_(len(compiled_files), 2)
        self.exclude_list.remove(regex2)
        assert regex2 not in self.exclude_list
        eq_(len(self.exclude_list), 1)

    def test_add_duplicate(self):
        self.exclude_list.add(r"one")
        eq_(1, len(self.exclude_list))
        # Whether or not the duplicate add raises, the list must not grow.
        try:
            self.exclude_list.add(r"one")
        except Exception:
            pass
        eq_(1, len(self.exclude_list))

    def test_add_not_compilable(self):
        # Trying to add a non-valid regex should not work and raise exception
        regex = r"one))"
        try:
            self.exclude_list.add(regex)
        except Exception as e:
            # Make sure we raise a re.error so that the interface can process it
            eq_(type(e), error)
        added = self.exclude_list.mark(regex)
        eq_(added, False)
        eq_(len(self.exclude_list), 0)
        eq_(len(self.exclude_list.compiled), 0)
        compiled_files = list(self.exclude_list.compiled_files)
        eq_(len(compiled_files), 0)

    def test_force_add_not_compilable(self):
        """Used when loading from XML for example"""
        regex = r"one))"
        # Should not get an exception here unless it's a duplicate regex;
        # any exception simply propagates and fails the test.
        self.exclude_list.add(regex, forced=True)
        marked = self.exclude_list.mark(regex)
        eq_(marked, False)  # can't be marked since not compilable
        eq_(len(self.exclude_list), 1)
        eq_(len(self.exclude_list.compiled), 0)
        compiled_files = list(self.exclude_list.compiled_files)
        eq_(len(compiled_files), 0)
        # adding a duplicate
        regex = r"one))"
        try:
            self.exclude_list.add(regex, forced=True)
        except Exception as e:
            # we should have this exception, and it shouldn't be added
            assert type(e) is AlreadyThereException
        eq_(len(self.exclude_list), 1)
        eq_(len(self.exclude_list.compiled), 0)

    def test_rename_regex(self):
        regex = r"one"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        regex_renamed = r"one))"
        # Not compilable, can't be marked
        self.exclude_list.rename(regex, regex_renamed)
        assert regex not in self.exclude_list
        assert regex_renamed in self.exclude_list
        eq_(self.exclude_list.is_marked(regex_renamed), False)
        self.exclude_list.mark(regex_renamed)
        eq_(self.exclude_list.is_marked(regex_renamed), False)
        regex_renamed_compilable = r"two"
        self.exclude_list.rename(regex_renamed, regex_renamed_compilable)
        assert regex_renamed_compilable in self.exclude_list
        # The old name was just renamed away; it is expected to report unmarked
        eq_(self.exclude_list.is_marked(regex_renamed), False)
        self.exclude_list.mark(regex_renamed_compilable)
        eq_(self.exclude_list.is_marked(regex_renamed_compilable), True)
        eq_(len(self.exclude_list), 1)
        # Should still be marked after rename
        regex_compilable = r"three"
        self.exclude_list.rename(regex_renamed_compilable, regex_compilable)
        eq_(self.exclude_list.is_marked(regex_compilable), True)

    def test_restore_default(self):
        """Only unmark previously added regexes and mark the pre-defined ones"""
        regex = r"one"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        self.exclude_list.restore_defaults()
        eq_(len(default_regexes), self.exclude_list.marked_count)
        # added regex shouldn't be marked
        eq_(self.exclude_list.is_marked(regex), False)
        # added regex shouldn't be in compiled list either. Compare against
        # the pattern source strings: the compiled entries are pattern
        # objects, which never compare equal to a plain string.
        compiled_patterns = [compiled_re.pattern for compiled_re in self.exclude_list.compiled]
        assert regex not in compiled_patterns
        # Only default regexes marked and in compiled list
        for default_re in default_regexes:
            assert self.exclude_list.is_marked(default_re)
            if default_re not in compiled_patterns:
                raise Exception(f"Default RE {default_re} not found in compiled list.")
        eq_(len(default_regexes), len(self.exclude_list.compiled))
class TestCaseDictEmpty(TestCaseListEmpty):
    """Same, but with dictionary implementation"""

    def setup_method(self, method):
        # Attach an ExcludeDict to a fresh app, then read it back from the
        # app so the test shortcut is whatever the app actually holds.
        app = DupeGuru()
        app.exclude_list = ExcludeDict()
        self.app = app
        self.exclude_list = app.exclude_list
def split_combined(pattern_object):
    """Returns list of strings for each combined pattern

    ``pattern_object`` must expose its source text as a ``pattern`` string
    attribute. The split is purely textual on "|", so an alternation inside
    one of the sub-expressions is split as well.
    """
    # str.split already returns a new list; no need to copy it again.
    return pattern_object.pattern.split("|")
class TestCaseCompiledList:
    """Test consistency between combined or not"""

    def setup_method(self, method):
        # Two lists holding the same default regexes, differing only in the
        # combined_regex flag.
        self.e_separate = ExcludeList(combined_regex=False)
        self.e_separate.restore_defaults()
        self.e_combined = ExcludeList(combined_regex=True)
        self.e_combined.restore_defaults()

    def test_same_number_of_expressions(self):
        # We only get one combined Pattern item in a tuple, which is made of however many parts
        eq_(len(split_combined(self.e_combined.compiled[0])), len(default_regexes))
        # We get as many as there are marked items
        eq_(len(self.e_separate.compiled), len(default_regexes))
        exprs = split_combined(self.e_combined.compiled[0])
        # We should have the same number and the same expressions
        eq_(len(exprs), len(self.e_separate.compiled))
        for expr in self.e_separate.compiled:
            assert expr.pattern in exprs

    def test_compiled_files(self):
        # test if the separator is indeed checked properly to yield the output
        regex1 = r"test/one/sub"
        self.e_separate.add(regex1)
        self.e_separate.mark(regex1)
        self.e_combined.add(regex1)
        self.e_combined.mark(regex1)
        separate_compiled_dirs = self.e_separate.compiled
        separate_compiled_files = list(self.e_separate.compiled_files)
        # HACK we need to call compiled property FIRST to generate the cache
        combined_compiled_dirs = self.e_combined.compiled
        # compiled_files yields a single combined item here; take it
        combined_compiled_files = list(self.e_combined.compiled_files)[0]
        # Separate should give several plus the one added
        eq_(len(separate_compiled_dirs), len(default_regexes) + 1)
        # regex1 shouldn't be in the "files" version
        eq_(len(separate_compiled_files), len(default_regexes))
        # Only one Pattern returned, which when split should be however many + 1
        eq_(len(split_combined(combined_compiled_dirs[0])), len(default_regexes) + 1)
        # regex1 shouldn't be here either
        eq_(len(split_combined(combined_compiled_files)), len(default_regexes))
class TestCaseCompiledDict(TestCaseCompiledList):
    """Run the inherited compiled-pattern cases with ExcludeDict instances."""

    def setup_method(self, method):
        # One instance per combined_regex setting, each reset to the defaults.
        separate = ExcludeDict(combined_regex=False)
        self.e_separate = separate
        separate.restore_defaults()

        combined = ExcludeDict(combined_regex=True)
        self.e_combined = combined
        combined.restore_defaults()

View File

@ -10,7 +10,7 @@ setenv =
PYTHON="{envpython}" PYTHON="{envpython}"
commands = commands =
make modules make modules
{posargs:py.test} core hscommon {posargs:py.test core hscommon}
deps = deps =
-r{toxinidir}/requirements.txt -r{toxinidir}/requirements.txt
-r{toxinidir}/requirements-extra.txt -r{toxinidir}/requirements-extra.txt