Add test suite, fix bugs
This commit is contained in:
parent
26d18945b1
commit
4a1641e39d
|
@ -26,7 +26,7 @@ from .pe.photo import get_delta_dimensions
|
|||
from .util import cmp_value, fix_surrogate_encoding
|
||||
from . import directories, results, export, fs, prioritize
|
||||
from .ignore import IgnoreList
|
||||
from .exclude import ExcludeList as ExcludeList
|
||||
from .exclude import ExcludeDict as ExcludeList
|
||||
from .scanner import ScanType
|
||||
from .gui.deletion_options import DeletionOptions
|
||||
from .gui.details_panel import DetailsPanel
|
||||
|
|
|
@ -80,13 +80,12 @@ class Directories:
|
|||
# ---Private
|
||||
def _default_state_for_path(self, path):
|
||||
# New logic with regex filters
|
||||
if self._exclude_list is not None and len(self._exclude_list) > 0:
|
||||
if self._exclude_list is not None and self._exclude_list.mark_count > 0:
|
||||
# We iterate even if we only have one item here
|
||||
for denied_path_re in self._exclude_list.compiled_combined:
|
||||
if denied_path_re.match(str(path)):
|
||||
for denied_path_re in self._exclude_list.compiled:
|
||||
if denied_path_re.match(str(path.name)):
|
||||
return DirectoryState.Excluded
|
||||
return None
|
||||
# Old default logic, still used during initialization of DirectoryTree:
|
||||
# return # We still use the old logic to force state on hidden dirs
|
||||
# Override this in subclasses to specify the state of some special folders.
|
||||
if path.name.startswith("."):
|
||||
return DirectoryState.Excluded
|
||||
|
@ -95,7 +94,7 @@ class Directories:
|
|||
for root, dirs, files in os.walk(str(from_path)):
|
||||
j.check_if_cancelled()
|
||||
rootPath = Path(root)
|
||||
state = self.get_state(root)
|
||||
state = self.get_state(rootPath)
|
||||
if state == DirectoryState.Excluded:
|
||||
# Recursively getting files from folders with lots of subfolders is expensive. However, there
|
||||
# might be a subfolder in this path that is not excluded. What we want to do is to skim
|
||||
|
@ -105,16 +104,22 @@ class Directories:
|
|||
try:
|
||||
if state != DirectoryState.Excluded:
|
||||
# Old logic
|
||||
if self._exclude_list is None or not len(self._exclude_list):
|
||||
if self._exclude_list is None or not self._exclude_list.mark_count:
|
||||
found_files = [fs.get_file(rootPath + f, fileclasses=fileclasses) for f in files]
|
||||
else:
|
||||
found_files = []
|
||||
# print(f"len of files: {len(files)} {files}")
|
||||
for f in files:
|
||||
found = False
|
||||
for expr in self._exclude_list.compiled_files_combined:
|
||||
found = expr.match(f)
|
||||
if found:
|
||||
for expr in self._exclude_list.compiled_files:
|
||||
if expr.match(f):
|
||||
found = True
|
||||
break
|
||||
if not found:
|
||||
for expr in self._exclude_list.compiled_paths:
|
||||
if expr.match(root + os.sep + f):
|
||||
found = True
|
||||
break
|
||||
if not found:
|
||||
found_files.append(fs.get_file(rootPath + f, fileclasses=fileclasses))
|
||||
found_files = [f for f in found_files if f is not None]
|
||||
|
@ -215,8 +220,14 @@ class Directories:
|
|||
if path in self.states:
|
||||
return self.states[path]
|
||||
state = self._default_state_for_path(path) or DirectoryState.Normal
|
||||
# Save non-default states in cache, necessary for _get_files()
|
||||
if state != DirectoryState.Normal:
|
||||
self.states[path] = state
|
||||
return state
|
||||
|
||||
prevlen = 0
|
||||
# we loop through the states to find the longest matching prefix
|
||||
# if the parent has a state in cache, return that state
|
||||
for p, s in self.states.items():
|
||||
if p.is_parent_of(path) and len(p) > prevlen:
|
||||
prevlen = len(p)
|
||||
|
|
197
core/exclude.py
197
core/exclude.py
|
@ -5,7 +5,8 @@
|
|||
from .markable import Markable
|
||||
from xml.etree import ElementTree as ET
|
||||
# TODO: perhaps use regex module for better Unicode support? https://pypi.org/project/regex/
|
||||
# or perhaps also https://pypi.org/project/re2/
|
||||
# also https://pypi.org/project/re2/
|
||||
# TODO update the Result list with newly added regexes if possible
|
||||
import re
|
||||
from os import sep
|
||||
import logging
|
||||
|
@ -13,8 +14,14 @@ import functools
|
|||
from hscommon.util import FileOrPath
|
||||
import time
|
||||
|
||||
default_regexes = [r".*thumbs", r"\.DS.Store", r"\.Trash", r".*Trash-Bin"]
|
||||
forbidden_regexes = [r".*", r"\/.*", r".*\/.*"]
|
||||
default_regexes = [r"^thumbs\.db$", # Obsolete after WindowsXP
|
||||
r"^\.DS_Store$", # MacOS metadata
|
||||
r"^\.Trash\-.*", # Linux trash directories
|
||||
r"^\$Recycle\.Bin$", # Windows
|
||||
r"^\..*" # Hidden files
|
||||
]
|
||||
# These are too aggressive
|
||||
forbidden_regexes = [r".*", r"\/.*", r".*\/.*", r".*\..*"]
|
||||
|
||||
|
||||
def timer(func):
|
||||
|
@ -59,36 +66,37 @@ class ExcludeList(Markable):
|
|||
# ---Override
|
||||
def __init__(self, combined_regex=False):
|
||||
Markable.__init__(self)
|
||||
self._combined_regex = combined_regex
|
||||
self._use_combined = combined_regex
|
||||
self._excluded = []
|
||||
self._count = 0
|
||||
self._excluded_compiled = set()
|
||||
self._dirty = True
|
||||
|
||||
def __debug_test(self):
|
||||
self.test_regexes = [
|
||||
r".*Recycle\.Bin$", r"denyme.*", r".*denyme", r".*/test/denyme*",
|
||||
r".*/test/*denyme", r"denyme", r".*\/\..*", r"^\..*"]
|
||||
for regex in self.test_regexes:
|
||||
try:
|
||||
self.add(regex)
|
||||
except Exception as e:
|
||||
print(f"Exception loading test regex {regex}: {e}")
|
||||
continue
|
||||
try:
|
||||
self.mark(regex)
|
||||
except Exception as e:
|
||||
print(f"Exception marking test regex {regex}: {e}")
|
||||
|
||||
def __iter__(self):
    """Yield a ``(marked, regex)`` pair for every stored entry, in list order.

    ``regex`` is the string kept in the first slot of each entry of
    ``self._excluded``; ``marked`` is what ``self.is_marked`` reports for it.
    """
    patterns = (entry[0] for entry in self._excluded)
    for pattern in patterns:
        yield self.is_marked(pattern), pattern
|
||||
|
||||
def __contains__(self, item):
    """Support ``item in exclude_list`` by delegating to ``isExcluded``."""
    is_present = self.isExcluded(item)
    return is_present
|
||||
|
||||
def __len__(self):
|
||||
"""Returns the number of marked regexes."""
|
||||
return len([x for marked, x in self if marked])
|
||||
"""Returns the total number of regexes regardless of mark status."""
|
||||
return len(self._excluded)
|
||||
|
||||
def __getitem__(self, key):
    """Return the first stored entry whose regex string equals ``key``.

    Entries are compared on their first slot only.

    Raises:
        KeyError: if no entry carries that regex string.
    """
    found = next((entry for entry in self._excluded if entry[0] == key), None)
    if found is None:
        raise KeyError(f"Key {key} is not in exclusion list.")
    return found
|
||||
|
||||
def __setitem__(self, key, value):
    """Placeholder: item assignment is accepted but currently does nothing."""
    # TODO: implement if item assignment ever becomes necessary
    return None
|
||||
|
||||
def __delitem__(self, key):
    """Placeholder: item deletion is accepted but currently does nothing."""
    # TODO: implement if item deletion ever becomes necessary
    return None
|
||||
|
||||
def is_markable(self, regex):
    """Public wrapper returning whatever ``self._is_markable(regex)`` returns."""
    verdict = self._is_markable(regex)
    return verdict
|
||||
|
@ -98,7 +106,7 @@ class ExcludeList(Markable):
|
|||
for item in self._excluded:
|
||||
if item[0] == regex:
|
||||
return item[1]
|
||||
return False # should not be needed
|
||||
return False # should not be necessary, regex SHOULD be in there
|
||||
|
||||
def _did_mark(self, regex):
|
||||
self._add_compiled(regex)
|
||||
|
@ -107,17 +115,19 @@ class ExcludeList(Markable):
|
|||
self._remove_compiled(regex)
|
||||
|
||||
def _add_compiled(self, regex):
|
||||
if self._combined_regex:
|
||||
self._dirty = True
|
||||
self._dirty = True
|
||||
if self._use_combined:
|
||||
return
|
||||
for item in self._excluded:
|
||||
# FIXME probably faster to just rebuild the set from the compiled instead of comparing strings
|
||||
if item[0] == regex:
|
||||
# no need to test if already present since it's a set()
|
||||
self._excluded_compiled.add(item[3])
|
||||
break
|
||||
|
||||
def _remove_compiled(self, regex):
|
||||
if self._combined_regex:
|
||||
self._dirty = True
|
||||
self._dirty = True
|
||||
if self._use_combined:
|
||||
return
|
||||
for item in self._excluded_compiled:
|
||||
if regex in item.pattern:
|
||||
|
@ -148,44 +158,65 @@ class ExcludeList(Markable):
|
|||
if item[0] == regex:
|
||||
return item[2]
|
||||
|
||||
def build_compiled_caches(self, combined=False):
    """Rebuild the cached pattern collections handed out to callers.

    With ``combined`` false, the compiled patterns in
    ``self._excluded_compiled`` are split into two lists:
    ``_cached_compiled_files`` (pattern text without ``sep``) and
    ``_cached_compiled_paths`` (pattern text containing ``sep``).

    With ``combined`` true, the marked regex strings yielded by iterating
    ``self`` are joined with ``|`` and compiled into single patterns stored in
    ``_cached_compiled_combined_all``, ``_cached_compiled_combined_files`` and
    ``_cached_compiled_combined_paths``. Each is a one-element tuple, so
    callers can iterate it the same way as the non-combined lists.
    """
    if not combined:
        file_patterns, path_patterns = [], []
        for compiled in self._excluded_compiled:
            bucket = path_patterns if sep in compiled.pattern else file_patterns
            bucket.append(compiled)
        self._cached_compiled_files = file_patterns
        self._cached_compiled_paths = path_patterns
        return

    marked_regexes = [regex for is_marked, regex in self if is_marked]
    if not marked_regexes:
        # Joining nothing would compile to '' which matches everything,
        # so hand out empty collections instead.
        self._cached_compiled_combined_all = []
        self._cached_compiled_combined_files = []
        self._cached_compiled_combined_paths = []
        return

    def as_single_pattern(regexes):
        # One-element tuple holding the alternation, or an empty tuple.
        if not regexes:
            return tuple()
        return (re.compile('|'.join(regexes)),)

    self._cached_compiled_combined_all = as_single_pattern(marked_regexes)
    self._cached_compiled_combined_files = as_single_pattern(
        [regex for regex in marked_regexes if sep not in regex]
    )
    self._cached_compiled_combined_paths = as_single_pattern(
        [regex for regex in marked_regexes if sep in regex]
    )
|
||||
|
||||
@property
|
||||
def compiled(self):
|
||||
"""Should be used by other classes to retrieve the up-to-date list of patterns."""
|
||||
if not self._combined_regex:
|
||||
return self._excluded_compiled
|
||||
else:
|
||||
return self.compiled_combined
|
||||
if self._use_combined:
|
||||
if self._dirty:
|
||||
self.build_compiled_caches(True)
|
||||
self._dirty = False
|
||||
return self._cached_compiled_combined_all
|
||||
return self._excluded_compiled
|
||||
|
||||
@property
|
||||
def compiled_files(self):
|
||||
"""Should be used by other classes to retrieve the up-to-date list of patterns
|
||||
for files only."""
|
||||
if not self._combined_regex:
|
||||
# Return each compiled element separately
|
||||
# return [compiled_pattern for compiled_pattern in self.compiled if sep not in compiled_pattern.pattern]
|
||||
for compiled in self.compiled:
|
||||
if sep not in compiled.pattern:
|
||||
yield compiled
|
||||
else:
|
||||
return self.compiled_files_combined
|
||||
|
||||
@property
|
||||
def compiled_combined(self):
|
||||
"""When matching against filenames only, we probably won't be seeing any
|
||||
directory separator, so we filter out regexes with os.sep in them.
|
||||
The interface should be expected to be a generator, even if it returns only
|
||||
one item (one Pattern in the combined case)."""
|
||||
if self._dirty:
|
||||
self._cached_compiled_combined =\
|
||||
re.compile('|'.join(x for marked, x in self if marked))
|
||||
# Must compute the filtered out version as well
|
||||
self._cached_compiled_combined_files =\
|
||||
re.compile('|'.join(x for marked, x in self
|
||||
if marked and sep not in x))
|
||||
self.build_compiled_caches(True if self._use_combined else False)
|
||||
self._dirty = False
|
||||
# returned as a tuple to get a free iterator and to avoid subclassing
|
||||
return (self._cached_compiled_combined,)
|
||||
return self._cached_compiled_combined_files if self._use_combined else self._cached_compiled_files
|
||||
|
||||
@property
|
||||
def compiled_files_combined(self):
|
||||
# returned as a tuple to get a free iterator and to avoid subclassing
|
||||
return (self._cached_compiled_combined_files,)
|
||||
def compiled_paths(self):
|
||||
"""Returns patterns with only separators in them, for more precise filtering."""
|
||||
if self._dirty:
|
||||
self.build_compiled_caches(True if self._use_combined else False)
|
||||
self._dirty = False
|
||||
return self._cached_compiled_combined_paths if self._use_combined else self._cached_compiled_paths
|
||||
|
||||
# ---Public
|
||||
def add(self, regex, forced=False):
|
||||
|
@ -206,7 +237,11 @@ class ExcludeList(Markable):
|
|||
def _do_add(self, regex, iscompilable, exception, compiled):
    """Store a new entry at the front of the exclusion list.

    The entry is the list ``[regex, iscompilable, exception, compiled]``.
    """
    entry = [regex, iscompilable, exception, compiled]
    # Newest entries go first
    self._excluded[:0] = [entry]
|
||||
# self._count = len(self._excluded)
|
||||
|
||||
@property
def marked_count(self):
    """Number of entries whose marked flag is truthy when iterating ``self``."""
    return sum(1 for is_marked, _regex in self if is_marked)
|
||||
|
||||
def isExcluded(self, regex):
|
||||
for item in self._excluded:
|
||||
|
@ -215,6 +250,7 @@ class ExcludeList(Markable):
|
|||
return False
|
||||
|
||||
def clear(self):
    """Remove every regex entry and invalidate the compiled-pattern caches.

    Not used and needs refactoring.

    Besides emptying ``_excluded``, this also empties ``_excluded_compiled``
    and sets ``_dirty``. Without that, the compiled-pattern accessors would
    keep handing out patterns for entries that no longer exist.
    """
    self._excluded = []
    # Drop patterns compiled for the entries we just removed.
    self._excluded_compiled = set()
    # Force the combined caches to be rebuilt on next access.
    self._dirty = True
    # NOTE(review): mark state held by the Markable base class is not reset
    # here; confirm whether it should be when this method gets refactored.
|
||||
|
||||
def remove(self, regex):
|
||||
|
@ -224,25 +260,24 @@ class ExcludeList(Markable):
|
|||
self._remove_compiled(regex)
|
||||
|
||||
def rename(self, regex, newregex):
|
||||
# if regex not in self._excluded or regex == newregex:
|
||||
# return
|
||||
# if regex not in self._excluded: return
|
||||
if regex == newregex:
|
||||
return
|
||||
found = False
|
||||
for item in self._excluded:
|
||||
if regex == item[0]:
|
||||
found = True
|
||||
break
|
||||
if not found:
|
||||
return
|
||||
|
||||
was_marked = self.is_marked(regex)
|
||||
is_compilable, exception, compiled = self.compile_re(newregex)
|
||||
was_marked = False
|
||||
is_compilable = False
|
||||
for item in self._excluded:
|
||||
if item[0] == regex:
|
||||
found = True
|
||||
was_marked = self.is_marked(regex)
|
||||
is_compilable, exception, compiled = self.compile_re(newregex)
|
||||
# We overwrite the found entry
|
||||
self._excluded[self._excluded.index(item)] =\
|
||||
[newregex, is_compilable, exception, compiled]
|
||||
self._remove_compiled(regex)
|
||||
break
|
||||
if not found:
|
||||
return
|
||||
if is_compilable and was_marked:
|
||||
# Not marked by default when added, add it back
|
||||
self.mark(newregex)
|
||||
|
@ -271,7 +306,6 @@ class ExcludeList(Markable):
|
|||
except Exception as e:
|
||||
logging.warning(f"Error while loading {infile}: {e}")
|
||||
self.restore_defaults()
|
||||
self.__debug_test()
|
||||
return e
|
||||
|
||||
marked = set()
|
||||
|
@ -291,7 +325,6 @@ class ExcludeList(Markable):
|
|||
|
||||
for item in marked:
|
||||
self.mark(item)
|
||||
self.__debug_test()
|
||||
|
||||
def save_to_xml(self, outfile):
|
||||
"""Create a XML file that can be used by load_from_xml.
|
||||
|
@ -314,13 +347,14 @@ class ExcludeDict(ExcludeList):
|
|||
to keep the index of each string-key as its sub-element and keep it updated
|
||||
whenever insert/remove is done."""
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, combined_regex=False):
|
||||
Markable.__init__(self)
|
||||
self._use_combined = combined_regex
|
||||
# { "regex": { "index": int, "compilable": bool, "error": str, "compiled": Pattern or None}}
|
||||
# Note: "compilable" key should only be updated on add / rename
|
||||
self._excluded = {}
|
||||
self._count = 0
|
||||
self._excluded_compiled = set()
|
||||
self._dirty = True
|
||||
|
||||
def __iter__(self):
|
||||
"""Iterate in order."""
|
||||
|
@ -338,8 +372,8 @@ class ExcludeDict(ExcludeList):
|
|||
return False
|
||||
|
||||
def _add_compiled(self, regex):
|
||||
if self._combined_regex:
|
||||
self._dirty = True
|
||||
self._dirty = True
|
||||
if self._use_combined:
|
||||
return
|
||||
try:
|
||||
self._excluded_compiled.add(self._excluded[regex]["compiled"])
|
||||
|
@ -360,8 +394,12 @@ class ExcludeDict(ExcludeList):
|
|||
# We always insert at the top, so index should be 0 and other indices should be pushed by one
|
||||
for value in self._excluded.values():
|
||||
value["index"] += 1
|
||||
self._excluded[regex] = {"index": 0, "compilable": iscompilable, "error": exception, "compiled": compiled}
|
||||
# self._count = len(self._excluded)
|
||||
self._excluded[regex] = {
|
||||
"index": 0,
|
||||
"compilable": iscompilable,
|
||||
"error": exception,
|
||||
"compiled": compiled
|
||||
}
|
||||
|
||||
def isExcluded(self, regex):
|
||||
if regex in self._excluded.keys():
|
||||
|
@ -369,6 +407,7 @@ class ExcludeDict(ExcludeList):
|
|||
return False
|
||||
|
||||
def clear(self):
    """Remove every regex entry and invalidate the compiled-pattern caches.

    Not used, need refactoring.

    Besides emptying the ``_excluded`` dictionary, this also empties
    ``_excluded_compiled`` and sets ``_dirty``. Without that, the
    compiled-pattern accessors would keep handing out patterns for entries
    that no longer exist.
    """
    self._excluded = {}
    # Drop patterns compiled for the entries we just removed.
    self._excluded_compiled = set()
    # Force the combined caches to be rebuilt on next access.
    self._dirty = True
    # NOTE(review): mark state held by the Markable base class is not reset
    # here; confirm whether it should be when this method gets refactored.
|
||||
|
||||
def remove(self, regex):
|
||||
|
@ -391,7 +430,13 @@ class ExcludeDict(ExcludeList):
|
|||
was_marked = self.is_marked(regex)
|
||||
previous = self._excluded.pop(regex)
|
||||
iscompilable, error, compiled = self.compile_re(newregex)
|
||||
self._excluded[newregex] = {"index": previous["index"], "compilable": iscompilable, "error": error, "compiled": compiled}
|
||||
self._excluded[newregex] = {
|
||||
"index": previous["index"],
|
||||
"compilable": iscompilable,
|
||||
"error": error,
|
||||
"compiled": compiled
|
||||
}
|
||||
self._remove_compiled(regex)
|
||||
if was_marked and iscompilable:
|
||||
self.mark(newregex)
|
||||
|
||||
|
|
11
core/fs.py
11
core/fs.py
|
@ -245,7 +245,7 @@ class Folder(File):
|
|||
return not path.islink() and path.isdir()
|
||||
|
||||
|
||||
def get_file(path, fileclasses=[File], deny_list_re=set()):
|
||||
def get_file(path, fileclasses=[File]):
|
||||
"""Wraps ``path`` around its appropriate :class:`File` class.
|
||||
|
||||
Whether a class is "appropriate" is decided by :meth:`File.can_handle`
|
||||
|
@ -255,15 +255,10 @@ def get_file(path, fileclasses=[File], deny_list_re=set()):
|
|||
"""
|
||||
for fileclass in fileclasses:
|
||||
if fileclass.can_handle(path):
|
||||
# print(f"returning {path}")
|
||||
# for expr in deny_list_re:
|
||||
# if expr.match(str(path.name)):
|
||||
# print(f"FOUND {repr(expr)} in {str(path.name)}")
|
||||
# return
|
||||
return fileclass(path)
|
||||
|
||||
|
||||
def get_files(path, fileclasses=[File], deny_list_re=set()):
|
||||
def get_files(path, fileclasses=[File]):
|
||||
"""Returns a list of :class:`File` for each file contained in ``path``.
|
||||
|
||||
:param Path path: path to scan
|
||||
|
@ -273,7 +268,7 @@ def get_files(path, fileclasses=[File], deny_list_re=set()):
|
|||
try:
|
||||
result = []
|
||||
for path in path.listdir():
|
||||
file = get_file(path, fileclasses=fileclasses, deny_list_re=deny_list_re)
|
||||
file = get_file(path, fileclasses=fileclasses)
|
||||
if file is not None:
|
||||
result.append(file)
|
||||
return result
|
||||
|
|
|
@ -20,6 +20,7 @@ from ..directories import (
|
|||
AlreadyThereError,
|
||||
InvalidPathError,
|
||||
)
|
||||
from ..exclude import ExcludeList, ExcludeDict
|
||||
|
||||
|
||||
def create_fake_fs(rootpath):
|
||||
|
@ -323,7 +324,7 @@ def test_get_state_returns_excluded_by_default_for_hidden_directories(tmpdir):
|
|||
def test_default_path_state_override(tmpdir):
|
||||
# It's possible for a subclass to override the default state of a path
|
||||
class MyDirectories(Directories):
|
||||
def _default_state_for_path(self, path, denylist):
|
||||
def _default_state_for_path(self, path):
|
||||
if "foobar" in path:
|
||||
return DirectoryState.Excluded
|
||||
|
||||
|
@ -343,52 +344,193 @@ def test_default_path_state_override(tmpdir):
|
|||
eq_(len(list(d.get_files())), 2)
|
||||
|
||||
|
||||
def test_exclude_list_regular_expressions(tmpdir):
|
||||
d = Directories()
|
||||
d.deny_list_str.clear()
|
||||
d.deny_list_re.clear()
|
||||
d.deny_list_re_files.clear()
|
||||
# This should only exlude the directory, but not the contained files if
|
||||
# its status is set to normal after loading it in the directory tree
|
||||
d.deny_list_str.add(r".*Recycle\.Bin$")
|
||||
d.deny_list_str.add(r"denyme.*")
|
||||
# d.deny_list_str.add(r".*denymetoo")
|
||||
# d.deny_list_str.add(r"denyme")
|
||||
d.deny_list_str.add(r".*\/\..*")
|
||||
d.deny_list_str.add(r"^\..*")
|
||||
d.compile_re()
|
||||
p1 = Path(str(tmpdir))
|
||||
# Should be ignored on Windows only (by default)
|
||||
p1["Recycle.Bin"].mkdir()
|
||||
p1["Recycle.Bin/somerecycledfile"].open("w").close()
|
||||
class TestExcludeList():
|
||||
def setup_method(self, method):
|
||||
self.d = Directories(exclude_list=ExcludeList(combined_regex=False))
|
||||
|
||||
p1["denyme_blah.txt"].open("w").close()
|
||||
p1["blah_denymetoo"].open("w").close()
|
||||
p1["blah_denyme"].open("w").close()
|
||||
def get_files_and_expect_num_result(self, num_result):
|
||||
"""Calls get_files(), get the filenames only, print for debugging.
|
||||
num_result is how many files are expected as a result."""
|
||||
print(f"EXCLUDED REGEX: paths {self.d._exclude_list.compiled_paths} \
|
||||
files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled}")
|
||||
files = list(self.d.get_files())
|
||||
files = [file.name for file in files]
|
||||
print(f"FINAL FILES {files}")
|
||||
eq_(len(files), num_result)
|
||||
return files
|
||||
|
||||
p1[".hidden_file"].open("w").close()
|
||||
p1[".hidden_dir"].mkdir()
|
||||
p1[".hidden_dir/somenormalfile1"].open("w").close()
|
||||
p1[".hidden_dir/somenormalfile2_denyme"].open("w").close()
|
||||
def test_exclude_recycle_bin_by_default(self, tmpdir):
|
||||
regex = r"^.*Recycle\.Bin$"
|
||||
self.d._exclude_list.add(regex)
|
||||
self.d._exclude_list.mark(regex)
|
||||
p1 = Path(str(tmpdir))
|
||||
p1["$Recycle.Bin"].mkdir()
|
||||
p1["$Recycle.Bin"]["subdir"].mkdir()
|
||||
self.d.add_path(p1)
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
|
||||
# By default, subdirs should be excluded too, but this can be overridden separately
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
|
||||
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
|
||||
|
||||
p1["foobar"].mkdir()
|
||||
p1["foobar/somefile"].open("w").close()
|
||||
d.add_path(p1)
|
||||
eq_(d.get_state(p1["Recycle.Bin"]), DirectoryState.Excluded)
|
||||
eq_(d.get_state(p1["foobar"]), DirectoryState.Normal)
|
||||
files = list(d.get_files())
|
||||
files = [file.name for file in files]
|
||||
print(f"first files: {files}")
|
||||
assert "somerecycledfile" not in files
|
||||
assert "denyme_blah.txt" not in files
|
||||
assert ".hidden_file" not in files
|
||||
assert "somefile1" not in files
|
||||
assert "somefile2_denyme" not in files
|
||||
# Overriding the default state from the Directory Tree
|
||||
d.set_state(p1["Recycle.Bin"], DirectoryState.Normal)
|
||||
d.set_state(p1[".hidden_dir"], DirectoryState.Normal)
|
||||
files = list(d.get_files())
|
||||
files = [file.name for file in files]
|
||||
print(f"second files: {files}")
|
||||
assert "somerecycledfile" in files
|
||||
assert "somenormalfile1" in files
|
||||
def test_exclude_refined(self, tmpdir):
|
||||
regex1 = r"^\$Recycle\.Bin$"
|
||||
self.d._exclude_list.add(regex1)
|
||||
self.d._exclude_list.mark(regex1)
|
||||
p1 = Path(str(tmpdir))
|
||||
p1["$Recycle.Bin"].mkdir()
|
||||
p1["$Recycle.Bin"]["somefile.png"].open("w").close()
|
||||
p1["$Recycle.Bin"]["some_unwanted_file.jpg"].open("w").close()
|
||||
p1["$Recycle.Bin"]["subdir"].mkdir()
|
||||
p1["$Recycle.Bin"]["subdir"]["somesubdirfile.png"].open("w").close()
|
||||
p1["$Recycle.Bin"]["subdir"]["unwanted_subdirfile.gif"].open("w").close()
|
||||
p1["$Recycle.Bin"]["subdar"].mkdir()
|
||||
p1["$Recycle.Bin"]["subdar"]["somesubdarfile.jpeg"].open("w").close()
|
||||
p1["$Recycle.Bin"]["subdar"]["unwanted_subdarfile.png"].open("w").close()
|
||||
self.d.add_path(p1["$Recycle.Bin"])
|
||||
|
||||
# Filter should set the default state to Excluded
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
|
||||
# The subdir should inherit its parent state
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Excluded)
|
||||
# Override a child path's state
|
||||
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
|
||||
# Parent should keep its default state, and the other child too
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Excluded)
|
||||
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
|
||||
|
||||
# only the 2 files directly under the Normal directory
|
||||
files = self.get_files_and_expect_num_result(2)
|
||||
assert "somefile.png" not in files
|
||||
assert "some_unwanted_file.jpg" not in files
|
||||
assert "somesubdarfile.jpeg" not in files
|
||||
assert "unwanted_subdarfile.png" not in files
|
||||
assert "somesubdirfile.png" in files
|
||||
assert "unwanted_subdirfile.gif" in files
|
||||
# Overriding the parent should enable all children
|
||||
self.d.set_state(p1["$Recycle.Bin"], DirectoryState.Normal)
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Normal)
|
||||
# all files there
|
||||
files = self.get_files_and_expect_num_result(6)
|
||||
assert "somefile.png" in files
|
||||
assert "some_unwanted_file.jpg" in files
|
||||
|
||||
# This should still filter out files under directory, despite the Normal state
|
||||
regex2 = r".*unwanted.*"
|
||||
self.d._exclude_list.add(regex2)
|
||||
self.d._exclude_list.mark(regex2)
|
||||
files = self.get_files_and_expect_num_result(3)
|
||||
assert "somefile.png" in files
|
||||
assert "some_unwanted_file.jpg" not in files
|
||||
assert "unwanted_subdirfile.gif" not in files
|
||||
assert "unwanted_subdarfile.png" not in files
|
||||
|
||||
regex3 = r".*Recycle\.Bin\/.*unwanted.*subdirfile.*"
|
||||
self.d._exclude_list.rename(regex2, regex3)
|
||||
assert self.d._exclude_list.error(regex3) is None
|
||||
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
|
||||
# Directory shouldn't change its state here, unless explicitly done by user
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
|
||||
files = self.get_files_and_expect_num_result(5)
|
||||
assert "unwanted_subdirfile.gif" not in files
|
||||
assert "unwanted_subdarfile.png" in files
|
||||
|
||||
# using end of line character should only filter the directory, or file ending with subdir
|
||||
regex4 = r".*subdir$"
|
||||
self.d._exclude_list.rename(regex3, regex4)
|
||||
assert self.d._exclude_list.error(regex4) is None
|
||||
p1["$Recycle.Bin"]["subdar"]["file_ending_with_subdir"].open("w").close()
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
|
||||
files = self.get_files_and_expect_num_result(4)
|
||||
assert "file_ending_with_subdir" not in files
|
||||
assert "somesubdarfile.jpeg" in files
|
||||
assert "somesubdirfile.png" not in files
|
||||
assert "unwanted_subdirfile.gif" not in files
|
||||
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
|
||||
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
|
||||
files = self.get_files_and_expect_num_result(6)
|
||||
assert "file_ending_with_subdir" not in files
|
||||
assert "somesubdirfile.png" in files
|
||||
assert "unwanted_subdirfile.gif" in files
|
||||
|
||||
regex5 = r".*subdir.*"
|
||||
self.d._exclude_list.rename(regex4, regex5)
|
||||
# Files containing substring should be filtered
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
|
||||
# The path should not match, only the filename, the "subdir" in the directory name shouldn't matter
|
||||
p1["$Recycle.Bin"]["subdir"]["file_which_shouldnt_match"].open("w").close()
|
||||
files = self.get_files_and_expect_num_result(5)
|
||||
assert "somesubdirfile.png" not in files
|
||||
assert "unwanted_subdirfile.gif" not in files
|
||||
assert "file_ending_with_subdir" not in files
|
||||
assert "file_which_shouldnt_match" in files
|
||||
|
||||
def test_japanese_unicode(self, tmpdir):
|
||||
p1 = Path(str(tmpdir))
|
||||
p1["$Recycle.Bin"].mkdir()
|
||||
p1["$Recycle.Bin"]["somerecycledfile.png"].open("w").close()
|
||||
p1["$Recycle.Bin"]["some_unwanted_file.jpg"].open("w").close()
|
||||
p1["$Recycle.Bin"]["subdir"].mkdir()
|
||||
p1["$Recycle.Bin"]["subdir"]["過去白濁物語~]_カラー.jpg"].open("w").close()
|
||||
p1["$Recycle.Bin"]["思叫物語"].mkdir()
|
||||
p1["$Recycle.Bin"]["思叫物語"]["なししろ会う前"].open("w").close()
|
||||
p1["$Recycle.Bin"]["思叫物語"]["堂~ロ"].open("w").close()
|
||||
self.d.add_path(p1["$Recycle.Bin"])
|
||||
regex3 = r".*物語.*"
|
||||
self.d._exclude_list.add(regex3)
|
||||
self.d._exclude_list.mark(regex3)
|
||||
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
|
||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["思叫物語"]), DirectoryState.Excluded)
|
||||
files = self.get_files_and_expect_num_result(2)
|
||||
assert "過去白濁物語~]_カラー.jpg" not in files
|
||||
assert "なししろ会う前" not in files
|
||||
assert "堂~ロ" not in files
|
||||
# using end of line character should only filter that directory, not affecting its files
|
||||
regex4 = r".*物語$"
|
||||
self.d._exclude_list.rename(regex3, regex4)
|
||||
assert self.d._exclude_list.error(regex4) is None
|
||||
self.d.set_state(p1["$Recycle.Bin"]["思叫物語"], DirectoryState.Normal)
|
||||
files = self.get_files_and_expect_num_result(5)
|
||||
assert "過去白濁物語~]_カラー.jpg" in files
|
||||
assert "なししろ会う前" in files
|
||||
assert "堂~ロ" in files
|
||||
|
||||
def test_get_state_returns_excluded_for_hidden_directories_and_files(self, tmpdir):
|
||||
# This regex only work for files, not paths
|
||||
regex = r"^\..*$"
|
||||
self.d._exclude_list.add(regex)
|
||||
self.d._exclude_list.mark(regex)
|
||||
p1 = Path(str(tmpdir))
|
||||
p1["foobar"].mkdir()
|
||||
p1["foobar"][".hidden_file.txt"].open("w").close()
|
||||
p1["foobar"][".hidden_dir"].mkdir()
|
||||
p1["foobar"][".hidden_dir"]["foobar.jpg"].open("w").close()
|
||||
p1["foobar"][".hidden_dir"][".hidden_subfile.png"].open("w").close()
|
||||
self.d.add_path(p1["foobar"])
|
||||
# It should not inherit its parent's state originally
|
||||
eq_(self.d.get_state(p1["foobar"][".hidden_dir"]), DirectoryState.Excluded)
|
||||
self.d.set_state(p1["foobar"][".hidden_dir"], DirectoryState.Normal)
|
||||
# The files should still be filtered
|
||||
files = self.get_files_and_expect_num_result(1)
|
||||
assert ".hidden_file.txt" not in files
|
||||
assert ".hidden_subfile.png" not in files
|
||||
assert "foobar.jpg" in files
|
||||
|
||||
|
||||
class TestExcludeDict(TestExcludeList):
    """Re-run the ``TestExcludeList`` cases with an ``ExcludeDict(combined_regex=False)``."""

    def setup_method(self, method):
        exclusions = ExcludeDict(combined_regex=False)
        self.d = Directories(exclude_list=exclusions)
|
||||
|
||||
|
||||
class TestExcludeListCombined(TestExcludeList):
    """Re-run the ``TestExcludeList`` cases with an ``ExcludeList(combined_regex=True)``."""

    def setup_method(self, method):
        exclusions = ExcludeList(combined_regex=True)
        self.d = Directories(exclude_list=exclusions)
|
||||
|
||||
|
||||
class TestExcludeDictCombined(TestExcludeList):
    """Re-run the ``TestExcludeList`` cases with an ``ExcludeDict(combined_regex=True)``."""

    def setup_method(self, method):
        exclusions = ExcludeDict(combined_regex=True)
        self.d = Directories(exclude_list=exclusions)
|
||||
|
|
|
@ -0,0 +1,277 @@
|
|||
# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
import io
|
||||
# import os.path as op
|
||||
|
||||
from xml.etree import ElementTree as ET
|
||||
|
||||
# from pytest import raises
|
||||
from hscommon.testutil import eq_
|
||||
|
||||
from .base import DupeGuru
|
||||
from ..exclude import ExcludeList, ExcludeDict, default_regexes, AlreadyThereException
|
||||
|
||||
from re import error
|
||||
|
||||
|
||||
# Two slightly different implementations here, one around a list of lists,
|
||||
# and another around a dictionary.
|
||||
|
||||
|
||||
class TestCaseListXMLLoading:
    """XML load/save tests run against ``ExcludeList``.

    Subclasses can swap the implementation under test by overriding
    ``setup_method``.
    """

    def setup_method(self, method):
        self.exclude_list = ExcludeList()

    def test_load_non_existant_file(self):
        """Loading a missing file is expected to leave the default regexes, all marked."""
        self.exclude_list.load_from_xml("non_existant.xml")
        # As many entries as there are pre-defined regexes...
        eq_(len(default_regexes), len(self.exclude_list))
        # ...and every one of them marked.
        eq_(len(default_regexes), self.exclude_list.marked_count)

    def test_save_to_xml(self):
        """The saved document's root element is expected to be ``exclude_list``."""
        buffer = io.BytesIO()
        self.exclude_list.save_to_xml(buffer)
        buffer.seek(0)
        root_tag = ET.parse(buffer).getroot().tag
        eq_("exclude_list", root_tag)

    def test_save_and_load(self, tmpdir):
        """Entries and their mark state are expected to survive a save/load round trip."""
        source, loaded = ExcludeList(), ExcludeList()
        eq_(len(source), 0)
        source.add(r"one")
        source.mark(r"one")
        source.add(r"two")
        xml_path = str(tmpdir.join("exclude_testunit.xml"))
        source.save_to_xml(xml_path)
        loaded.load_from_xml(xml_path)
        # Exactly the two saved regexes are expected back, only "one" marked
        assert r"one" in loaded
        assert r"two" in loaded
        eq_(len(loaded), 2)
        eq_(loaded.marked_count, 1)

    def test_load_xml_with_garbage_and_missing_elements(self):
        """Unknown tags are expected to be skipped; bad or missing ``marked`` means unmarked."""
        root = ET.Element("foobar")  # The root element shouldn't matter
        nodes = [
            # not an "exclude" element
            ("bogus", {"regex": "None", "marked": "y"}),
            # marked field invalid
            ("exclude", {"regex": "one", "markedddd": "y"}),
            # missing marked field
            ("exclude", {"regex": "two"}),
            ("exclude", {"regex": "three", "markedddd": "pazjbjepo"}),
        ]
        for tag, attributes in nodes:
            ET.SubElement(root, tag, attributes)

        stream = io.BytesIO()
        ET.ElementTree(root).write(stream, encoding="utf-8")
        stream.seek(0)
        self.exclude_list.load_from_xml(stream)
        print(f"{[x for x in self.exclude_list]}")
        # only the three "exclude" nodes should be added
        eq_(3, len(self.exclude_list))
        # None should be marked
        eq_(0, self.exclude_list.marked_count)
|
||||
|
||||
|
||||
class TestCaseDictXMLLoading(TestCaseListXMLLoading):
    """Same XML loading tests, with ``self.exclude_list`` being an ``ExcludeDict``."""

    def setup_method(self, method):
        """Create a fresh ``ExcludeDict`` for each test."""
        exclude_dict = ExcludeDict()
        self.exclude_list = exclude_dict
|
||||
|
||||
|
||||
class TestCaseListEmpty:
    """Add / mark / rename / remove / restore tests starting from an empty exclusion list.

    ``setup_method`` provides ``self.exclude_list``; subclasses override it to run the same
    tests against another implementation.
    """

    def setup_method(self, method):
        """Attach a fresh ``ExcludeList`` to a new ``DupeGuru`` app."""
        self.app = DupeGuru()
        self.app.exclude_list = ExcludeList()
        self.exclude_list = self.app.exclude_list

    def test_add_mark_and_remove_regex(self):
        regex1 = r"one"
        regex2 = r"two"
        self.exclude_list.add(regex1)
        assert regex1 in self.exclude_list
        self.exclude_list.add(regex2)
        self.exclude_list.mark(regex1)
        self.exclude_list.mark(regex2)
        eq_(len(self.exclude_list), 2)
        eq_(len(self.exclude_list.compiled), 2)
        compiled_files = list(self.exclude_list.compiled_files)
        eq_(len(compiled_files), 2)
        self.exclude_list.remove(regex2)
        assert regex2 not in self.exclude_list
        eq_(len(self.exclude_list), 1)

    def test_add_duplicate(self):
        self.exclude_list.add(r"one")
        eq_(1, len(self.exclude_list))
        try:
            self.exclude_list.add(r"one")
        except AlreadyThereException:
            # Rejecting the duplicate is acceptable; any other error is a real failure
            pass
        eq_(1, len(self.exclude_list))

    def test_add_not_compilable(self):
        # Trying to add a non-valid regex should not work and raise exception
        regex = r"one))"
        try:
            self.exclude_list.add(regex)
        except Exception as e:
            # Make sure we raise a re.error so that the interface can process it
            eq_(type(e), error)
        else:
            # Without this branch the test would silently pass when nothing is raised
            raise AssertionError("Adding a non-compilable regex should raise re.error")
        added = self.exclude_list.mark(regex)
        eq_(added, False)
        eq_(len(self.exclude_list), 0)
        eq_(len(self.exclude_list.compiled), 0)
        compiled_files = list(self.exclude_list.compiled_files)
        eq_(len(compiled_files), 0)

    def test_force_add_not_compilable(self):
        """Used when loading from XML for example"""
        regex = r"one))"
        # Should not get an exception here unless it's a duplicate regex,
        # so any exception is simply left to propagate and fail the test.
        self.exclude_list.add(regex, forced=True)
        marked = self.exclude_list.mark(regex)
        eq_(marked, False)  # can't be marked since not compilable
        eq_(len(self.exclude_list), 1)
        eq_(len(self.exclude_list.compiled), 0)
        compiled_files = list(self.exclude_list.compiled_files)
        eq_(len(compiled_files), 0)
        # adding a duplicate
        try:
            self.exclude_list.add(regex, forced=True)
        except Exception as e:
            # we should have this exception, and it shouldn't be added
            assert type(e) is AlreadyThereException
        else:
            raise AssertionError("Adding a duplicate regex should raise AlreadyThereException")
        eq_(len(self.exclude_list), 1)
        eq_(len(self.exclude_list.compiled), 0)

    def test_rename_regex(self):
        regex = r"one"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        regex_renamed = r"one))"
        # Not compilable, can't be marked
        self.exclude_list.rename(regex, regex_renamed)
        assert regex not in self.exclude_list
        assert regex_renamed in self.exclude_list
        eq_(self.exclude_list.is_marked(regex_renamed), False)
        self.exclude_list.mark(regex_renamed)
        eq_(self.exclude_list.is_marked(regex_renamed), False)
        regex_renamed_compilable = r"two"
        self.exclude_list.rename(regex_renamed, regex_renamed_compilable)
        assert regex_renamed_compilable in self.exclude_list
        # NOTE(review): this queries the *old* name, which was just renamed away
        eq_(self.exclude_list.is_marked(regex_renamed), False)
        self.exclude_list.mark(regex_renamed_compilable)
        eq_(self.exclude_list.is_marked(regex_renamed_compilable), True)
        eq_(len(self.exclude_list), 1)
        # Should still be marked after rename
        regex_compilable = r"three"
        self.exclude_list.rename(regex_renamed_compilable, regex_compilable)
        eq_(self.exclude_list.is_marked(regex_compilable), True)

    def test_restore_default(self):
        """Only unmark previously added regexes and mark the pre-defined ones"""
        regex = r"one"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        self.exclude_list.restore_defaults()
        eq_(len(default_regexes), self.exclude_list.marked_count)
        # added regex shouldn't be marked
        eq_(self.exclude_list.is_marked(regex), False)
        # added regex shouldn't be in compiled list either; compare against the pattern
        # sources, since a str is never equal to a compiled pattern object
        compiled_patterns = {compiled_re.pattern for compiled_re in self.exclude_list.compiled}
        assert regex not in compiled_patterns
        # Only default regexes marked and in compiled list
        for default_re in default_regexes:
            assert self.exclude_list.is_marked(default_re)
            assert default_re in compiled_patterns, f"Default RE {default_re} not found in compiled list."
        eq_(len(default_regexes), len(self.exclude_list.compiled))
|
||||
|
||||
|
||||
class TestCaseDictEmpty(TestCaseListEmpty):
    """Same tests as ``TestCaseListEmpty``, using the dictionary implementation."""

    def setup_method(self, method):
        """Attach a fresh ``ExcludeDict`` to a new ``DupeGuru`` app."""
        app = DupeGuru()
        app.exclude_list = ExcludeDict()
        self.app = app
        # Read the attribute back from the app, as the original does
        self.exclude_list = app.exclude_list
|
||||
|
||||
|
||||
def split_combined(pattern_object):
    """Return the sub-patterns of a combined pattern as a list of strings.

    The regex source is split on every literal ``"|"``. No regex parsing is done, so a ``|``
    that appears inside a group or character class is also treated as a separator.

    pattern_object: any object exposing its regex source as a ``pattern`` string attribute.
    """
    # str.split already returns a fresh list, no comprehension needed
    return pattern_object.pattern.split("|")
|
||||
|
||||
|
||||
class TestCaseCompiledList:
    """Test consistency between combined or not"""

    def setup_method(self, method):
        """Build one separate and one combined ``ExcludeList``, both restored to defaults."""
        separate = ExcludeList(combined_regex=False)
        separate.restore_defaults()
        combined = ExcludeList(combined_regex=True)
        combined.restore_defaults()
        self.e_separate = separate
        self.e_combined = combined

    def test_same_number_of_expressions(self):
        default_count = len(default_regexes)
        # Combined mode: a single Pattern, made of as many "|"-joined parts as there are defaults
        eq_(len(split_combined(self.e_combined.compiled[0])), default_count)
        # Separate mode: one Pattern per marked item
        eq_(len(self.e_separate.compiled), default_count)
        exprs = split_combined(self.e_combined.compiled[0])
        # Both modes must expose the same number of expressions, with the same sources
        eq_(len(exprs), len(self.e_separate.compiled))
        for separate_re in self.e_separate.compiled:
            assert separate_re.pattern in exprs

    def test_compiled_files(self):
        # A regex containing a path separator is expected to be left out of compiled_files
        regex1 = r"test/one/sub"
        for exclude in (self.e_separate, self.e_combined):
            exclude.add(regex1)
            exclude.mark(regex1)
        default_count = len(default_regexes)
        separate_compiled_dirs = self.e_separate.compiled
        separate_compiled_files = list(self.e_separate.compiled_files)
        # HACK the compiled property must be read FIRST so that the cache is generated
        combined_compiled_dirs = self.e_combined.compiled
        # compiled_files yields a single combined Pattern here; keep only that one
        combined_compiled_files = list(self.e_combined.compiled_files)[0]
        print(f"compiled files: {combined_compiled_files}")
        # Separate: every default plus the added regex
        eq_(len(separate_compiled_dirs), default_count + 1)
        # regex1 must be absent from the "files" version
        eq_(len(separate_compiled_files), default_count)
        # Combined: one Pattern which splits into the defaults plus the added regex
        eq_(len(split_combined(combined_compiled_dirs[0])), default_count + 1)
        # regex1 must be absent here as well
        eq_(len(split_combined(combined_compiled_files)), default_count)
|
||||
|
||||
|
||||
class TestCaseCompiledDict(TestCaseCompiledList):
    """Same consistency tests as ``TestCaseCompiledList``, using ``ExcludeDict``."""

    def setup_method(self, method):
        """Build one separate and one combined ``ExcludeDict``, both restored to defaults."""
        separate = ExcludeDict(combined_regex=False)
        separate.restore_defaults()
        combined = ExcludeDict(combined_regex=True)
        combined.restore_defaults()
        self.e_separate = separate
        self.e_combined = combined
|
Loading…
Reference in New Issue