1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-22 06:37:17 +00:00

Add test suite, fix bugs

This commit is contained in:
glubsy
2020-08-29 03:57:00 +02:00
parent 26d18945b1
commit 4a1641e39d
7 changed files with 613 additions and 143 deletions

View File

@@ -20,6 +20,7 @@ from ..directories import (
AlreadyThereError,
InvalidPathError,
)
from ..exclude import ExcludeList, ExcludeDict
def create_fake_fs(rootpath):
@@ -323,7 +324,7 @@ def test_get_state_returns_excluded_by_default_for_hidden_directories(tmpdir):
def test_default_path_state_override(tmpdir):
# It's possible for a subclass to override the default state of a path
class MyDirectories(Directories):
def _default_state_for_path(self, path, denylist):
def _default_state_for_path(self, path):
if "foobar" in path:
return DirectoryState.Excluded
@@ -343,52 +344,193 @@ def test_default_path_state_override(tmpdir):
eq_(len(list(d.get_files())), 2)
def test_exclude_list_regular_expressions(tmpdir):
d = Directories()
d.deny_list_str.clear()
d.deny_list_re.clear()
d.deny_list_re_files.clear()
# This should only exlude the directory, but not the contained files if
# its status is set to normal after loading it in the directory tree
d.deny_list_str.add(r".*Recycle\.Bin$")
d.deny_list_str.add(r"denyme.*")
# d.deny_list_str.add(r".*denymetoo")
# d.deny_list_str.add(r"denyme")
d.deny_list_str.add(r".*\/\..*")
d.deny_list_str.add(r"^\..*")
d.compile_re()
p1 = Path(str(tmpdir))
# Should be ignored on Windows only (by default)
p1["Recycle.Bin"].mkdir()
p1["Recycle.Bin/somerecycledfile"].open("w").close()
class TestExcludeList():
def setup_method(self, method):
self.d = Directories(exclude_list=ExcludeList(combined_regex=False))
p1["denyme_blah.txt"].open("w").close()
p1["blah_denymetoo"].open("w").close()
p1["blah_denyme"].open("w").close()
def get_files_and_expect_num_result(self, num_result):
"""Calls get_files(), get the filenames only, print for debugging.
num_result is how many files are expected as a result."""
print(f"EXCLUDED REGEX: paths {self.d._exclude_list.compiled_paths} \
files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled}")
files = list(self.d.get_files())
files = [file.name for file in files]
print(f"FINAL FILES {files}")
eq_(len(files), num_result)
return files
p1[".hidden_file"].open("w").close()
p1[".hidden_dir"].mkdir()
p1[".hidden_dir/somenormalfile1"].open("w").close()
p1[".hidden_dir/somenormalfile2_denyme"].open("w").close()
def test_exclude_recycle_bin_by_default(self, tmpdir):
regex = r"^.*Recycle\.Bin$"
self.d._exclude_list.add(regex)
self.d._exclude_list.mark(regex)
p1 = Path(str(tmpdir))
p1["$Recycle.Bin"].mkdir()
p1["$Recycle.Bin"]["subdir"].mkdir()
self.d.add_path(p1)
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
# By default, subdirs should be excluded too, but this can be overriden separately
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
p1["foobar"].mkdir()
p1["foobar/somefile"].open("w").close()
d.add_path(p1)
eq_(d.get_state(p1["Recycle.Bin"]), DirectoryState.Excluded)
eq_(d.get_state(p1["foobar"]), DirectoryState.Normal)
files = list(d.get_files())
files = [file.name for file in files]
print(f"first files: {files}")
assert "somerecycledfile" not in files
assert "denyme_blah.txt" not in files
assert ".hidden_file" not in files
assert "somefile1" not in files
assert "somefile2_denyme" not in files
# Overriding the default state from the Directory Tree
d.set_state(p1["Recycle.Bin"], DirectoryState.Normal)
d.set_state(p1[".hidden_dir"], DirectoryState.Normal)
files = list(d.get_files())
files = [file.name for file in files]
print(f"second files: {files}")
assert "somerecycledfile" in files
assert "somenormalfile1" in files
def test_exclude_refined(self, tmpdir):
regex1 = r"^\$Recycle\.Bin$"
self.d._exclude_list.add(regex1)
self.d._exclude_list.mark(regex1)
p1 = Path(str(tmpdir))
p1["$Recycle.Bin"].mkdir()
p1["$Recycle.Bin"]["somefile.png"].open("w").close()
p1["$Recycle.Bin"]["some_unwanted_file.jpg"].open("w").close()
p1["$Recycle.Bin"]["subdir"].mkdir()
p1["$Recycle.Bin"]["subdir"]["somesubdirfile.png"].open("w").close()
p1["$Recycle.Bin"]["subdir"]["unwanted_subdirfile.gif"].open("w").close()
p1["$Recycle.Bin"]["subdar"].mkdir()
p1["$Recycle.Bin"]["subdar"]["somesubdarfile.jpeg"].open("w").close()
p1["$Recycle.Bin"]["subdar"]["unwanted_subdarfile.png"].open("w").close()
self.d.add_path(p1["$Recycle.Bin"])
# Filter should set the default state to Excluded
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
# The subdir should inherit its parent state
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Excluded)
# Override a child path's state
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
# Parent should keep its default state, and the other child too
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Excluded)
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
# only the 2 files directly under the Normal directory
files = self.get_files_and_expect_num_result(2)
assert "somefile.png" not in files
assert "some_unwanted_file.jpg" not in files
assert "somesubdarfile.jpeg" not in files
assert "unwanted_subdarfile.png" not in files
assert "somesubdirfile.png" in files
assert "unwanted_subdirfile.gif" in files
# Overriding the parent should enable all children
self.d.set_state(p1["$Recycle.Bin"], DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Normal)
# all files there
files = self.get_files_and_expect_num_result(6)
assert "somefile.png" in files
assert "some_unwanted_file.jpg" in files
# This should still filter out files under directory, despite the Normal state
regex2 = r".*unwanted.*"
self.d._exclude_list.add(regex2)
self.d._exclude_list.mark(regex2)
files = self.get_files_and_expect_num_result(3)
assert "somefile.png" in files
assert "some_unwanted_file.jpg" not in files
assert "unwanted_subdirfile.gif" not in files
assert "unwanted_subdarfile.png" not in files
regex3 = r".*Recycle\.Bin\/.*unwanted.*subdirfile.*"
self.d._exclude_list.rename(regex2, regex3)
assert self.d._exclude_list.error(regex3) is None
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
# Directory shouldn't change its state here, unless explicitely done by user
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
files = self.get_files_and_expect_num_result(5)
assert "unwanted_subdirfile.gif" not in files
assert "unwanted_subdarfile.png" in files
# using end of line character should only filter the directory, or file ending with subdir
regex4 = r".*subdir$"
self.d._exclude_list.rename(regex3, regex4)
assert self.d._exclude_list.error(regex4) is None
p1["$Recycle.Bin"]["subdar"]["file_ending_with_subdir"].open("w").close()
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
files = self.get_files_and_expect_num_result(4)
assert "file_ending_with_subdir" not in files
assert "somesubdarfile.jpeg" in files
assert "somesubdirfile.png" not in files
assert "unwanted_subdirfile.gif" not in files
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
files = self.get_files_and_expect_num_result(6)
assert "file_ending_with_subdir" not in files
assert "somesubdirfile.png" in files
assert "unwanted_subdirfile.gif" in files
regex5 = r".*subdir.*"
self.d._exclude_list.rename(regex4, regex5)
# Files containing substring should be filtered
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
# The path should not match, only the filename, the "subdir" in the directory name shouldn't matter
p1["$Recycle.Bin"]["subdir"]["file_which_shouldnt_match"].open("w").close()
files = self.get_files_and_expect_num_result(5)
assert "somesubdirfile.png" not in files
assert "unwanted_subdirfile.gif" not in files
assert "file_ending_with_subdir" not in files
assert "file_which_shouldnt_match" in files
def test_japanese_unicode(self, tmpdir):
p1 = Path(str(tmpdir))
p1["$Recycle.Bin"].mkdir()
p1["$Recycle.Bin"]["somerecycledfile.png"].open("w").close()
p1["$Recycle.Bin"]["some_unwanted_file.jpg"].open("w").close()
p1["$Recycle.Bin"]["subdir"].mkdir()
p1["$Recycle.Bin"]["subdir"]["過去白濁物語~]_カラー.jpg"].open("w").close()
p1["$Recycle.Bin"]["思叫物語"].mkdir()
p1["$Recycle.Bin"]["思叫物語"]["なししろ会う前"].open("w").close()
p1["$Recycle.Bin"]["思叫物語"]["堂~ロ"].open("w").close()
self.d.add_path(p1["$Recycle.Bin"])
regex3 = r".*物語.*"
self.d._exclude_list.add(regex3)
self.d._exclude_list.mark(regex3)
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
eq_(self.d.get_state(p1["$Recycle.Bin"]["思叫物語"]), DirectoryState.Excluded)
files = self.get_files_and_expect_num_result(2)
assert "過去白濁物語~]_カラー.jpg" not in files
assert "なししろ会う前" not in files
assert "堂~ロ" not in files
# using end of line character should only filter that directory, not affecting its files
regex4 = r".*物語$"
self.d._exclude_list.rename(regex3, regex4)
assert self.d._exclude_list.error(regex4) is None
self.d.set_state(p1["$Recycle.Bin"]["思叫物語"], DirectoryState.Normal)
files = self.get_files_and_expect_num_result(5)
assert "過去白濁物語~]_カラー.jpg" in files
assert "なししろ会う前" in files
assert "堂~ロ" in files
def test_get_state_returns_excluded_for_hidden_directories_and_files(self, tmpdir):
# This regex only work for files, not paths
regex = r"^\..*$"
self.d._exclude_list.add(regex)
self.d._exclude_list.mark(regex)
p1 = Path(str(tmpdir))
p1["foobar"].mkdir()
p1["foobar"][".hidden_file.txt"].open("w").close()
p1["foobar"][".hidden_dir"].mkdir()
p1["foobar"][".hidden_dir"]["foobar.jpg"].open("w").close()
p1["foobar"][".hidden_dir"][".hidden_subfile.png"].open("w").close()
self.d.add_path(p1["foobar"])
# It should not inherit its parent's state originally
eq_(self.d.get_state(p1["foobar"][".hidden_dir"]), DirectoryState.Excluded)
self.d.set_state(p1["foobar"][".hidden_dir"], DirectoryState.Normal)
# The files should still be filtered
files = self.get_files_and_expect_num_result(1)
assert ".hidden_file.txt" not in files
assert ".hidden_subfile.png" not in files
assert "foobar.jpg" in files
class TestExcludeDict(TestExcludeList):
def setup_method(self, method):
self.d = Directories(exclude_list=ExcludeDict(combined_regex=False))
class TestExcludeListCombined(TestExcludeList):
def setup_method(self, method):
self.d = Directories(exclude_list=ExcludeList(combined_regex=True))
class TestExcludeDictCombined(TestExcludeList):
def setup_method(self, method):
self.d = Directories(exclude_list=ExcludeDict(combined_regex=True))

277
core/tests/exclude_test.py Normal file
View File

@@ -0,0 +1,277 @@
# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
import io
# import os.path as op
from xml.etree import ElementTree as ET
# from pytest import raises
from hscommon.testutil import eq_
from .base import DupeGuru
from ..exclude import ExcludeList, ExcludeDict, default_regexes, AlreadyThereException
from re import error
# Two slightly different implementations here, one around a list of lists,
# and another around a dictionary.
class TestCaseListXMLLoading:
def setup_method(self, method):
self.exclude_list = ExcludeList()
def test_load_non_existant_file(self):
# Loads the pre-defined regexes
self.exclude_list.load_from_xml("non_existant.xml")
eq_(len(default_regexes), len(self.exclude_list))
# they should also be marked by default
eq_(len(default_regexes), self.exclude_list.marked_count)
def test_save_to_xml(self):
f = io.BytesIO()
self.exclude_list.save_to_xml(f)
f.seek(0)
doc = ET.parse(f)
root = doc.getroot()
eq_("exclude_list", root.tag)
def test_save_and_load(self, tmpdir):
e1 = ExcludeList()
e2 = ExcludeList()
eq_(len(e1), 0)
e1.add(r"one")
e1.mark(r"one")
e1.add(r"two")
tmpxml = str(tmpdir.join("exclude_testunit.xml"))
e1.save_to_xml(tmpxml)
e2.load_from_xml(tmpxml)
# We should have the default regexes
assert r"one" in e2
assert r"two" in e2
eq_(len(e2), 2)
eq_(e2.marked_count, 1)
def test_load_xml_with_garbage_and_missing_elements(self):
root = ET.Element("foobar") # The root element shouldn't matter
exclude_node = ET.SubElement(root, "bogus")
exclude_node.set("regex", "None")
exclude_node.set("marked", "y")
exclude_node = ET.SubElement(root, "exclude")
exclude_node.set("regex", "one")
# marked field invalid
exclude_node.set("markedddd", "y")
exclude_node = ET.SubElement(root, "exclude")
exclude_node.set("regex", "two")
# missing marked field
exclude_node = ET.SubElement(root, "exclude")
exclude_node.set("regex", "three")
exclude_node.set("markedddd", "pazjbjepo")
f = io.BytesIO()
tree = ET.ElementTree(root)
tree.write(f, encoding="utf-8")
f.seek(0)
self.exclude_list.load_from_xml(f)
print(f"{[x for x in self.exclude_list]}")
# only the two "exclude" nodes should be added,
eq_(3, len(self.exclude_list))
# None should be marked
eq_(0, self.exclude_list.marked_count)
class TestCaseDictXMLLoading(TestCaseListXMLLoading):
def setup_method(self, method):
self.exclude_list = ExcludeDict()
class TestCaseListEmpty:
def setup_method(self, method):
self.app = DupeGuru()
self.app.exclude_list = ExcludeList()
self.exclude_list = self.app.exclude_list
def test_add_mark_and_remove_regex(self):
regex1 = r"one"
regex2 = r"two"
self.exclude_list.add(regex1)
assert(regex1 in self.exclude_list)
self.exclude_list.add(regex2)
self.exclude_list.mark(regex1)
self.exclude_list.mark(regex2)
eq_(len(self.exclude_list), 2)
eq_(len(self.exclude_list.compiled), 2)
compiled_files = [x for x in self.exclude_list.compiled_files]
eq_(len(compiled_files), 2)
self.exclude_list.remove(regex2)
assert(regex2 not in self.exclude_list)
eq_(len(self.exclude_list), 1)
def test_add_duplicate(self):
self.exclude_list.add(r"one")
eq_(1 , len(self.exclude_list))
try:
self.exclude_list.add(r"one")
except Exception:
pass
eq_(1 , len(self.exclude_list))
def test_add_not_compilable(self):
# Trying to add a non-valid regex should not work and raise exception
regex = r"one))"
try:
self.exclude_list.add(regex)
except Exception as e:
# Make sure we raise a re.error so that the interface can process it
eq_(type(e), error)
added = self.exclude_list.mark(regex)
eq_(added, False)
eq_(len(self.exclude_list), 0)
eq_(len(self.exclude_list.compiled), 0)
compiled_files = [x for x in self.exclude_list.compiled_files]
eq_(len(compiled_files), 0)
def test_force_add_not_compilable(self):
"""Used when loading from XML for example"""
regex = r"one))"
try:
self.exclude_list.add(regex, forced=True)
except Exception as e:
# Should not get an exception here unless it's a duplicate regex
raise e
marked = self.exclude_list.mark(regex)
eq_(marked, False) # can't be marked since not compilable
eq_(len(self.exclude_list), 1)
eq_(len(self.exclude_list.compiled), 0)
compiled_files = [x for x in self.exclude_list.compiled_files]
eq_(len(compiled_files), 0)
# adding a duplicate
regex = r"one))"
try:
self.exclude_list.add(regex, forced=True)
except Exception as e:
# we should have this exception, and it shouldn't be added
assert type(e) is AlreadyThereException
eq_(len(self.exclude_list), 1)
eq_(len(self.exclude_list.compiled), 0)
def test_rename_regex(self):
regex = r"one"
self.exclude_list.add(regex)
self.exclude_list.mark(regex)
regex_renamed = r"one))"
# Not compilable, can't be marked
self.exclude_list.rename(regex, regex_renamed)
assert regex not in self.exclude_list
assert regex_renamed in self.exclude_list
eq_(self.exclude_list.is_marked(regex_renamed), False)
self.exclude_list.mark(regex_renamed)
eq_(self.exclude_list.is_marked(regex_renamed), False)
regex_renamed_compilable = r"two"
self.exclude_list.rename(regex_renamed, regex_renamed_compilable)
assert regex_renamed_compilable in self.exclude_list
eq_(self.exclude_list.is_marked(regex_renamed), False)
self.exclude_list.mark(regex_renamed_compilable)
eq_(self.exclude_list.is_marked(regex_renamed_compilable), True)
eq_(len(self.exclude_list), 1)
# Should still be marked after rename
regex_compilable = r"three"
self.exclude_list.rename(regex_renamed_compilable, regex_compilable)
eq_(self.exclude_list.is_marked(regex_compilable), True)
def test_restore_default(self):
"""Only unmark previously added regexes and mark the pre-defined ones"""
regex = r"one"
self.exclude_list.add(regex)
self.exclude_list.mark(regex)
self.exclude_list.restore_defaults()
eq_(len(default_regexes), self.exclude_list.marked_count)
# added regex shouldn't be marked
eq_(self.exclude_list.is_marked(regex), False)
# added regex shouldn't be in compiled list either
compiled = [x for x in self.exclude_list.compiled]
assert regex not in compiled
# Only default regexes marked and in compiled list
for re in default_regexes:
assert self.exclude_list.is_marked(re)
found = False
for compiled_re in compiled:
if compiled_re.pattern == re:
found = True
if not found:
raise(Exception(f"Default RE {re} not found in compiled list."))
continue
eq_(len(default_regexes), len(self.exclude_list.compiled))
class TestCaseDictEmpty(TestCaseListEmpty):
"""Same, but with dictionary implementation"""
def setup_method(self, method):
self.app = DupeGuru()
self.app.exclude_list = ExcludeDict()
self.exclude_list = self.app.exclude_list
def split_combined(pattern_object):
"""Returns list of strings for each combined pattern"""
return [x for x in pattern_object.pattern.split("|")]
class TestCaseCompiledList():
"""Test consistency between combined or not"""
def setup_method(self, method):
self.e_separate = ExcludeList(combined_regex=False)
self.e_separate.restore_defaults()
self.e_combined = ExcludeList(combined_regex=True)
self.e_combined.restore_defaults()
def test_same_number_of_expressions(self):
# We only get one combined Pattern item in a tuple, which is made of however many parts
eq_(len(split_combined(self.e_combined.compiled[0])), len(default_regexes))
# We get as many as there are marked items
eq_(len(self.e_separate.compiled), len(default_regexes))
exprs = split_combined(self.e_combined.compiled[0])
# We should have the same number and the same expressions
eq_(len(exprs), len(self.e_separate.compiled))
for expr in self.e_separate.compiled:
assert expr.pattern in exprs
def test_compiled_files(self):
# test is separator is indeed checked properly to yield the output
regex1 = r"test/one/sub"
self.e_separate.add(regex1)
self.e_separate.mark(regex1)
self.e_combined.add(regex1)
self.e_combined.mark(regex1)
separate_compiled_dirs = self.e_separate.compiled
separate_compiled_files = [x for x in self.e_separate.compiled_files]
# HACK we need to call compiled property FIRST to generate the cache
combined_compiled_dirs = self.e_combined.compiled
# print(f"type: {type(self.e_combined.compiled_files[0])}")
# A generator returning only one item... ugh
combined_compiled_files = [x for x in self.e_combined.compiled_files][0]
print(f"compiled files: {combined_compiled_files}")
# Separate should give several plus the one added
eq_(len(separate_compiled_dirs), len(default_regexes) + 1)
# regex1 shouldn't be in the "files" version
eq_(len(separate_compiled_files), len(default_regexes))
# Only one Pattern returned, which when split should be however many + 1
eq_(len(split_combined(combined_compiled_dirs[0])), len(default_regexes) + 1)
# regex1 shouldn't be here either
eq_(len(split_combined(combined_compiled_files)), len(default_regexes))
class TestCaseCompiledDict(TestCaseCompiledList):
def setup_method(self, method):
self.e_separate = ExcludeDict(combined_regex=False)
self.e_separate.restore_defaults()
self.e_combined = ExcludeDict(combined_regex=True)
self.e_combined.restore_defaults()