2020-08-29 03:57:00 +02:00
# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
import io
from xml.etree import ElementTree as ET
from hscommon.testutil import eq_
2020-12-29 16:07:55 +01:00
from hscommon.plat import ISWINDOWS
2020-08-29 03:57:00 +02:00
from .base import DupeGuru
from ..exclude import ExcludeList, ExcludeDict, default_regexes, AlreadyThereException
from re import error
# Two slightly different implementations here, one around a list of lists,
# and another around a dictionary.
class TestCaseListXMLLoading:
    """XML save/load tests, run against ``ExcludeList``.

    NOTE(review): the ``save_to_xml``/``load_from_xml`` calls (and the
    ``add``/``mark`` setup in ``test_save_and_load``) were absent from this
    copy of the file and are restored from what the assertions require --
    confirm the method names against the exclude module.
    """

    def setup_method(self, method):
        self.exclude_list = ExcludeList()

    def test_load_non_existant_file(self):
        # NOTE(review): assumes loading a missing file falls back to defaults.
        self.exclude_list.load_from_xml("non_existant.xml")
        # Loads the pre-defined regexes
        eq_(len(default_regexes), len(self.exclude_list))
        # they should also be marked by default
        eq_(len(default_regexes), self.exclude_list.marked_count)

    def test_save_to_xml(self):
        f = io.BytesIO()
        self.exclude_list.save_to_xml(f)
        # Rewind so the parser reads what was just written instead of EOF.
        f.seek(0)
        doc = ET.parse(f)
        root = doc.getroot()
        eq_("exclude_list", root.tag)

    def test_save_and_load(self, tmpdir):
        e1 = ExcludeList()
        e2 = ExcludeList()
        eq_(len(e1), 0)
        e1.add(r"one")
        e1.mark(r"one")
        e1.add(r"two")
        tmpxml = str(tmpdir.join("exclude_testunit.xml"))
        e1.save_to_xml(tmpxml)
        e2.load_from_xml(tmpxml)
        # Both regexes should come back, with only "one" marked
        assert r"one" in e2
        assert r"two" in e2
        eq_(len(e2), 2)
        eq_(e2.marked_count, 1)

    def test_load_xml_with_garbage_and_missing_elements(self):
        root = ET.Element("foobar")  # The root element shouldn't matter
        exclude_node = ET.SubElement(root, "bogus")
        exclude_node.set("regex", "None")
        exclude_node.set("marked", "y")

        exclude_node = ET.SubElement(root, "exclude")
        exclude_node.set("regex", "one")
        # marked field invalid
        exclude_node.set("markedddd", "y")

        exclude_node = ET.SubElement(root, "exclude")
        exclude_node.set("regex", "two")
        # missing marked field

        exclude_node = ET.SubElement(root, "exclude")
        exclude_node.set("regex", "three")
        exclude_node.set("markedddd", "pazjbjepo")

        f = io.BytesIO()
        tree = ET.ElementTree(root)
        tree.write(f, encoding="utf-8")
        f.seek(0)
        self.exclude_list.load_from_xml(f)
        print(f"{[x for x in self.exclude_list]}")
        # only the three "exclude" nodes should be added (not the "bogus" one)
        eq_(3, len(self.exclude_list))
        # None should be marked
        eq_(0, self.exclude_list.marked_count)
class TestCaseDictXMLLoading(TestCaseListXMLLoading):
    """Run the inherited XML tests against ``ExcludeDict`` instead."""

    def setup_method(self, method):
        self.exclude_list = ExcludeDict()
class TestCaseListEmpty:
    """Tests starting from an empty ``ExcludeList`` with ``union_regex=False``.

    NOTE(review): several setup statements (``add``/``mark`` calls, ``try:``
    lines, the ``ISWINDOWS`` guard) were absent from this copy of the file and
    are restored from what the assertions require. ``remove`` and
    ``restore_defaults`` are assumed method names -- confirm against the
    exclude module.
    """

    def setup_method(self, method):
        self.app = DupeGuru()
        self.app.exclude_list = ExcludeList(union_regex=False)
        self.exclude_list = self.app.exclude_list

    def test_add_mark_and_remove_regex(self):
        regex1 = r"one"
        regex2 = r"two"
        self.exclude_list.add(regex1)
        assert regex1 in self.exclude_list
        self.exclude_list.add(regex2)
        self.exclude_list.mark(regex1)
        self.exclude_list.mark(regex2)
        eq_(len(self.exclude_list), 2)
        eq_(len(self.exclude_list.compiled), 2)
        compiled_files = list(self.exclude_list.compiled_files)
        eq_(len(compiled_files), 2)
        self.exclude_list.remove(regex2)
        assert regex2 not in self.exclude_list
        eq_(len(self.exclude_list), 1)

    def test_add_duplicate(self):
        self.exclude_list.add(r"one")
        eq_(1, len(self.exclude_list))
        try:
            self.exclude_list.add(r"one")
        except Exception:
            # Whether or not the duplicate raises, it must not be stored twice.
            pass
        eq_(1, len(self.exclude_list))

    def test_add_not_compilable(self):
        # Trying to add a non-valid regex should not work and raise exception
        regex = r"one))"
        try:
            self.exclude_list.add(regex)
        except Exception as e:
            # Make sure we raise a re.error so that the interface can process it
            eq_(type(e), error)
        added = self.exclude_list.mark(regex)
        eq_(added, False)
        eq_(len(self.exclude_list), 0)
        eq_(len(self.exclude_list.compiled), 0)
        compiled_files = list(self.exclude_list.compiled_files)
        eq_(len(compiled_files), 0)

    def test_force_add_not_compilable(self):
        """Used when loading from XML for example"""
        regex = r"one))"
        self.exclude_list.add(regex, forced=True)
        marked = self.exclude_list.mark(regex)
        eq_(marked, False)  # can't be marked since not compilable
        eq_(len(self.exclude_list), 1)
        eq_(len(self.exclude_list.compiled), 0)
        compiled_files = list(self.exclude_list.compiled_files)
        eq_(len(compiled_files), 0)
        # adding a duplicate
        regex = r"one))"
        try:
            self.exclude_list.add(regex, forced=True)
        except Exception as e:
            # we should have this exception, and it shouldn't be added
            assert type(e) is AlreadyThereException
        eq_(len(self.exclude_list), 1)
        eq_(len(self.exclude_list.compiled), 0)

    def test_rename_regex(self):
        regex = r"one"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        regex_renamed = r"one))"
        # Not compilable, can't be marked
        self.exclude_list.rename(regex, regex_renamed)
        assert regex not in self.exclude_list
        assert regex_renamed in self.exclude_list
        eq_(self.exclude_list.is_marked(regex_renamed), False)
        # Explicitly marking it must not succeed either
        self.exclude_list.mark(regex_renamed)
        eq_(self.exclude_list.is_marked(regex_renamed), False)
        regex_renamed_compilable = r"two"
        self.exclude_list.rename(regex_renamed, regex_renamed_compilable)
        assert regex_renamed_compilable in self.exclude_list
        eq_(self.exclude_list.is_marked(regex_renamed), False)
        self.exclude_list.mark(regex_renamed_compilable)
        eq_(self.exclude_list.is_marked(regex_renamed_compilable), True)
        eq_(len(self.exclude_list), 1)
        # Should still be marked after rename
        regex_compilable = r"three"
        self.exclude_list.rename(regex_renamed_compilable, regex_compilable)
        eq_(self.exclude_list.is_marked(regex_compilable), True)

    def test_rename_regex_file_to_path(self):
        # Use the platform's path separator in the path-style regex
        regex = r".*/one.*"
        if ISWINDOWS:
            regex = r".*\\one.*"
        regex2 = r".*one.*"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        compiled_re = [x.pattern for x in self.exclude_list._excluded_compiled]
        files_re = [x.pattern for x in self.exclude_list.compiled_files]
        paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
        assert regex in compiled_re
        assert regex not in files_re
        assert regex in paths_re
        self.exclude_list.rename(regex, regex2)
        compiled_re = [x.pattern for x in self.exclude_list._excluded_compiled]
        files_re = [x.pattern for x in self.exclude_list.compiled_files]
        paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
        assert regex not in compiled_re
        assert regex2 in compiled_re
        assert regex2 in files_re
        assert regex2 not in paths_re

    def test_restore_default(self):
        """Only unmark previously added regexes and mark the pre-defined ones"""
        regex = r"one"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        self.exclude_list.restore_defaults()
        eq_(len(default_regexes), self.exclude_list.marked_count)
        # added regex shouldn't be marked
        eq_(self.exclude_list.is_marked(regex), False)
        # added regex shouldn't be in compiled list either; compare pattern
        # strings, since a str never equals a compiled Pattern object
        compiled = list(self.exclude_list.compiled)
        assert regex not in [x.pattern for x in compiled]
        # Only default regexes marked and in compiled list
        for default_re in default_regexes:
            assert self.exclude_list.is_marked(default_re)
            if not any(x.pattern == default_re for x in compiled):
                raise Exception(f"Default RE {default_re} not found in compiled list.")
        eq_(len(default_regexes), len(self.exclude_list.compiled))
2021-06-19 01:52:31 +02:00
class TestCaseListEmptyUnion(TestCaseListEmpty):
    """Same but with union regex

    NOTE(review): the ``add``/``mark`` setup calls, the ``ISWINDOWS`` guard and
    the ``remove``/``restore_defaults`` calls were absent from this copy of the
    file and are restored from what the assertions require -- confirm the
    method names against the exclude module.
    """

    def setup_method(self, method):
        self.app = DupeGuru()
        self.app.exclude_list = ExcludeList(union_regex=True)
        self.exclude_list = self.app.exclude_list

    def test_add_mark_and_remove_regex(self):
        regex1 = r"one"
        regex2 = r"two"
        self.exclude_list.add(regex1)
        assert regex1 in self.exclude_list
        self.exclude_list.add(regex2)
        self.exclude_list.mark(regex1)
        self.exclude_list.mark(regex2)
        eq_(len(self.exclude_list), 2)
        eq_(len(self.exclude_list.compiled), 1)
        compiled_files = list(self.exclude_list.compiled_files)
        eq_(len(compiled_files), 1)  # Two patterns joined together into one
        assert "|" in compiled_files[0].pattern
        self.exclude_list.remove(regex2)
        assert regex2 not in self.exclude_list
        eq_(len(self.exclude_list), 1)

    def test_rename_regex_file_to_path(self):
        # Use the platform's path separator in the path-style regex
        regex = r".*/one.*"
        if ISWINDOWS:
            regex = r".*\\one.*"
        regex2 = r".*one.*"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        eq_(len([x for x in self.exclude_list]), 1)
        compiled_re = [x.pattern for x in self.exclude_list.compiled]
        files_re = [x.pattern for x in self.exclude_list.compiled_files]
        paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
        assert regex in compiled_re
        assert regex not in files_re
        assert regex in paths_re
        self.exclude_list.rename(regex, regex2)
        eq_(len([x for x in self.exclude_list]), 1)
        compiled_re = [x.pattern for x in self.exclude_list.compiled]
        files_re = [x.pattern for x in self.exclude_list.compiled_files]
        paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
        assert regex not in compiled_re
        assert regex2 in compiled_re
        assert regex2 in files_re
        assert regex2 not in paths_re

    def test_restore_default(self):
        """Only unmark previously added regexes and mark the pre-defined ones"""
        regex = r"one"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        self.exclude_list.restore_defaults()
        eq_(len(default_regexes), self.exclude_list.marked_count)
        # added regex shouldn't be marked
        eq_(self.exclude_list.is_marked(regex), False)
        # added regex shouldn't be in compiled list either; look inside the
        # single union pattern, since a str never equals a Pattern object
        compiled = list(self.exclude_list.compiled)
        union_parts = compiled[0].pattern.split("|")
        assert regex not in union_parts
        # Need to escape both to get the same strings after compilation
        compiled_escaped = {x.encode("unicode-escape").decode() for x in union_parts}
        default_escaped = {x.encode("unicode-escape").decode() for x in default_regexes}
        assert compiled_escaped == default_escaped
        eq_(len(default_regexes), len(union_parts))
2020-08-29 03:57:00 +02:00
class TestCaseDictEmpty(TestCaseListEmpty):
    """Same, but with dictionary implementation"""

    def setup_method(self, method):
        self.app = DupeGuru()
        self.app.exclude_list = ExcludeDict(union_regex=False)
        self.exclude_list = self.app.exclude_list
2021-06-19 01:52:31 +02:00
class TestCaseDictEmptyUnion(TestCaseDictEmpty):
    """Same, but with union regex

    NOTE(review): the ``add``/``mark`` setup calls, the ``ISWINDOWS`` guard and
    the ``remove``/``restore_defaults`` calls were absent from this copy of the
    file and are restored from what the assertions require -- confirm the
    method names against the exclude module.
    """

    def setup_method(self, method):
        self.app = DupeGuru()
        self.app.exclude_list = ExcludeDict(union_regex=True)
        self.exclude_list = self.app.exclude_list

    def test_add_mark_and_remove_regex(self):
        regex1 = r"one"
        regex2 = r"two"
        self.exclude_list.add(regex1)
        assert regex1 in self.exclude_list
        self.exclude_list.add(regex2)
        self.exclude_list.mark(regex1)
        self.exclude_list.mark(regex2)
        eq_(len(self.exclude_list), 2)
        eq_(len(self.exclude_list.compiled), 1)
        compiled_files = list(self.exclude_list.compiled_files)
        # two patterns joined into one
        eq_(len(compiled_files), 1)
        self.exclude_list.remove(regex2)
        assert regex2 not in self.exclude_list
        eq_(len(self.exclude_list), 1)

    def test_rename_regex_file_to_path(self):
        # Use the platform's path separator in the path-style regex
        regex = r".*/one.*"
        if ISWINDOWS:
            regex = r".*\\one.*"
        regex2 = r".*one.*"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        marked_re = [x for marked, x in self.exclude_list if marked]
        eq_(len(marked_re), 1)
        compiled_re = [x.pattern for x in self.exclude_list.compiled]
        files_re = [x.pattern for x in self.exclude_list.compiled_files]
        paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
        assert regex in compiled_re
        assert regex not in files_re
        assert regex in paths_re
        self.exclude_list.rename(regex, regex2)
        compiled_re = [x.pattern for x in self.exclude_list.compiled]
        files_re = [x.pattern for x in self.exclude_list.compiled_files]
        paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
        assert regex not in compiled_re
        assert regex2 in compiled_re
        assert regex2 in files_re
        assert regex2 not in paths_re

    def test_restore_default(self):
        """Only unmark previously added regexes and mark the pre-defined ones"""
        regex = r"one"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        self.exclude_list.restore_defaults()
        eq_(len(default_regexes), self.exclude_list.marked_count)
        # added regex shouldn't be marked
        eq_(self.exclude_list.is_marked(regex), False)
        # added regex shouldn't be in compiled list either; look inside the
        # single union pattern, since a str never equals a Pattern object
        compiled = list(self.exclude_list.compiled)
        union_parts = compiled[0].pattern.split("|")
        assert regex not in union_parts
        # Need to escape both to get the same strings after compilation
        compiled_escaped = {x.encode("unicode-escape").decode() for x in union_parts}
        default_escaped = {x.encode("unicode-escape").decode() for x in default_regexes}
        assert compiled_escaped == default_escaped
        eq_(len(default_regexes), len(union_parts))
2020-09-01 23:02:58 +02:00
def split_union(pattern_object):
    """Return the alternatives of a union pattern as a list of strings.

    ``pattern_object`` is anything exposing a ``pattern`` string (e.g. a
    compiled regex); the string is split on every ``|`` character.
    """
    # str.split already returns a new list, so no extra copy is needed.
    return pattern_object.pattern.split("|")
2021-08-15 04:10:18 -05:00
class TestCaseCompiledList:
    """Test consistency between the union and the separate versions.

    NOTE(review): the ``restore_defaults`` calls in the fixture, the
    ``ISWINDOWS`` branch and the ``add``/``mark`` calls for ``regex1`` were
    absent from this copy of the file and are restored from what the
    assertions require -- confirm the method names against the exclude module.
    """

    def setup_method(self, method):
        self.e_separate = ExcludeList(union_regex=False)
        self.e_separate.restore_defaults()
        self.e_union = ExcludeList(union_regex=True)
        self.e_union.restore_defaults()

    def test_same_number_of_expressions(self):
        # We only get one union Pattern item in a tuple, which is made of however many parts
        eq_(len(split_union(self.e_union.compiled[0])), len(default_regexes))
        # We get as many as there are marked items
        eq_(len(self.e_separate.compiled), len(default_regexes))
        exprs = split_union(self.e_union.compiled[0])
        # We should have the same number and the same expressions
        eq_(len(exprs), len(self.e_separate.compiled))
        for expr in self.e_separate.compiled:
            assert expr.pattern in exprs

    def test_compiled_files(self):
        # is path separator checked properly to yield the output
        if ISWINDOWS:
            regex1 = r"test\\one\\sub"
        else:
            regex1 = r"test/one/sub"
        self.e_separate.add(regex1)
        self.e_separate.mark(regex1)
        self.e_union.add(regex1)
        self.e_union.mark(regex1)
        separate_compiled_dirs = self.e_separate.compiled
        separate_compiled_files = list(self.e_separate.compiled_files)
        # HACK we need to call compiled property FIRST to generate the cache
        union_compiled_dirs = self.e_union.compiled
        # A generator returning only one item... ugh
        union_compiled_files = list(self.e_union.compiled_files)[0]
        print(f"compiled files: {union_compiled_files}")
        # Separate should give several plus the one added
        eq_(len(separate_compiled_dirs), len(default_regexes) + 1)
        # regex1 shouldn't be in the "files" version
        eq_(len(separate_compiled_files), len(default_regexes))
        # Only one Pattern returned, which when split should be however many + 1
        eq_(len(split_union(union_compiled_dirs[0])), len(default_regexes) + 1)
        # regex1 shouldn't be here either
        eq_(len(split_union(union_compiled_files)), len(default_regexes))
2020-08-29 03:57:00 +02:00
class TestCaseCompiledDict(TestCaseCompiledList):
    """Test the dictionary version

    NOTE(review): the ``restore_defaults`` calls were absent from this copy of
    the file and are restored because the inherited tests assert on
    ``len(default_regexes)`` -- confirm the method name against ``ExcludeDict``.
    """

    def setup_method(self, method):
        self.e_separate = ExcludeDict(union_regex=False)
        self.e_separate.restore_defaults()
        self.e_union = ExcludeDict(union_regex=True)
        self.e_union.restore_defaults()