# dupeguru/core/tests/exclude_test.py

# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

import io
# import os.path as op

from xml.etree import ElementTree as ET

# from pytest import raises
from hscommon.testutil import eq_
from hscommon.plat import ISWINDOWS

from .base import DupeGuru
from ..exclude import ExcludeList, ExcludeDict, default_regexes, AlreadyThereException

from re import error


# Two slightly different implementations here, one around a list of lists,
# and another around a dictionary.


class TestCaseListXMLLoading:
    """XML loading and saving tests for the exclusion list.

    ``setup_method`` decides which implementation is stored in
    ``self.exclude_list``; every test here works on that instance (or on
    fresh instances of the same type).
    """

    def setup_method(self, method):
        self.exclude_list = ExcludeList()

    def test_load_non_existant_file(self):
        # Loading a missing file falls back to the pre-defined regexes
        self.exclude_list.load_from_xml("non_existant.xml")
        eq_(len(default_regexes), len(self.exclude_list))
        # they should also be marked by default
        eq_(len(default_regexes), self.exclude_list.marked_count)

    def test_save_to_xml(self):
        f = io.BytesIO()
        self.exclude_list.save_to_xml(f)
        f.seek(0)
        doc = ET.parse(f)
        root = doc.getroot()
        eq_("exclude_list", root.tag)

    def test_save_and_load(self, tmpdir):
        # Instantiate the same type as the fixture so that the implementation
        # selected in setup_method is the one being round-tripped.
        list_type = type(self.exclude_list)
        e1 = list_type()
        e2 = list_type()
        eq_(len(e1), 0)
        e1.add(r"one")
        e1.mark(r"one")
        e1.add(r"two")
        tmpxml = str(tmpdir.join("exclude_testunit.xml"))
        e1.save_to_xml(tmpxml)
        e2.load_from_xml(tmpxml)
        # Exactly the two saved regexes should come back, and nothing else
        assert r"one" in e2
        assert r"two" in e2
        eq_(len(e2), 2)
        # Only "one" was marked before saving
        eq_(e2.marked_count, 1)

    def test_load_xml_with_garbage_and_missing_elements(self):
        root = ET.Element("foobar")  # The root element shouldn't matter
        # Node with an unexpected tag
        exclude_node = ET.SubElement(root, "bogus")
        exclude_node.set("regex", "None")
        exclude_node.set("marked", "y")

        exclude_node = ET.SubElement(root, "exclude")
        exclude_node.set("regex", "one")
        # misspelled "marked" attribute name
        exclude_node.set("markedddd", "y")

        exclude_node = ET.SubElement(root, "exclude")
        exclude_node.set("regex", "two")
        # missing marked field

        exclude_node = ET.SubElement(root, "exclude")
        exclude_node.set("regex", "three")
        # misspelled attribute name and garbage value
        exclude_node.set("markedddd", "pazjbjepo")

        f = io.BytesIO()
        tree = ET.ElementTree(root)
        tree.write(f, encoding="utf-8")
        f.seek(0)
        self.exclude_list.load_from_xml(f)
        # only the three "exclude" nodes should be added
        eq_(3, len(self.exclude_list))
        # None should be marked
        eq_(0, self.exclude_list.marked_count)


class TestCaseDictXMLLoading(TestCaseListXMLLoading):
    """Run the inherited XML tests with an ``ExcludeDict`` as the fixture."""

    def setup_method(self, method):
        exclude_dict = ExcludeDict()
        self.exclude_list = exclude_dict


class TestCaseListEmpty:
    """Add / mark / rename / remove tests starting from an empty list.

    The fixture is created with ``union_regex=False`` and attached to a
    ``DupeGuru`` application instance.
    """

    def setup_method(self, method):
        self.app = DupeGuru()
        self.app.exclude_list = ExcludeList(union_regex=False)
        self.exclude_list = self.app.exclude_list

    def test_add_mark_and_remove_regex(self):
        regex1 = r"one"
        regex2 = r"two"
        self.exclude_list.add(regex1)
        assert regex1 in self.exclude_list
        self.exclude_list.add(regex2)
        self.exclude_list.mark(regex1)
        self.exclude_list.mark(regex2)
        eq_(len(self.exclude_list), 2)
        eq_(len(self.exclude_list.compiled), 2)
        compiled_files = list(self.exclude_list.compiled_files)
        eq_(len(compiled_files), 2)
        self.exclude_list.remove(regex2)
        assert regex2 not in self.exclude_list
        eq_(len(self.exclude_list), 1)

    def test_add_duplicate(self):
        self.exclude_list.add(r"one")
        eq_(1, len(self.exclude_list))
        try:
            self.exclude_list.add(r"one")
        except AlreadyThereException:
            # Rejecting the duplicate is acceptable; any other exception is
            # a genuine failure and must not be swallowed.
            pass
        eq_(1, len(self.exclude_list))

    def test_add_not_compilable(self):
        # Trying to add a non-valid regex should not work and raise exception
        regex = r"one))"
        try:
            self.exclude_list.add(regex)
        except error:
            # It must be a re.error so that the interface can process it;
            # any other exception type propagates and fails the test.
            pass
        added = self.exclude_list.mark(regex)
        eq_(added, False)
        eq_(len(self.exclude_list), 0)
        eq_(len(self.exclude_list.compiled), 0)
        compiled_files = list(self.exclude_list.compiled_files)
        eq_(len(compiled_files), 0)

    def test_force_add_not_compilable(self):
        """Used when loading from XML for example"""
        regex = r"one))"
        # No exception is expected here: the list is empty, so this cannot be
        # a duplicate. Any exception propagates and fails the test.
        self.exclude_list.add(regex, forced=True)
        marked = self.exclude_list.mark(regex)
        eq_(marked, False)  # can't be marked since not compilable
        eq_(len(self.exclude_list), 1)
        eq_(len(self.exclude_list.compiled), 0)
        compiled_files = list(self.exclude_list.compiled_files)
        eq_(len(compiled_files), 0)
        # adding a duplicate
        try:
            self.exclude_list.add(regex, forced=True)
        except AlreadyThereException:
            # we should have this exception, and it shouldn't be added
            pass
        eq_(len(self.exclude_list), 1)
        eq_(len(self.exclude_list.compiled), 0)

    def test_rename_regex(self):
        regex = r"one"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        regex_renamed = r"one))"
        # Not compilable, can't be marked
        self.exclude_list.rename(regex, regex_renamed)
        assert regex not in self.exclude_list
        assert regex_renamed in self.exclude_list
        eq_(self.exclude_list.is_marked(regex_renamed), False)
        self.exclude_list.mark(regex_renamed)
        eq_(self.exclude_list.is_marked(regex_renamed), False)
        regex_renamed_compilable = r"two"
        self.exclude_list.rename(regex_renamed, regex_renamed_compilable)
        assert regex_renamed_compilable in self.exclude_list
        # NOTE(review): this queries the *old* name after the rename; it may
        # have been meant to check regex_renamed_compilable -- confirm.
        eq_(self.exclude_list.is_marked(regex_renamed), False)
        self.exclude_list.mark(regex_renamed_compilable)
        eq_(self.exclude_list.is_marked(regex_renamed_compilable), True)
        eq_(len(self.exclude_list), 1)
        # Should still be marked after rename
        regex_compilable = r"three"
        self.exclude_list.rename(regex_renamed_compilable, regex_compilable)
        eq_(self.exclude_list.is_marked(regex_compilable), True)

    def test_restore_default(self):
        """Only unmark previously added regexes and mark the pre-defined ones"""
        regex = r"one"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        self.exclude_list.restore_defaults()
        eq_(len(default_regexes), self.exclude_list.marked_count)
        # added regex shouldn't be marked
        eq_(self.exclude_list.is_marked(regex), False)
        # added regex shouldn't be in compiled list either. The compiled list
        # holds pattern objects, so compare against their source strings;
        # comparing the raw string with the objects themselves can never match.
        compiled_patterns = [
            compiled_re.pattern for compiled_re in self.exclude_list.compiled
        ]
        assert regex not in compiled_patterns
        # Only default regexes marked and in compiled list
        for default_re in default_regexes:
            assert self.exclude_list.is_marked(default_re)
            assert (
                default_re in compiled_patterns
            ), f"Default RE {default_re} not found in compiled list."
        eq_(len(default_regexes), len(self.exclude_list.compiled))


class TestCaseDictEmpty(TestCaseListEmpty):
    """Re-run the inherited tests against the dictionary implementation."""

    def setup_method(self, method):
        app = DupeGuru()
        app.exclude_list = ExcludeDict(union_regex=False)
        self.app = app
        # Read the attribute back from the app, as the tests share this object
        self.exclude_list = app.exclude_list


def split_union(pattern_object):
    """Return the "|"-separated parts of a compiled pattern's source string.

    ``pattern_object`` is anything exposing a ``pattern`` string attribute
    (e.g. a compiled regular expression). The split is purely textual: every
    "|" character is treated as a separator, including any that appear
    inside a group or character class.
    """
    # str.split already returns a fresh list; no need to copy it again
    return pattern_object.pattern.split("|")


class TestCaseCompiledList:
    """Test consistency between the union and the separate compiled versions."""

    def setup_method(self, method):
        self.e_separate = ExcludeList(union_regex=False)
        self.e_separate.restore_defaults()
        self.e_union = ExcludeList(union_regex=True)
        self.e_union.restore_defaults()

    def test_same_number_of_expressions(self):
        # We only get one union Pattern item, which is made of however many parts
        exprs = split_union(self.e_union.compiled[0])
        eq_(len(exprs), len(default_regexes))
        # We get as many as there are marked items
        eq_(len(self.e_separate.compiled), len(default_regexes))
        # We should have the same number and the same expressions
        eq_(len(exprs), len(self.e_separate.compiled))
        for expr in self.e_separate.compiled:
            assert expr.pattern in exprs

    def test_compiled_files(self):
        # is path separator checked properly to yield the output
        if ISWINDOWS:
            regex1 = r"test\\one\\sub"
        else:
            regex1 = r"test/one/sub"
        self.e_separate.add(regex1)
        self.e_separate.mark(regex1)
        self.e_union.add(regex1)
        self.e_union.mark(regex1)
        separate_compiled_dirs = self.e_separate.compiled
        separate_compiled_files = list(self.e_separate.compiled_files)
        # HACK we need to call compiled property FIRST to generate the cache
        union_compiled_dirs = self.e_union.compiled
        # compiled_files is iterated and only its first item is used; in union
        # mode that item is the single combined Pattern
        union_compiled_files = list(self.e_union.compiled_files)[0]
        # Separate should give several plus the one added
        eq_(len(separate_compiled_dirs), len(default_regexes) + 1)
        # regex1 shouldn't be in the "files" version
        eq_(len(separate_compiled_files), len(default_regexes))
        # Only one Pattern returned, which when split should be however many + 1
        eq_(len(split_union(union_compiled_dirs[0])), len(default_regexes) + 1)
        # regex1 shouldn't be here either
        eq_(len(split_union(union_compiled_files)), len(default_regexes))


class TestCaseCompiledDict(TestCaseCompiledList):
    """Re-run the inherited consistency tests on the dictionary version."""

    def setup_method(self, method):
        # One instance per compilation mode, each reset to the defaults
        separate = ExcludeDict(union_regex=False)
        separate.restore_defaults()
        union = ExcludeDict(union_regex=True)
        union.restore_defaults()
        self.e_separate = separate
        self.e_union = union