Mirror of https://github.com/arsenetar/dupeguru.git (synced 2026-01-22 06:37:17 +00:00)
Remove flake8 E731 Errors
Note: black formatting is now applying correctly as well.
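For context, flake8's E731 check flags statements that assign a lambda expression to a name ("do not assign a lambda expression, use a def"). The fix used throughout this commit is to inline each lambda at its single call site; promoting it to a def is the other standard option. A minimal sketch of the pattern with made-up data (illustrative only, not code from dupeGuru):

    # e731_example.py - illustrative only
    groups = [["d", "e", "f"], ["a"], ["b", "c"]]

    # Before: flake8 reports E731 on the assignment
    #     keyfunc = lambda g: len(g)
    #     groups.sort(key=keyfunc)

    # After (what this commit does): inline the lambda at its only call site
    groups.sort(key=lambda g: len(g))

    # Alternative when the callable is reused elsewhere: promote it to a def
    def group_size(g):
        return len(g)

    assert groups == [["a"], ["b", "c"], ["d", "e", "f"]]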
@@ -17,7 +17,11 @@ from hscommon.util import flatten, multi_replace
 from hscommon.trans import tr
 from hscommon.jobprogress import job
 
-(WEIGHT_WORDS, MATCH_SIMILAR_WORDS, NO_FIELD_ORDER,) = range(3)
+(
+    WEIGHT_WORDS,
+    MATCH_SIMILAR_WORDS,
+    NO_FIELD_ORDER,
+) = range(3)
 
 JOB_REFRESH_RATE = 100
 
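The hunk above is black's "magic trailing comma" at work: because the original one-line tuple assignment ends with a trailing comma, black explodes it onto one element per line once its formatting applies. A quick way to reproduce the behavior locally (a sketch; it assumes black is installed, and the line_length value is an assumption based on the long single lines kept elsewhere in this diff):

    # check_trailing_comma.py - illustrative only, not part of the commit
    import black

    src = '(WEIGHT_WORDS, MATCH_SIMILAR_WORDS, NO_FIELD_ORDER,) = range(3)\n'
    # Prints the exploded form shown in the hunk above.
    print(black.format_str(src, mode=black.Mode(line_length=120)))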
@@ -34,11 +38,9 @@ def getwords(s):
     # of their accents, etc.) are preserved as is. The arbitrary limit is
     # obtained from this one: ord("\u037e") GREEK QUESTION MARK
     s = "".join(
-        c for c in s
-        if (ord(c) <= 894
-            and c in string.ascii_letters + string.digits + string.whitespace
-            )
-        or ord(c) > 894
+        c
+        for c in s
+        if (ord(c) <= 894 and c in string.ascii_letters + string.digits + string.whitespace) or ord(c) > 894
     )
     return [_f for _f in s.split(" ") if _f] # remove empty elements
 
@@ -115,9 +117,7 @@ def compare_fields(first, second, flags=()):
             if matched_field:
                 second.remove(matched_field)
     else:
-        results = [
-            compare(field1, field2, flags) for field1, field2 in zip(first, second)
-        ]
+        results = [compare(field1, field2, flags) for field1, field2 in zip(first, second)]
     return min(results) if results else 0
 
 
@@ -130,9 +130,7 @@ def build_word_dict(objects, j=job.nulljob):
     The result will be a dict with words as keys, lists of objects as values.
     """
     result = defaultdict(set)
-    for object in j.iter_with_progress(
-        objects, "Prepared %d/%d files", JOB_REFRESH_RATE
-    ):
+    for object in j.iter_with_progress(objects, "Prepared %d/%d files", JOB_REFRESH_RATE):
         for word in unpack_fields(object.words):
             result[word].add(object)
     return result
@@ -167,9 +165,7 @@ def reduce_common_words(word_dict, threshold):
     The exception to this removal are the objects where all the words of the object are common.
     Because if we remove them, we will miss some duplicates!
     """
-    uncommon_words = set(
-        word for word, objects in word_dict.items() if len(objects) < threshold
-    )
+    uncommon_words = set(word for word, objects in word_dict.items() if len(objects) < threshold)
     for word, objects in list(word_dict.items()):
         if len(objects) < threshold:
             continue
@@ -275,10 +271,7 @@ def getmatches(
         # This is the place where the memory usage is at its peak during the scan.
         # Just continue the process with an incomplete list of matches.
         del compared # This should give us enough room to call logging.
-        logging.warning(
-            "Memory Overflow. Matches: %d. Word dict: %d"
-            % (len(result), len(word_dict))
-        )
+        logging.warning("Memory Overflow. Matches: %d. Word dict: %d" % (len(result), len(word_dict)))
         return result
     return result
 
@@ -408,18 +401,13 @@ class Group:
 
         You can call this after the duplicate scanning process to free a bit of memory.
         """
-        discarded = set(
-            m
-            for m in self.matches
-            if not all(obj in self.unordered for obj in [m.first, m.second])
-        )
+        discarded = set(m for m in self.matches if not all(obj in self.unordered for obj in [m.first, m.second]))
         self.matches -= discarded
         self.candidates = defaultdict(set)
         return discarded
 
     def get_match_of(self, item):
-        """Returns the match pair between ``item`` and :attr:`ref`.
-        """
+        """Returns the match pair between ``item`` and :attr:`ref`."""
         if item is self.ref:
             return
         for m in self._get_matches_for_ref():
@@ -435,8 +423,7 @@ class Group:
         """
         # tie_breaker(ref, dupe) --> True if dupe should be ref
         # Returns True if anything changed during prioritization.
-        master_key_func = lambda x: (-x.is_ref, key_func(x))
-        new_order = sorted(self.ordered, key=master_key_func)
+        new_order = sorted(self.ordered, key=lambda x: (-x.is_ref, key_func(x)))
         changed = new_order != self.ordered
         self.ordered = new_order
         if tie_breaker is None:
@@ -459,9 +446,7 @@ class Group:
             self.unordered.remove(item)
             self._percentage = None
             self._matches_for_ref = None
-            if (len(self) > 1) and any(
-                not getattr(item, "is_ref", False) for item in self
-            ):
+            if (len(self) > 1) and any(not getattr(item, "is_ref", False) for item in self):
                 if discard_matches:
                     self.matches = set(m for m in self.matches if item not in m)
             else:
@@ -470,8 +455,7 @@ class Group:
             pass
 
     def switch_ref(self, with_dupe):
-        """Make the :attr:`ref` dupe of the group switch position with ``with_dupe``.
-        """
+        """Make the :attr:`ref` dupe of the group switch position with ``with_dupe``."""
         if self.ref.is_ref:
             return False
         try:
@@ -490,9 +474,7 @@ class Group:
         if self._percentage is None:
             if self.dupes:
                 matches = self._get_matches_for_ref()
-                self._percentage = sum(match.percentage for match in matches) // len(
-                    matches
-                )
+                self._percentage = sum(match.percentage for match in matches) // len(matches)
             else:
                 self._percentage = 0
         return self._percentage
@@ -547,12 +529,8 @@ def get_groups(matches):
     orphan_matches = []
     for group in groups:
         orphan_matches += {
-            m
-            for m in group.discard_matches()
-            if not any(obj in matched_files for obj in [m.first, m.second])
+            m for m in group.discard_matches() if not any(obj in matched_files for obj in [m.first, m.second])
         }
     if groups and orphan_matches:
-        groups += get_groups(
-            orphan_matches
-        ) # no job, as it isn't supposed to take a long time
+        groups += get_groups(orphan_matches) # no job, as it isn't supposed to take a long time
     return groups
@@ -106,9 +106,7 @@ class Results(Markable):
                     self.groups,
                 )
             if self.__filtered_dupes:
-                self.__dupes = [
-                    dupe for dupe in self.__dupes if dupe in self.__filtered_dupes
-                ]
+                self.__dupes = [dupe for dupe in self.__dupes if dupe in self.__filtered_dupes]
             sd = self.__dupes_sort_descriptor
             if sd:
                 self.sort_dupes(sd[0], sd[1], sd[2])
@@ -127,18 +125,10 @@ class Results(Markable):
             total_count = self.__total_count
             total_size = self.__total_size
         else:
-            mark_count = len(
-                [dupe for dupe in self.__filtered_dupes if self.is_marked(dupe)]
-            )
-            marked_size = sum(
-                dupe.size for dupe in self.__filtered_dupes if self.is_marked(dupe)
-            )
-            total_count = len(
-                [dupe for dupe in self.__filtered_dupes if self.is_markable(dupe)]
-            )
-            total_size = sum(
-                dupe.size for dupe in self.__filtered_dupes if self.is_markable(dupe)
-            )
+            mark_count = len([dupe for dupe in self.__filtered_dupes if self.is_marked(dupe)])
+            marked_size = sum(dupe.size for dupe in self.__filtered_dupes if self.is_marked(dupe))
+            total_count = len([dupe for dupe in self.__filtered_dupes if self.is_markable(dupe)])
+            total_size = sum(dupe.size for dupe in self.__filtered_dupes if self.is_markable(dupe))
         if self.mark_inverted:
             marked_size = self.__total_size - marked_size
         result = tr("%d / %d (%s / %s) duplicates marked.") % (
@@ -201,11 +191,7 @@ class Results(Markable):
             self.__filters.append(filter_str)
             if self.__filtered_dupes is None:
                 self.__filtered_dupes = flatten(g[:] for g in self.groups)
-            self.__filtered_dupes = set(
-                dupe
-                for dupe in self.__filtered_dupes
-                if filter_re.search(str(dupe.path))
-            )
+            self.__filtered_dupes = set(dupe for dupe in self.__filtered_dupes if filter_re.search(str(dupe.path)))
             filtered_groups = set()
             for dupe in self.__filtered_dupes:
                 filtered_groups.add(self.get_group_of_duplicate(dupe))
@@ -217,8 +203,7 @@ class Results(Markable):
         self.__dupes = None
 
     def get_group_of_duplicate(self, dupe):
-        """Returns :class:`~core.engine.Group` in which ``dupe`` belongs.
-        """
+        """Returns :class:`~core.engine.Group` in which ``dupe`` belongs."""
         try:
             return self.__group_of_duplicate[dupe]
         except (TypeError, KeyError):
@@ -284,8 +269,7 @@ class Results(Markable):
         self.is_modified = False
 
     def make_ref(self, dupe):
-        """Make ``dupe`` take the :attr:`~core.engine.Group.ref` position of its group.
-        """
+        """Make ``dupe`` take the :attr:`~core.engine.Group.ref` position of its group."""
         g = self.get_group_of_duplicate(dupe)
         r = g.ref
         if not g.switch_ref(dupe):
@@ -412,10 +396,10 @@ class Results(Markable):
         """
         if not self.__dupes:
             self.__get_dupe_list()
-        keyfunc = lambda d: self.app._get_dupe_sort_key(
-            d, lambda: self.get_group_of_duplicate(d), key, delta
+        self.__dupes.sort(
+            key=lambda d: self.app._get_dupe_sort_key(d, lambda: self.get_group_of_duplicate(d), key, delta),
+            reverse=not asc,
         )
-        self.__dupes.sort(key=keyfunc, reverse=not asc)
         self.__dupes_sort_descriptor = (key, asc, delta)
 
     def sort_groups(self, key, asc=True):
@@ -426,8 +410,7 @@ class Results(Markable):
         :param str key: key attribute name to sort with.
         :param bool asc: If false, sorting is reversed.
         """
-        keyfunc = lambda g: self.app._get_group_sort_key(g, key)
-        self.groups.sort(key=keyfunc, reverse=not asc)
+        self.groups.sort(key=lambda g: self.app._get_group_sort_key(g, key), reverse=not asc)
         self.__groups_sort_descriptor = (key, asc)
 
     # ---Properties
@@ -177,9 +177,7 @@ class TestCaseWordCompareWithFields:
     def test_simple(self):
         eq_(
             67,
-            compare_fields(
-                [["a", "b"], ["c", "d", "e"]], [["a", "b"], ["c", "d", "f"]]
-            ),
+            compare_fields([["a", "b"], ["c", "d", "e"]], [["a", "b"], ["c", "d", "f"]]),
         )
 
     def test_empty(self):
@@ -265,9 +263,7 @@ class TestCasebuild_word_dict:
         j = job.Job(1, do_progress)
         self.log = []
         s = "foo bar"
-        build_word_dict(
-            [NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j
-        )
+        build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j)
         # We don't have intermediate log because iter_with_progress is called with every > 1
         eq_(0, self.log[0])
         eq_(100, self.log[1])
@@ -297,10 +293,7 @@ class TestCasereduce_common_words:
 
     def test_dont_remove_objects_with_only_common_words(self):
         d = {
-            "common": set(
-                [NamedObject("common uncommon", True) for i in range(50)]
-                + [NamedObject("common", True)]
-            ),
+            "common": set([NamedObject("common uncommon", True) for i in range(50)] + [NamedObject("common", True)]),
             "uncommon": set([NamedObject("common uncommon", True)]),
         }
         reduce_common_words(d, 50)
@@ -309,10 +302,7 @@ class TestCasereduce_common_words:
 
     def test_values_still_are_set_instances(self):
         d = {
-            "common": set(
-                [NamedObject("common uncommon", True) for i in range(50)]
-                + [NamedObject("common", True)]
-            ),
+            "common": set([NamedObject("common uncommon", True) for i in range(50)] + [NamedObject("common", True)]),
             "uncommon": set([NamedObject("common uncommon", True)]),
         }
         reduce_common_words(d, 50)
@@ -352,12 +342,8 @@ class TestCasereduce_common_words:
         # would not stay in 'bar' because 'foo' is not a common word anymore.
         only_common = NamedObject("foo bar", True)
         d = {
-            "foo": set(
-                [NamedObject("foo bar baz", True) for i in range(49)] + [only_common]
-            ),
-            "bar": set(
-                [NamedObject("foo bar baz", True) for i in range(49)] + [only_common]
-            ),
+            "foo": set([NamedObject("foo bar baz", True) for i in range(49)] + [only_common]),
+            "bar": set([NamedObject("foo bar baz", True) for i in range(49)] + [only_common]),
             "baz": set([NamedObject("foo bar baz", True) for i in range(49)]),
         }
         reduce_common_words(d, 50)
@@ -386,9 +372,7 @@ class TestCaseget_match:
         assert object() not in m
 
     def test_word_weight(self):
-        m = get_match(
-            NamedObject("foo bar", True), NamedObject("bar bleh", True), (WEIGHT_WORDS,)
-        )
+        m = get_match(NamedObject("foo bar", True), NamedObject("bar bleh", True), (WEIGHT_WORDS,))
         eq_(m.percentage, int((6.0 / 13.0) * 100))
 
 
@@ -554,8 +538,12 @@ class TestCaseGetMatchesByContents:
     def test_big_file_partial_hashes(self):
         smallsize = 1
         bigsize = 100 * 1024 * 1024 # 100MB
-        f = [no("bigfoo", size=bigsize), no("bigbar", size=bigsize),
-             no("smallfoo", size=smallsize), no("smallbar", size=smallsize)]
+        f = [
+            no("bigfoo", size=bigsize),
+            no("bigbar", size=bigsize),
+            no("smallfoo", size=smallsize),
+            no("smallbar", size=smallsize),
+        ]
         f[0].md5 = f[0].md5partial = f[0].md5samples = "foobar"
         f[1].md5 = f[1].md5partial = f[1].md5samples = "foobar"
         f[2].md5 = f[2].md5partial = "bleh"
@@ -749,8 +737,7 @@ class TestCaseGroup:
         # if the ref has the same key as one or more of the dupe, run the tie_breaker func among them
         g = get_test_group()
         o1, o2, o3 = g.ordered
-        tie_breaker = lambda ref, dupe: dupe is o3
-        g.prioritize(lambda x: 0, tie_breaker)
+        g.prioritize(lambda x: 0, lambda ref, dupe: dupe is o3)
         assert g.ref is o3
 
     def test_prioritize_with_tie_breaker_runs_on_all_dupes(self):
@@ -761,8 +748,7 @@ class TestCaseGroup:
         o1.foo = 1
         o2.foo = 2
         o3.foo = 3
-        tie_breaker = lambda ref, dupe: dupe.foo > ref.foo
-        g.prioritize(lambda x: 0, tie_breaker)
+        g.prioritize(lambda x: 0, lambda ref, dupe: dupe.foo > ref.foo)
         assert g.ref is o3
 
     def test_prioritize_with_tie_breaker_runs_only_on_tie_dupes(self):
@@ -775,9 +761,7 @@ class TestCaseGroup:
         o1.bar = 1
         o2.bar = 2
         o3.bar = 3
-        key_func = lambda x: -x.foo
-        tie_breaker = lambda ref, dupe: dupe.bar > ref.bar
-        g.prioritize(key_func, tie_breaker)
+        g.prioritize(lambda x: -x.foo, lambda ref, dupe: dupe.bar > ref.bar)
         assert g.ref is o2
 
     def test_prioritize_with_ref_dupe(self):
@@ -909,9 +893,7 @@ class TestCaseget_groups:
         m1 = Match(A, B, 90) # This is the strongest "A" match
         m2 = Match(A, C, 80) # Because C doesn't match with B, it won't be in the group
         m3 = Match(A, D, 80) # Same thing for D
-        m4 = Match(
-            C, D, 70
-        ) # However, because C and D match, they should have their own group.
+        m4 = Match(C, D, 70) # However, because C and D match, they should have their own group.
         groups = get_groups([m1, m2, m3, m4])
         eq_(len(groups), 2)
         g1, g2 = groups