1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-22 14:41:39 +00:00

Misc cleanups in core/tests

This commit is contained in:
2021-08-21 03:52:09 -05:00
parent b4fa1d68f0
commit 0189c29f47
9 changed files with 119 additions and 261 deletions

View File

@@ -103,10 +103,9 @@ class TestCasegetfields:
expected = [["a", "bc", "def"]]
actual = getfields(" - a bc def")
eq_(expected, actual)
expected = [["bc", "def"]]
class TestCaseunpack_fields:
class TestCaseUnpackFields:
def test_with_fields(self):
expected = ["a", "b", "c", "d", "e", "f"]
actual = unpack_fields([["a"], ["b", "c"], ["d", "e", "f"]])
@@ -218,24 +217,24 @@ class TestCaseWordCompareWithFields:
eq_([["c", "d", "f"], ["a", "b"]], second)
class TestCasebuild_word_dict:
class TestCaseBuildWordDict:
def test_with_standard_words(self):
itemList = [NamedObject("foo bar", True)]
itemList.append(NamedObject("bar baz", True))
itemList.append(NamedObject("baz bleh foo", True))
d = build_word_dict(itemList)
item_list = [NamedObject("foo bar", True)]
item_list.append(NamedObject("bar baz", True))
item_list.append(NamedObject("baz bleh foo", True))
d = build_word_dict(item_list)
eq_(4, len(d))
eq_(2, len(d["foo"]))
assert itemList[0] in d["foo"]
assert itemList[2] in d["foo"]
assert item_list[0] in d["foo"]
assert item_list[2] in d["foo"]
eq_(2, len(d["bar"]))
assert itemList[0] in d["bar"]
assert itemList[1] in d["bar"]
assert item_list[0] in d["bar"]
assert item_list[1] in d["bar"]
eq_(2, len(d["baz"]))
assert itemList[1] in d["baz"]
assert itemList[2] in d["baz"]
assert item_list[1] in d["baz"]
assert item_list[2] in d["baz"]
eq_(1, len(d["bleh"]))
assert itemList[2] in d["bleh"]
assert item_list[2] in d["bleh"]
def test_unpack_fields(self):
o = NamedObject("")
@@ -269,7 +268,7 @@ class TestCasebuild_word_dict:
eq_(100, self.log[1])
class TestCasemerge_similar_words:
class TestCaseMergeSimilarWords:
def test_some_similar_words(self):
d = {
"foobar": set([1]),
@@ -281,11 +280,11 @@ class TestCasemerge_similar_words:
eq_(3, len(d["foobar"]))
class TestCasereduce_common_words:
class TestCaseReduceCommonWords:
def test_typical(self):
d = {
"foo": set([NamedObject("foo bar", True) for i in range(50)]),
"bar": set([NamedObject("foo bar", True) for i in range(49)]),
"foo": set([NamedObject("foo bar", True) for _ in range(50)]),
"bar": set([NamedObject("foo bar", True) for _ in range(49)]),
}
reduce_common_words(d, 50)
assert "foo" not in d
@@ -293,7 +292,7 @@ class TestCasereduce_common_words:
def test_dont_remove_objects_with_only_common_words(self):
d = {
"common": set([NamedObject("common uncommon", True) for i in range(50)] + [NamedObject("common", True)]),
"common": set([NamedObject("common uncommon", True) for _ in range(50)] + [NamedObject("common", True)]),
"uncommon": set([NamedObject("common uncommon", True)]),
}
reduce_common_words(d, 50)
@@ -302,20 +301,20 @@ class TestCasereduce_common_words:
def test_values_still_are_set_instances(self):
d = {
"common": set([NamedObject("common uncommon", True) for i in range(50)] + [NamedObject("common", True)]),
"common": set([NamedObject("common uncommon", True) for _ in range(50)] + [NamedObject("common", True)]),
"uncommon": set([NamedObject("common uncommon", True)]),
}
reduce_common_words(d, 50)
assert isinstance(d["common"], set)
assert isinstance(d["uncommon"], set)
def test_dont_raise_KeyError_when_a_word_has_been_removed(self):
def test_dont_raise_keyerror_when_a_word_has_been_removed(self):
# If a word has been removed by the reduce, an object in a subsequent common word that
# contains the word that has been removed would cause a KeyError.
d = {
"foo": set([NamedObject("foo bar baz", True) for i in range(50)]),
"bar": set([NamedObject("foo bar baz", True) for i in range(50)]),
"baz": set([NamedObject("foo bar baz", True) for i in range(49)]),
"foo": set([NamedObject("foo bar baz", True) for _ in range(50)]),
"bar": set([NamedObject("foo bar baz", True) for _ in range(50)]),
"baz": set([NamedObject("foo bar baz", True) for _ in range(49)]),
}
try:
reduce_common_words(d, 50)
@@ -329,7 +328,7 @@ class TestCasereduce_common_words:
o.words = [["foo", "bar"], ["baz"]]
return o
d = {"foo": set([create_it() for i in range(50)])}
d = {"foo": set([create_it() for _ in range(50)])}
try:
reduce_common_words(d, 50)
except TypeError:
@@ -342,9 +341,9 @@ class TestCasereduce_common_words:
# would not stay in 'bar' because 'foo' is not a common word anymore.
only_common = NamedObject("foo bar", True)
d = {
"foo": set([NamedObject("foo bar baz", True) for i in range(49)] + [only_common]),
"bar": set([NamedObject("foo bar baz", True) for i in range(49)] + [only_common]),
"baz": set([NamedObject("foo bar baz", True) for i in range(49)]),
"foo": set([NamedObject("foo bar baz", True) for _ in range(49)] + [only_common]),
"bar": set([NamedObject("foo bar baz", True) for _ in range(49)] + [only_common]),
"baz": set([NamedObject("foo bar baz", True) for _ in range(49)]),
}
reduce_common_words(d, 50)
eq_(1, len(d["foo"]))
@@ -352,7 +351,7 @@ class TestCasereduce_common_words:
eq_(49, len(d["baz"]))
class TestCaseget_match:
class TestCaseGetMatch:
def test_simple(self):
o1 = NamedObject("foo bar", True)
o2 = NamedObject("bar bleh", True)
@@ -381,12 +380,12 @@ class TestCaseGetMatches:
eq_(getmatches([]), [])
def test_simple(self):
itemList = [
item_list = [
NamedObject("foo bar"),
NamedObject("bar bleh"),
NamedObject("a b c foo"),
]
r = getmatches(itemList)
r = getmatches(item_list)
eq_(2, len(r))
m = first(m for m in r if m.percentage == 50) # "foo bar" and "bar bleh"
assert_match(m, "foo bar", "bar bleh")
@@ -394,40 +393,40 @@ class TestCaseGetMatches:
assert_match(m, "foo bar", "a b c foo")
def test_null_and_unrelated_objects(self):
itemList = [
item_list = [
NamedObject("foo bar"),
NamedObject("bar bleh"),
NamedObject(""),
NamedObject("unrelated object"),
]
r = getmatches(itemList)
r = getmatches(item_list)
eq_(len(r), 1)
m = r[0]
eq_(m.percentage, 50)
assert_match(m, "foo bar", "bar bleh")
def test_twice_the_same_word(self):
itemList = [NamedObject("foo foo bar"), NamedObject("bar bleh")]
r = getmatches(itemList)
item_list = [NamedObject("foo foo bar"), NamedObject("bar bleh")]
r = getmatches(item_list)
eq_(1, len(r))
def test_twice_the_same_word_when_preworded(self):
itemList = [NamedObject("foo foo bar", True), NamedObject("bar bleh", True)]
r = getmatches(itemList)
item_list = [NamedObject("foo foo bar", True), NamedObject("bar bleh", True)]
r = getmatches(item_list)
eq_(1, len(r))
def test_two_words_match(self):
itemList = [NamedObject("foo bar"), NamedObject("foo bar bleh")]
r = getmatches(itemList)
item_list = [NamedObject("foo bar"), NamedObject("foo bar bleh")]
r = getmatches(item_list)
eq_(1, len(r))
def test_match_files_with_only_common_words(self):
# If a word occurs more than 50 times, it is excluded from the matching process
# The problem with the common_word_threshold is that the files containing only common
# words will never be matched together. We *should* match them.
# This test assumes that the common word threashold const is 50
itemList = [NamedObject("foo") for i in range(50)]
r = getmatches(itemList)
# This test assumes that the common word threshold const is 50
item_list = [NamedObject("foo") for _ in range(50)]
r = getmatches(item_list)
eq_(1225, len(r))
def test_use_words_already_there_if_there(self):
@@ -450,28 +449,28 @@ class TestCaseGetMatches:
eq_(100, self.log[-1])
def test_weight_words(self):
itemList = [NamedObject("foo bar"), NamedObject("bar bleh")]
m = getmatches(itemList, weight_words=True)[0]
item_list = [NamedObject("foo bar"), NamedObject("bar bleh")]
m = getmatches(item_list, weight_words=True)[0]
eq_(int((6.0 / 13.0) * 100), m.percentage)
def test_similar_word(self):
itemList = [NamedObject("foobar"), NamedObject("foobars")]
eq_(len(getmatches(itemList, match_similar_words=True)), 1)
eq_(getmatches(itemList, match_similar_words=True)[0].percentage, 100)
itemList = [NamedObject("foobar"), NamedObject("foo")]
eq_(len(getmatches(itemList, match_similar_words=True)), 0) # too far
itemList = [NamedObject("bizkit"), NamedObject("bizket")]
eq_(len(getmatches(itemList, match_similar_words=True)), 1)
itemList = [NamedObject("foobar"), NamedObject("foosbar")]
eq_(len(getmatches(itemList, match_similar_words=True)), 1)
item_list = [NamedObject("foobar"), NamedObject("foobars")]
eq_(len(getmatches(item_list, match_similar_words=True)), 1)
eq_(getmatches(item_list, match_similar_words=True)[0].percentage, 100)
item_list = [NamedObject("foobar"), NamedObject("foo")]
eq_(len(getmatches(item_list, match_similar_words=True)), 0) # too far
item_list = [NamedObject("bizkit"), NamedObject("bizket")]
eq_(len(getmatches(item_list, match_similar_words=True)), 1)
item_list = [NamedObject("foobar"), NamedObject("foosbar")]
eq_(len(getmatches(item_list, match_similar_words=True)), 1)
def test_single_object_with_similar_words(self):
itemList = [NamedObject("foo foos")]
eq_(len(getmatches(itemList, match_similar_words=True)), 0)
item_list = [NamedObject("foo foos")]
eq_(len(getmatches(item_list, match_similar_words=True)), 0)
def test_double_words_get_counted_only_once(self):
itemList = [NamedObject("foo bar foo bleh"), NamedObject("foo bar bleh bar")]
m = getmatches(itemList)[0]
item_list = [NamedObject("foo bar foo bleh"), NamedObject("foo bar bleh bar")]
m = getmatches(item_list)[0]
eq_(75, m.percentage)
def test_with_fields(self):
@@ -491,13 +490,13 @@ class TestCaseGetMatches:
eq_(m.percentage, 50)
def test_only_match_similar_when_the_option_is_set(self):
itemList = [NamedObject("foobar"), NamedObject("foobars")]
eq_(len(getmatches(itemList, match_similar_words=False)), 0)
item_list = [NamedObject("foobar"), NamedObject("foobars")]
eq_(len(getmatches(item_list, match_similar_words=False)), 0)
def test_dont_recurse_do_match(self):
# with nosetests, the stack is increased. The number has to be high enough not to be failing falsely
sys.setrecursionlimit(200)
files = [NamedObject("foo bar") for i in range(201)]
files = [NamedObject("foo bar") for _ in range(201)]
try:
getmatches(files)
except RuntimeError:
@@ -506,27 +505,27 @@ class TestCaseGetMatches:
sys.setrecursionlimit(1000)
def test_min_match_percentage(self):
itemList = [
item_list = [
NamedObject("foo bar"),
NamedObject("bar bleh"),
NamedObject("a b c foo"),
]
r = getmatches(itemList, min_match_percentage=50)
r = getmatches(item_list, min_match_percentage=50)
eq_(1, len(r)) # Only "foo bar" / "bar bleh" should match
def test_MemoryError(self, monkeypatch):
def test_memory_error(self, monkeypatch):
@log_calls
def mocked_match(first, second, flags):
if len(mocked_match.calls) > 42:
raise MemoryError()
return Match(first, second, 0)
objects = [NamedObject() for i in range(10)] # results in 45 matches
objects = [NamedObject() for _ in range(10)] # results in 45 matches
monkeypatch.setattr(engine, "get_match", mocked_match)
try:
r = getmatches(objects)
except MemoryError:
self.fail("MemorryError must be handled")
self.fail("MemoryError must be handled")
eq_(42, len(r))
@@ -563,7 +562,7 @@ class TestCaseGetMatchesByContents:
class TestCaseGroup:
def test_empy(self):
def test_empty(self):
g = Group()
eq_(None, g.ref)
eq_([], g.dupes)
@@ -802,14 +801,14 @@ class TestCaseGroup:
eq_(0, len(g.candidates))
class TestCaseget_groups:
class TestCaseGetGroups:
def test_empty(self):
r = get_groups([])
eq_([], r)
def test_simple(self):
itemList = [NamedObject("foo bar"), NamedObject("bar bleh")]
matches = getmatches(itemList)
item_list = [NamedObject("foo bar"), NamedObject("bar bleh")]
matches = getmatches(item_list)
m = matches[0]
r = get_groups(matches)
eq_(1, len(r))
@@ -819,15 +818,15 @@ class TestCaseget_groups:
def test_group_with_multiple_matches(self):
# This results in 3 matches
itemList = [NamedObject("foo"), NamedObject("foo"), NamedObject("foo")]
matches = getmatches(itemList)
item_list = [NamedObject("foo"), NamedObject("foo"), NamedObject("foo")]
matches = getmatches(item_list)
r = get_groups(matches)
eq_(1, len(r))
g = r[0]
eq_(3, len(g))
def test_must_choose_a_group(self):
itemList = [
item_list = [
NamedObject("a b"),
NamedObject("a b"),
NamedObject("b c"),
@@ -836,13 +835,13 @@ class TestCaseget_groups:
]
# There will be 2 groups here: group "a b" and group "c d"
# "b c" can go either of them, but not both.
matches = getmatches(itemList)
matches = getmatches(item_list)
r = get_groups(matches)
eq_(2, len(r))
eq_(5, len(r[0]) + len(r[1]))
def test_should_all_go_in_the_same_group(self):
itemList = [
item_list = [
NamedObject("a b"),
NamedObject("a b"),
NamedObject("a b"),
@@ -850,7 +849,7 @@ class TestCaseget_groups:
]
# There will be 2 groups here: group "a b" and group "c d"
# "b c" can fit in both, but it must be in only one of them
matches = getmatches(itemList)
matches = getmatches(item_list)
r = get_groups(matches)
eq_(1, len(r))
@@ -869,8 +868,8 @@ class TestCaseget_groups:
assert o3 in g
def test_four_sized_group(self):
itemList = [NamedObject("foobar") for i in range(4)]
m = getmatches(itemList)
item_list = [NamedObject("foobar") for _ in range(4)]
m = getmatches(item_list)
r = get_groups(m)
eq_(1, len(r))
eq_(4, len(r[0]))