Merge core_{se,me,pe} into core.{se,me,pe}

2026-03-09 10:31:38 +00:00 · 2016-05-31 22:32:37 -04:00
parent d4919054f9
commit a65077f871
39 changed files with 55 additions and 75 deletions
--- a/core/tests/block_test.py
+++ b/core/tests/block_test.py
@@ -0,0 +1,312 @@
+# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
+#
+# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
+# http://www.gnu.org/licenses/gpl-3.0.html
+# The commented-out tests are tests for functions that have been converted to pure C for speed
+
+from pytest import raises, skip
+from hscommon.testutil import eq_
+
+try:
+    from ..pe.block import avgdiff, getblocks2, NoBlocksError, DifferentBlockCountError
+except ImportError: # current pytest only permits a module-level skip() with allow_module_level
+    skip("Can't import the block module, probably hasn't been compiled.", allow_module_level=True)
+
+def my_avgdiff(first, second, limit=768, min_iter=3): # supplies default limit/min_iter for the tests
+    return avgdiff(first, second, limit, min_iter)
+
+BLACK = (0, 0, 0) # pixel tuples are ordered (red, green, blue)
+RED = (0xff, 0, 0)
+GREEN = (0, 0xff, 0)
+BLUE = (0, 0, 0xff)
+
+class FakeImage:
+    """Minimal image stand-in: a size tuple (width first) and a flat, row-major pixel list."""
+    def __init__(self, size, data):
+        self.size, self.data = size, data
+
+    def getdata(self):
+        return self.data
+
+    def crop(self, box):
+        # box is indexed as (left, top, right, bottom); right and bottom are exclusive.
+        left, top, right, bottom = box[0], box[1], box[2], box[3]
+        cropped = [
+            self.data[y * self.size[0] + x] for y in range(top, bottom) for x in range(left, right)
+        ]
+        return FakeImage((right - left, bottom - top), cropped)
+
+def empty(): # a zero-sized image without any pixel
+    return FakeImage(size=(0, 0), data=[])
+
+def single_pixel(): # a 1x1 image whose only pixel is red
+    return FakeImage((1, 1), [RED])
+
+def four_pixels():
+    # 2x2 image; channel sums are R = 0xff + 0x80, G = 0x80 + 0x40, B = 0xff + 0x80
+    return FakeImage((2, 2), [RED, (0, 0x80, 0xff), (0x80, 0, 0), (0, 0x40, 0x80)])
+
+class TestCasegetblock:
+    def test_single_pixel(self):
+        blocks = getblocks2(single_pixel(), 1)
+        [block] = blocks
+        eq_(RED, block)
+
+    def test_no_pixel(self):
+        blocks = getblocks2(empty(), 1)
+        eq_([], blocks)
+
+    def test_four_pixels(self):
+        [block] = getblocks2(four_pixels(), 1)
+        # each channel mean is the sum over the four pixels, integer-divided by 4
+        red_mean = (0xff + 0x80) // 4
+        green_mean = (0x80 + 0x40) // 4
+        blue_mean = (0xff + 0x80) // 4
+        eq_((red_mean, green_mean, blue_mean), block)
+
+
+# class TCdiff(unittest.TestCase):
+#     def test_diff(self):
+#         b1 = (10, 20, 30)
+#         b2 = (1, 2, 3)
+#         eq_(9 + 18 + 27, diff(b1, b2))
+#
+#     def test_diff_negative(self):
+#         b1 = (10, 20, 30)
+#         b2 = (1, 2, 3)
+#         eq_(9 + 18 + 27, diff(b2, b1))
+#
+#     def test_diff_mixed_positive_and_negative(self):
+#         b1 = (1, 5, 10)
+#         b2 = (10, 1, 15)
+#         eq_(9 + 4 + 5, diff(b1, b2))
+#
+
+# class TCgetblocks(unittest.TestCase):
+#     def test_empty_image(self):
+#         im = empty()
+#         blocks = getblocks(im, 1)
+#         eq_(0, len(blocks))
+#
+#     def test_one_block_image(self):
+#         im = four_pixels()
+#         blocks = getblocks2(im, 1)
+#         eq_(1, len(blocks))
+#         block = blocks[0]
+#         meanred = (0xff + 0x80) // 4
+#         meangreen = (0x80 + 0x40) // 4
+#         meanblue = (0xff + 0x80) // 4
+#         eq_((meanred, meangreen, meanblue), block)
+#
+#     def test_not_enough_height_to_fit_a_block(self):
+#         im = FakeImage((2, 1), [BLACK, BLACK])
+#         blocks = getblocks(im, 2)
+#         eq_(0, len(blocks))
+#
+#     def xtest_dont_include_leftovers(self):
+#         # this test is disabled because getblocks is not used and getblock is cdeffed
+#         pixels = [
+#             RED,(0, 0x80, 0xff), BLACK,
+#             (0x80, 0, 0),(0, 0x40, 0x80), BLACK,
+#             BLACK, BLACK, BLACK
+#         ]
+#         im = FakeImage((3, 3), pixels)
+#         blocks = getblocks(im, 2)
+#         block = blocks[0]
+#         #Because the block is smaller than the image, only blocksize must be considered.
+#         meanred = (0xff + 0x80) // 4
+#         meangreen = (0x80 + 0x40) // 4
+#         meanblue = (0xff + 0x80) // 4
+#         eq_((meanred, meangreen, meanblue), block)
+#
+#     def xtest_two_blocks(self):
+#         # this test is disabled because getblocks is not used and getblock is cdeffed
+#         pixels = [BLACK for i in xrange(4 * 2)]
+#         pixels[0] = RED
+#         pixels[1] = (0, 0x80, 0xff)
+#         pixels[4] = (0x80, 0, 0)
+#         pixels[5] = (0, 0x40, 0x80)
+#         im = FakeImage((4, 2), pixels)
+#         blocks = getblocks(im, 2)
+#         eq_(2, len(blocks))
+#         block = blocks[0]
+#         #Because the block is smaller than the image, only blocksize must be considered.
+#         meanred = (0xff + 0x80) // 4
+#         meangreen = (0x80 + 0x40) // 4
+#         meanblue = (0xff + 0x80) // 4
+#         eq_((meanred, meangreen, meanblue), block)
+#         eq_(BLACK, blocks[1])
+#
+#     def test_four_blocks(self):
+#         pixels = [BLACK for i in xrange(4 * 4)]
+#         pixels[0] = RED
+#         pixels[1] = (0, 0x80, 0xff)
+#         pixels[4] = (0x80, 0, 0)
+#         pixels[5] = (0, 0x40, 0x80)
+#         im = FakeImage((4, 4), pixels)
+#         blocks = getblocks2(im, 2)
+#         eq_(4, len(blocks))
+#         block = blocks[0]
+#         #Because the block is smaller than the image, only blocksize must be considered.
+#         meanred = (0xff + 0x80) // 4
+#         meangreen = (0x80 + 0x40) // 4
+#         meanblue = (0xff + 0x80) // 4
+#         eq_((meanred, meangreen, meanblue), block)
+#         eq_(BLACK, blocks[1])
+#         eq_(BLACK, blocks[2])
+#         eq_(BLACK, blocks[3])
+#
+
+class TestCasegetblocks2:
+    def test_empty_image(self):
+        # an image without pixels yields no blocks
+        blocks = getblocks2(empty(), 1)
+        eq_(0, len(blocks))
+
+    def test_one_block_image(self):
+        blocks = getblocks2(four_pixels(), 1)
+        eq_(1, len(blocks))
+        expected = (
+            (0xff + 0x80) // 4, # mean red
+            (0x80 + 0x40) // 4, # mean green
+            (0xff + 0x80) // 4, # mean blue
+        )
+        eq_(expected, blocks[0])
+
+    def test_four_blocks_all_black(self):
+        image = FakeImage((2, 2), [BLACK] * 4)
+        blocks = getblocks2(image, 2)
+        eq_(4, len(blocks))
+        for block in blocks:
+            eq_(BLACK, block)
+
+    def test_two_pixels_image_horizontal(self):
+        # one row, two columns: RED on the left, BLUE on the right
+        image = FakeImage((2, 1), [RED, BLUE])
+        blocks = getblocks2(image, 2)
+        eq_(4, len(blocks))
+        # expected block order: RED, BLUE, then RED, BLUE again
+        expected = (RED, BLUE, RED, BLUE)
+        for index, color in enumerate(expected):
+            eq_(color, blocks[index])
+
+    def test_two_pixels_image_vertical(self):
+        # one column, two rows: RED on top, BLUE below
+        image = FakeImage((1, 2), [RED, BLUE])
+        blocks = getblocks2(image, 2)
+        eq_(4, len(blocks))
+        # expected block order: RED twice, then BLUE twice
+        expected = (RED, RED, BLUE, BLUE)
+        for index, color in enumerate(expected):
+            eq_(color, blocks[index])
+
+
+class TestCaseavgdiff:
+    def test_empty(self):
+        with raises(NoBlocksError): # two empty lists are an error, not a zero score
+            my_avgdiff([], [])
+
+    def test_two_blocks(self):
+        first = [(5, 10, 15), (255, 250, 245)]
+        second = [(0, 0, 0), (255, 0, 255)]
+        # each pair contributes the sum of its absolute per-channel differences
+        diff_pair1 = 5 + 10 + 15
+        diff_pair2 = 0 + 250 + 10
+        # the expected score is the integer mean over both pairs
+        pair_diffs = [diff_pair1, diff_pair2]
+        expected = sum(pair_diffs) // len(pair_diffs)
+        result = my_avgdiff(first, second)
+        eq_(expected, result)
+
+    def test_blocks_not_the_same_size(self):
+        block = (0, 0, 0)
+        with raises(DifferentBlockCountError):
+            my_avgdiff([block, block], [block])
+
+    def test_first_arg_is_empty_but_not_second(self):
+        # An empty first list against a non-empty second one is a count mismatch: it must raise.
+        block = (0, 0, 0)
+        with raises(DifferentBlockCountError):
+            my_avgdiff([], [block])
+
+    def test_limit(self):
+        zero = (0, 0, 0)
+        # running average after each pair: 30, then 45, then 60
+        blocks1 = [zero] * 3
+        blocks2 = [(10, 10, 10), (20, 20, 20), (30, 30, 30)]
+        # with a limit of 44 the expected result is 45, the running average after two pairs
+        limit = 44
+        eq_(45, my_avgdiff(blocks1, blocks2, limit))
+
+    def test_min_iterations(self):
+        zero = (0, 0, 0)
+        # running average after each pair: 30, then 45, then 40
+        blocks1 = [zero] * 3
+        blocks2 = [(10, 10, 10), (20, 20, 20), (10, 10, 10)]
+        # 45 exceeds the limit after two pairs, yet with min_iter=3 the three-pair 40 is expected
+        limit, min_iter = 45 - 1, 3
+        eq_(40, my_avgdiff(blocks1, blocks2, limit, min_iter))
+
+    # Bah, I don't know why this test fails, but I don't think it matters very much
+    # def test_just_over_the_limit(self):
+    #     #A score just over the limit might return exactly the limit due to truncating. We should
+    #     #ceil() the result in this case.
+    #     ref = (0, 0, 0)
+    #     b1 = (10, 0, 0)
+    #     b2 = (11, 0, 0)
+    #     blocks1 = [ref, ref]
+    #     blocks2 = [b1, b2]
+    #     eq_(11, my_avgdiff(blocks1, blocks2, 10))
+    #
+    def test_return_at_least_1_at_the_slightest_difference(self):
+        zero = (0, 0, 0)
+        count = 250
+        blocks1 = [zero] * count
+        # a single channel is off by one in the first of the 250 blocks
+        blocks2 = [(1, 0, 0)] + [zero] * (count - 1)
+        eq_(1, my_avgdiff(blocks1, blocks2))
+
+    def test_return_0_if_there_is_no_difference(self):
+        zero = (0, 0, 0)
+        blocks1 = [zero] * 2
+        blocks2 = list(blocks1)
+        eq_(0, my_avgdiff(blocks1, blocks2))
+
+
+# class TCmaxdiff(unittest.TestCase):
+#     def test_empty(self):
+#         self.assertRaises(NoBlocksError, maxdiff,[],[])
+#
+#     def test_two_blocks(self):
+#         b1 = (5, 10, 15)
+#         b2 = (255, 250, 245)
+#         b3 = (0, 0, 0)
+#         b4 = (255, 0, 255)
+#         blocks1 = [b1, b2]
+#         blocks2 = [b3, b4]
+#         expected1 = 5 + 10 + 15
+#         expected2 = 0 + 250 + 10
+#         expected = max(expected1, expected2)
+#         eq_(expected, maxdiff(blocks1, blocks2))
+#
+#     def test_blocks_not_the_same_size(self):
+#         b = (0, 0, 0)
+#         self.assertRaises(DifferentBlockCountError, maxdiff,[b, b],[b])
+#
+#     def test_first_arg_is_empty_but_not_second(self):
+#         #Don't return 0 (as when the 2 lists are empty), raise!
+#         b = (0, 0, 0)
+#         self.assertRaises(DifferentBlockCountError, maxdiff,[],[b])
+#
+#     def test_limit(self):
+#         b1 = (5, 10, 15)
+#         b2 = (255, 250, 245)
+#         b3 = (0, 0, 0)
+#         b4 = (255, 0, 255)
+#         blocks1 = [b1, b2]
+#         blocks2 = [b3, b4]
+#         expected1 = 5 + 10 + 15
+#         expected2 = 0 + 250 + 10
+#         eq_(expected1, maxdiff(blocks1, blocks2, expected1 - 1))
+#
--- a/core/tests/cache_test.py
+++ b/core/tests/cache_test.py
@@ -0,0 +1,143 @@
+# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
+#
+# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
+# http://www.gnu.org/licenses/gpl-3.0.html
+
+import logging
+
+from pytest import raises, skip
+from hscommon.testutil import eq_
+
+try:
+    from ..pe.cache import Cache, colors_to_string, string_to_colors
+except ImportError: # current pytest only permits a module-level skip() with allow_module_level
+    skip("Can't import the cache module, probably hasn't been compiled.", allow_module_level=True)
+
+class TestCasecolors_to_string: # every color is expected as six lowercase hex digits
+    def test_no_color(self):
+        eq_('', colors_to_string([]))
+
+    def test_single_color(self):
+        cases = [('000000', (0, 0, 0)), ('010101', (1, 1, 1)), ('0a141e', (10, 20, 30))]
+        for expected, color in cases:
+            eq_(expected, colors_to_string([color]))
+
+    def test_two_colors(self):
+        eq_('000102030405', colors_to_string([(0, 1, 2), (3, 4, 5)]))
+
+
+class TestCasestring_to_colors: # the reverse direction: hex digits back to color tuples
+    def test_empty(self):
+        eq_([], string_to_colors(''))
+
+    def test_single_color(self):
+        cases = [((0, 0, 0), '000000'), ((2, 3, 4), '020304'), ((10, 20, 30), '0a141e')]
+        for color, encoded in cases:
+            eq_([color], string_to_colors(encoded))
+
+    def test_two_colors(self):
+        eq_([(10, 20, 30), (40, 50, 60)], string_to_colors('0a141e28323c'))
+
+    def test_incomplete_color(self):
+        # fewer than six hex digits do not form a color, so nothing is returned
+        eq_([], string_to_colors('102'))
+
+
+class TestCaseCache:
+    def test_empty(self):
+        c = Cache() # constructed without a database path
+        eq_(0, len(c))
+        with raises(KeyError):
+            c['foo']
+
+    def test_set_then_retrieve_blocks(self):
+        c = Cache()
+        b = [(0, 0, 0), (1, 2, 3)]
+        c['foo'] = b
+        eq_(b, c['foo'])
+
+    def test_delitem(self):
+        c = Cache()
+        c['foo'] = ''
+        del c['foo']
+        assert 'foo' not in c
+        with raises(KeyError): # deleting a key that is already gone
+            del c['foo']
+
+    def test_persistance(self, tmpdir):
+        DBNAME = tmpdir.join('hstest.db')
+        c = Cache(str(DBNAME))
+        c['foo'] = [(1, 2, 3)]
+        del c # drop the first Cache before reopening the same file
+        c = Cache(str(DBNAME))
+        eq_([(1, 2, 3)], c['foo'])
+
+    def test_filter(self):
+        c = Cache()
+        c['foo'] = ''
+        c['bar'] = ''
+        c['baz'] = ''
+        c.filter(lambda p: p != 'bar') #only 'bar' is removed
+        eq_(2, len(c))
+        assert 'foo' in c
+        assert 'baz' in c
+        assert 'bar' not in c
+
+    def test_clear(self):
+        c = Cache()
+        c['foo'] = ''
+        c['bar'] = ''
+        c['baz'] = ''
+        c.clear()
+        eq_(0, len(c))
+        assert 'foo' not in c
+        assert 'baz' not in c
+        assert 'bar' not in c
+
+    def test_corrupted_db(self, tmpdir, monkeypatch):
+        # If we don't do this monkeypatching, we get a weird exception about trying to flush a
+        # closed file. I've tried setting logging level and stuff, but nothing worked. So, there we
+        # go, a dirty monkeypatch.
+        monkeypatch.setattr(logging, 'warning', lambda *args, **kw: None)
+        dbname = str(tmpdir.join('foo.db'))
+        # Fill the file with text that is not a sqlite database.
+        with open(dbname, 'w') as fp: # the handle is closed even if the write raises
+            fp.write('invalid sqlite content')
+        c = Cache(dbname) # should not raise a DatabaseError
+        c['foo'] = [(1, 2, 3)]
+        del c
+        c = Cache(dbname)
+        eq_(c['foo'], [(1, 2, 3)])
+
+    def test_by_id(self):
+        # it's possible to use the cache by referring to the files by their row_id
+        c = Cache()
+        b = [(0, 0, 0), (1, 2, 3)]
+        c['foo'] = b
+        foo_id = c.get_id('foo')
+        eq_(c[foo_id], b)
+
+
+class TestCaseCacheSQLEscape: # every operation is tried with a key containing a single quote
+    def test_contains(self):
+        cache = Cache()
+        assert "foo'bar" not in cache
+
+    def test_getitem(self):
+        cache = Cache()
+        with raises(KeyError):
+            cache["foo'bar"]
+
+    def test_setitem(self):
+        cache = Cache()
+        cache["foo'bar"] = []
+
+    def test_delitem(self):
+        cache = Cache()
+        cache["foo'bar"] = []
+        try:
+            del cache["foo'bar"]
+        except KeyError:
+            assert False
+
--- a/core/tests/scanner_test.py
+++ b/core/tests/scanner_test.py
@@ -12,6 +12,7 @@ from .. import fs
 from ..engine import getwords, Match
 from ..ignore import IgnoreList
 from ..scanner import Scanner, ScanType
+from ..me.scanner import ScannerME

 class NamedObject:
    def __init__(self, name="foobar", size=1, path=None):
@@ -528,3 +529,13 @@ def test_dont_count_ref_files_as_discarded(fake_fileexists):
    o2.is_ref = True
    eq_(len(s.get_dupe_groups([o1, o2, o3])), 1)
    eq_(s.discarded_file_count, 0)
+
+def test_priorize_me(fake_fileexists):
+    # ScannerME prioritizes on bitrate first (right after is_ref): the higher one becomes ref
+    scanner = ScannerME()
+    low = no('foo', path='p1')
+    high = no('foo', path='p2')
+    low.bitrate, high.bitrate = 1, 2
+    [group] = scanner.get_dupe_groups([low, high])
+    assert group.ref is high
+