1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-22 06:37:17 +00:00

Merge core_{se,me,pe} into core.{se,me,pe}

This commit is contained in:
Virgil Dupras
2016-05-31 22:32:37 -04:00
parent d4919054f9
commit a65077f871
39 changed files with 55 additions and 75 deletions

312
core/tests/block_test.py Normal file
View File

@@ -0,0 +1,312 @@
# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
# The commented-out tests are tests for functions that have been converted to pure C for speed.
from pytest import raises, skip
from hscommon.testutil import eq_
try:
from ..pe.block import avgdiff, getblocks2, NoBlocksError, DifferentBlockCountError
except ImportError:
skip("Can't import the block module, probably hasn't been compiled.")
def my_avgdiff(first, second, limit=768, min_iter=3):
    """Call ``avgdiff`` with ``limit`` and ``min_iter`` defaulted.

    Convenience wrapper so the tests below do not have to spell out all four
    positional arguments on every call.
    """
    result = avgdiff(first, second, limit, min_iter)
    return result
# Reference RGB triples used as pixel values by the tests in this module.
BLACK = (0x00, 0x00, 0x00)
RED = (0xFF, 0x00, 0x00)
GREEN = (0x00, 0xFF, 0x00)
BLUE = (0x00, 0x00, 0xFF)
class FakeImage:
    """Minimal in-memory image stand-in exposing ``size``, ``getdata`` and ``crop``.

    ``size`` is a ``(width, height)`` pair and ``data`` is a flat sequence of
    pixels stored row after row (pixel ``(x, y)`` lives at ``y * width + x``).
    """

    def __init__(self, size, data):
        self.size = size
        self.data = data

    def getdata(self):
        """Return the flat pixel sequence this image was built with."""
        return self.data

    def crop(self, box):
        """Return a new ``FakeImage`` holding the pixels inside ``box``.

        ``box`` is indexed as ``(left, upper, right, lower)``; the right and
        lower bounds are exclusive.
        """
        left, upper, right, lower = box[0], box[1], box[2], box[3]
        cropped = [
            self.data[row * self.size[0] + col]
            for row in range(upper, lower)
            for col in range(left, right)
        ]
        return FakeImage((right - left, lower - upper), cropped)
def empty():
    """Build a zero-sized ``FakeImage`` with no pixels."""
    no_pixels = []
    return FakeImage((0, 0), no_pixels)
def single_pixel():
    """Build a 1x1 ``FakeImage`` whose only pixel is pure red."""
    return FakeImage((1, 1), [RED])
def four_pixels():
    """Build a 2x2 ``FakeImage`` made of four distinct colours."""
    top_row = [RED, (0, 0x80, 0xff)]
    bottom_row = [(0x80, 0, 0), (0, 0x40, 0x80)]
    return FakeImage((2, 2), top_row + bottom_row)
class TestCasegetblock:
    """Checks on what ``getblocks2`` returns when its second argument is 1."""

    def test_single_pixel(self):
        # A lone red pixel must come back as exactly one red block.
        blocks = getblocks2(single_pixel(), 1)
        [only_block] = blocks
        eq_(RED, only_block)

    def test_no_pixel(self):
        # An image without pixels produces no block at all.
        eq_([], getblocks2(empty(), 1))

    def test_four_pixels(self):
        # The single block is the per-channel sum of the four pixels,
        # integer-divided by the pixel count.
        [only_block] = getblocks2(four_pixels(), 1)
        channel_sums = (0xff + 0x80, 0x80 + 0x40, 0xff + 0x80)
        expected = tuple(total // 4 for total in channel_sums)
        eq_(expected, only_block)
# class TCdiff(unittest.TestCase):
# def test_diff(self):
# b1 = (10, 20, 30)
# b2 = (1, 2, 3)
# eq_(9 + 18 + 27, diff(b1, b2))
#
# def test_diff_negative(self):
# b1 = (10, 20, 30)
# b2 = (1, 2, 3)
# eq_(9 + 18 + 27, diff(b2, b1))
#
# def test_diff_mixed_positive_and_negative(self):
# b1 = (1, 5, 10)
# b2 = (10, 1, 15)
# eq_(9 + 4 + 5, diff(b1, b2))
#
# class TCgetblocks(unittest.TestCase):
# def test_empty_image(self):
# im = empty()
# blocks = getblocks(im, 1)
# eq_(0, len(blocks))
#
# def test_one_block_image(self):
# im = four_pixels()
# blocks = getblocks2(im, 1)
# eq_(1, len(blocks))
# block = blocks[0]
# meanred = (0xff + 0x80) // 4
# meangreen = (0x80 + 0x40) // 4
# meanblue = (0xff + 0x80) // 4
# eq_((meanred, meangreen, meanblue), block)
#
# def test_not_enough_height_to_fit_a_block(self):
# im = FakeImage((2, 1), [BLACK, BLACK])
# blocks = getblocks(im, 2)
# eq_(0, len(blocks))
#
# def xtest_dont_include_leftovers(self):
# # this test is disabled because getblocks is not used and getblock is cdef'ed
# pixels = [
# RED,(0, 0x80, 0xff), BLACK,
# (0x80, 0, 0),(0, 0x40, 0x80), BLACK,
# BLACK, BLACK, BLACK
# ]
# im = FakeImage((3, 3), pixels)
# blocks = getblocks(im, 2)
# block = blocks[0]
# #Because the block is smaller than the image, only blocksize must be considered.
# meanred = (0xff + 0x80) // 4
# meangreen = (0x80 + 0x40) // 4
# meanblue = (0xff + 0x80) // 4
# eq_((meanred, meangreen, meanblue), block)
#
# def xtest_two_blocks(self):
# # this test is disabled because getblocks is not used and getblock is cdef'ed
# pixels = [BLACK for i in xrange(4 * 2)]
# pixels[0] = RED
# pixels[1] = (0, 0x80, 0xff)
# pixels[4] = (0x80, 0, 0)
# pixels[5] = (0, 0x40, 0x80)
# im = FakeImage((4, 2), pixels)
# blocks = getblocks(im, 2)
# eq_(2, len(blocks))
# block = blocks[0]
# #Because the block is smaller than the image, only blocksize must be considered.
# meanred = (0xff + 0x80) // 4
# meangreen = (0x80 + 0x40) // 4
# meanblue = (0xff + 0x80) // 4
# eq_((meanred, meangreen, meanblue), block)
# eq_(BLACK, blocks[1])
#
# def test_four_blocks(self):
# pixels = [BLACK for i in xrange(4 * 4)]
# pixels[0] = RED
# pixels[1] = (0, 0x80, 0xff)
# pixels[4] = (0x80, 0, 0)
# pixels[5] = (0, 0x40, 0x80)
# im = FakeImage((4, 4), pixels)
# blocks = getblocks2(im, 2)
# eq_(4, len(blocks))
# block = blocks[0]
# #Because the block is smaller than the image, only blocksize must be considered.
# meanred = (0xff + 0x80) // 4
# meangreen = (0x80 + 0x40) // 4
# meanblue = (0xff + 0x80) // 4
# eq_((meanred, meangreen, meanblue), block)
# eq_(BLACK, blocks[1])
# eq_(BLACK, blocks[2])
# eq_(BLACK, blocks[3])
#
class TestCasegetblocks2:
    """Exercise ``getblocks2`` on fake images with a second argument of 1 and 2."""

    def test_empty_image(self):
        # No pixels, no blocks.
        blocks = getblocks2(empty(), 1)
        eq_(0, len(blocks))

    def test_one_block_image(self):
        # The whole 2x2 image collapses into a single averaged block.
        blocks = getblocks2(four_pixels(), 1)
        eq_(1, len(blocks))
        channel_sums = (0xff + 0x80, 0x80 + 0x40, 0xff + 0x80)
        expected = tuple(total // 4 for total in channel_sums)
        eq_(expected, blocks[0])

    def test_four_blocks_all_black(self):
        image = FakeImage((2, 2), [BLACK] * 4)
        blocks = getblocks2(image, 2)
        eq_(4, len(blocks))
        for block in blocks:
            eq_(BLACK, block)

    def test_two_pixels_image_horizontal(self):
        # A 2x1 image still yields four blocks; the expected order is R, B, R, B.
        image = FakeImage((2, 1), [RED, BLUE])
        blocks = getblocks2(image, 2)
        eq_(4, len(blocks))
        for index, expected in enumerate((RED, BLUE, RED, BLUE)):
            eq_(expected, blocks[index])

    def test_two_pixels_image_vertical(self):
        # A 1x2 image still yields four blocks; the expected order is R, R, B, B.
        image = FakeImage((1, 2), [RED, BLUE])
        blocks = getblocks2(image, 2)
        eq_(4, len(blocks))
        for index, expected in enumerate((RED, RED, BLUE, BLUE)):
            eq_(expected, blocks[index])
class TestCaseavgdiff:
    """Tests for ``avgdiff``, reached through the ``my_avgdiff`` wrapper."""

    def test_empty(self):
        # Two empty block lists are an error rather than a zero difference.
        with raises(NoBlocksError):
            my_avgdiff([], [])

    def test_two_blocks(self):
        first = [(5, 10, 15), (255, 250, 245)]
        second = [(0, 0, 0), (255, 0, 255)]
        # Expected per-pair values: absolute per-channel differences, summed.
        pair_diffs = [5 + 10 + 15, 0 + 250 + 10]
        expected = sum(pair_diffs) // 2
        eq_(expected, my_avgdiff(first, second))

    def test_blocks_not_the_same_size(self):
        black = (0, 0, 0)
        with raises(DifferentBlockCountError):
            my_avgdiff([black, black], [black])

    def test_first_arg_is_empty_but_not_second(self):
        # Don't return 0 (as when the 2 lists are empty), raise!
        black = (0, 0, 0)
        with raises(DifferentBlockCountError):
            my_avgdiff([], [black])

    def test_limit(self):
        zero = (0, 0, 0)
        # Running average after each pair: 30, 45, 60.
        others = [(10, 10, 10), (20, 20, 20), (30, 30, 30)]
        eq_(45, my_avgdiff([zero, zero, zero], others, 44))

    def test_min_iterations(self):
        zero = (0, 0, 0)
        # Running average after each pair: 30, 45, 40.
        others = [(10, 10, 10), (20, 20, 20), (10, 10, 10)]
        eq_(40, my_avgdiff([zero, zero, zero], others, 45 - 1, 3))

    # Bah, I don't know why this test fails, but I don't think it matters very much
    # def test_just_over_the_limit(self):
    #     #A score just over the limit might return exactly the limit due to truncating. We should
    #     #ceil() the result in this case.
    #     ref = (0, 0, 0)
    #     b1 = (10, 0, 0)
    #     b2 = (11, 0, 0)
    #     blocks1 = [ref, ref]
    #     blocks2 = [b1, b2]
    #     eq_(11, my_avgdiff(blocks1, blocks2, 10))
    #

    def test_return_at_least_1_at_the_slightest_difference(self):
        zero = (0, 0, 0)
        first = [zero] * 250
        # Only the very first block differs, and only by 1 on one channel.
        second = [(1, 0, 0)] + [zero] * 249
        eq_(1, my_avgdiff(first, second))

    def test_return_0_if_there_is_no_difference(self):
        zero = (0, 0, 0)
        eq_(0, my_avgdiff([zero, zero], [zero, zero]))
# class TCmaxdiff(unittest.TestCase):
# def test_empty(self):
# self.assertRaises(NoBlocksError, maxdiff,[],[])
#
# def test_two_blocks(self):
# b1 = (5, 10, 15)
# b2 = (255, 250, 245)
# b3 = (0, 0, 0)
# b4 = (255, 0, 255)
# blocks1 = [b1, b2]
# blocks2 = [b3, b4]
# expected1 = 5 + 10 + 15
# expected2 = 0 + 250 + 10
# expected = max(expected1, expected2)
# eq_(expected, maxdiff(blocks1, blocks2))
#
# def test_blocks_not_the_same_size(self):
# b = (0, 0, 0)
# self.assertRaises(DifferentBlockCountError, maxdiff,[b, b],[b])
#
# def test_first_arg_is_empty_but_not_second(self):
# #Don't return 0 (as when the 2 lists are empty), raise!
# b = (0, 0, 0)
# self.assertRaises(DifferentBlockCountError, maxdiff,[],[b])
#
# def test_limit(self):
# b1 = (5, 10, 15)
# b2 = (255, 250, 245)
# b3 = (0, 0, 0)
# b4 = (255, 0, 255)
# blocks1 = [b1, b2]
# blocks2 = [b3, b4]
# expected1 = 5 + 10 + 15
# expected2 = 0 + 250 + 10
# eq_(expected1, maxdiff(blocks1, blocks2, expected1 - 1))
#

143
core/tests/cache_test.py Normal file
View File

@@ -0,0 +1,143 @@
# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
import logging
from pytest import raises, skip
from hscommon.testutil import eq_
try:
from ..pe.cache import Cache, colors_to_string, string_to_colors
except ImportError:
skip("Can't import the cache module, probably hasn't been compiled.")
class TestCasecolors_to_string:
    """``colors_to_string`` turns RGB triples into one lowercase hex string."""

    def test_no_color(self):
        eq_('', colors_to_string([]))

    def test_single_color(self):
        # Each channel is expected as two hex digits.
        cases = [
            ('000000', (0, 0, 0)),
            ('010101', (1, 1, 1)),
            ('0a141e', (10, 20, 30)),
        ]
        for expected, color in cases:
            eq_(expected, colors_to_string([color]))

    def test_two_colors(self):
        # Several colours are simply concatenated.
        colors = [(0, 1, 2), (3, 4, 5)]
        eq_('000102030405', colors_to_string(colors))
class TestCasestring_to_colors:
    """``string_to_colors`` parses a hex string back into RGB triples."""

    def test_empty(self):
        eq_([], string_to_colors(''))

    def test_single_color(self):
        cases = [
            ((0, 0, 0), '000000'),
            ((2, 3, 4), '020304'),
            ((10, 20, 30), '0a141e'),
        ]
        for color, encoded in cases:
            eq_([color], string_to_colors(encoded))

    def test_two_colors(self):
        expected = [(10, 20, 30), (40, 50, 60)]
        eq_(expected, string_to_colors('0a141e28323c'))

    def test_incomplete_color(self):
        # don't return anything if it's not a complete color
        truncated = '102'
        eq_([], string_to_colors(truncated))
class TestCaseCache:
    """Tests for the mapping-style interface of ``Cache``.

    ``Cache()`` with no argument is used for throwaway instances; the
    persistence tests pass an explicit database path under ``tmpdir``.
    """

    def test_empty(self):
        # A fresh cache has no entries and raises KeyError on lookup.
        c = Cache()
        eq_(0, len(c))
        with raises(KeyError):
            c['foo']

    def test_set_then_retrieve_blocks(self):
        c = Cache()
        b = [(0, 0, 0), (1, 2, 3)]
        c['foo'] = b
        eq_(b, c['foo'])

    def test_delitem(self):
        c = Cache()
        c['foo'] = ''
        del c['foo']
        assert 'foo' not in c
        # Deleting a key that is already gone must raise.
        with raises(KeyError):
            del c['foo']

    def test_persistance(self, tmpdir):
        # Data written through one instance is visible to a new instance
        # opened on the same database path.
        DBNAME = tmpdir.join('hstest.db')
        c = Cache(str(DBNAME))
        c['foo'] = [(1, 2, 3)]
        del c
        c = Cache(str(DBNAME))
        eq_([(1, 2, 3)], c['foo'])

    def test_filter(self):
        c = Cache()
        c['foo'] = ''
        c['bar'] = ''
        c['baz'] = ''
        c.filter(lambda p: p != 'bar')  # only 'bar' is removed
        eq_(2, len(c))
        assert 'foo' in c
        assert 'baz' in c
        assert 'bar' not in c

    def test_clear(self):
        c = Cache()
        c['foo'] = ''
        c['bar'] = ''
        c['baz'] = ''
        c.clear()
        eq_(0, len(c))
        assert 'foo' not in c
        assert 'baz' not in c
        assert 'bar' not in c

    def test_corrupted_db(self, tmpdir, monkeypatch):
        # If we don't do this monkeypatching, we get a weird exception about trying to flush a
        # closed file. I've tried setting logging level and stuff, but nothing worked. So, there we
        # go, a dirty monkeypatch.
        monkeypatch.setattr(logging, 'warning', lambda *args, **kw: None)
        dbname = str(tmpdir.join('foo.db'))
        # Use a context manager so the handle is closed even if the write fails.
        with open(dbname, 'w') as fp:
            fp.write('invalid sqlite content')
        c = Cache(dbname)  # should not raise a DatabaseError
        c['foo'] = [(1, 2, 3)]
        del c
        c = Cache(dbname)
        eq_(c['foo'], [(1, 2, 3)])

    def test_by_id(self):
        # it's possible to use the cache by referring to the files by their row_id
        c = Cache()
        b = [(0, 0, 0), (1, 2, 3)]
        c['foo'] = b
        foo_id = c.get_id('foo')
        eq_(c[foo_id], b)
class TestCaseCacheSQLEscape:
    """Keys containing a single quote must work with every ``Cache`` operation."""

    def test_contains(self):
        c = Cache()
        assert "foo'bar" not in c

    def test_getitem(self):
        # A missing quoted key raises the ordinary KeyError.
        c = Cache()
        with raises(KeyError):
            c["foo'bar"]

    def test_setitem(self):
        # Passes as long as the assignment does not raise.
        c = Cache()
        c["foo'bar"] = []

    def test_delitem(self):
        c = Cache()
        c["foo'bar"] = []
        # Let a KeyError propagate and fail the test directly. The former
        # ``except KeyError: assert False`` is compiled away under ``python -O``
        # (so the test passed silently) and hid the original traceback.
        del c["foo'bar"]

View File

@@ -12,6 +12,7 @@ from .. import fs
from ..engine import getwords, Match
from ..ignore import IgnoreList
from ..scanner import Scanner, ScanType
from ..me.scanner import ScannerME
class NamedObject:
def __init__(self, name="foobar", size=1, path=None):
@@ -528,3 +529,13 @@ def test_dont_count_ref_files_as_discarded(fake_fileexists):
o2.is_ref = True
eq_(len(s.get_dupe_groups([o1, o2, o3])), 1)
eq_(s.discarded_file_count, 0)
def test_priorize_me(fake_fileexists):
    # In ScannerME, bitrate comes first (right after is_ref) when prioritizing,
    # so the file with the higher bitrate is expected to be the group's ref.
    scanner = ScannerME()
    low = no('foo', path='p1')
    high = no('foo', path='p2')
    low.bitrate, high.bitrate = 1, 2
    groups = scanner.get_dupe_groups([low, high])
    [group] = groups
    assert group.ref is high