1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-23 07:01:39 +00:00

Moved the tests to a "tests" subfolder, and "un-unittest"'ed some of them.

--HG--
extra : convert_revision : svn%3Ac306627e-7827-47d3-bdf0-9a457c9553a1/trunk%4025
This commit is contained in:
hsoft
2009-06-07 14:26:46 +00:00
parent 9c1473b877
commit 1f72e2a6be
12 changed files with 226 additions and 302 deletions

0
py/tests/__init__.py Normal file
View File

316
py/tests/app_cocoa_test.py Normal file
View File

@@ -0,0 +1,316 @@
# Unit Name: dupeguru.tests.app_cocoa_test
# Created By: Virgil Dupras
# Created On: 2006/11/11
# $Id$
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
import tempfile
import shutil
import logging
from hsutil.path import Path
from hsutil.testcase import TestCase
from hsutil.decorators import log_calls
import hsfs.phys
import os.path as op
from .results_test import GetTestGroups
from .. import engine, data
try:
from ..app_cocoa import DupeGuru as DupeGuruBase, DGDirectory
except ImportError:
from nose.plugins.skip import SkipTest
raise SkipTest("These tests can only be run on OS X")
class DupeGuru(DupeGuruBase):
def __init__(self):
DupeGuruBase.__init__(self, data, '/tmp', appid=4)
def _start_job(self, jobid, func):
func(nulljob)
def r2np(rows):
#Transforms a list of rows [1,2,3] into a list of node paths [[1],[2],[3]]
return [[i] for i in rows]
class TCDupeGuru(TestCase):
def setUp(self):
self.app = DupeGuru()
self.objects,self.matches,self.groups = GetTestGroups()
self.app.results.groups = self.groups
def test_GetObjects(self):
app = self.app
objects = self.objects
groups = self.groups
g,d = app.GetObjects([0])
self.assert_(g is groups[0])
self.assert_(d is None)
g,d = app.GetObjects([0,0])
self.assert_(g is groups[0])
self.assert_(d is objects[1])
g,d = app.GetObjects([1,0])
self.assert_(g is groups[1])
self.assert_(d is objects[4])
def test_GetObjects_after_sort(self):
app = self.app
objects = self.objects
groups = self.groups[:] #To keep the old order in memory
app.sort_groups(0,False) #0 = Filename
#Now, the group order is supposed to be reversed
g,d = app.GetObjects([0,0])
self.assert_(g is groups[1])
self.assert_(d is objects[4])
def test_GetObjects_out_of_range(self):
app = self.app
self.assertEqual((None,None),app.GetObjects([2]))
self.assertEqual((None,None),app.GetObjects([]))
self.assertEqual((None,None),app.GetObjects([1,2]))
def test_selectResultNodePaths(self):
app = self.app
objects = self.objects
app.SelectResultNodePaths([[0,0],[0,1]])
self.assertEqual(2,len(app.selected_dupes))
self.assert_(app.selected_dupes[0] is objects[1])
self.assert_(app.selected_dupes[1] is objects[2])
def test_selectResultNodePaths_with_ref(self):
app = self.app
objects = self.objects
app.SelectResultNodePaths([[0,0],[0,1],[1]])
self.assertEqual(3,len(app.selected_dupes))
self.assert_(app.selected_dupes[0] is objects[1])
self.assert_(app.selected_dupes[1] is objects[2])
self.assert_(app.selected_dupes[2] is self.groups[1].ref)
def test_selectResultNodePaths_empty(self):
self.app.SelectResultNodePaths([])
self.assertEqual(0,len(self.app.selected_dupes))
def test_selectResultNodePaths_after_sort(self):
app = self.app
objects = self.objects
groups = self.groups[:] #To keep the old order in memory
app.sort_groups(0,False) #0 = Filename
#Now, the group order is supposed to be reversed
app.SelectResultNodePaths([[0,0],[1],[1,0]])
self.assertEqual(3,len(app.selected_dupes))
self.assert_(app.selected_dupes[0] is objects[4])
self.assert_(app.selected_dupes[1] is groups[0].ref)
self.assert_(app.selected_dupes[2] is objects[1])
def test_selectResultNodePaths_out_of_range(self):
app = self.app
app.SelectResultNodePaths([[0,0],[0,1],[1],[1,1],[2]])
self.assertEqual(3,len(app.selected_dupes))
def test_selectPowerMarkerRows(self):
app = self.app
objects = self.objects
app.SelectPowerMarkerNodePaths(r2np([0,1,2]))
self.assertEqual(3,len(app.selected_dupes))
self.assert_(app.selected_dupes[0] is objects[1])
self.assert_(app.selected_dupes[1] is objects[2])
self.assert_(app.selected_dupes[2] is objects[4])
def test_selectPowerMarkerRows_empty(self):
self.app.SelectPowerMarkerNodePaths([])
self.assertEqual(0,len(self.app.selected_dupes))
def test_selectPowerMarkerRows_after_sort(self):
app = self.app
objects = self.objects
app.sort_dupes(0,False) #0 = Filename
app.SelectPowerMarkerNodePaths(r2np([0,1,2]))
self.assertEqual(3,len(app.selected_dupes))
self.assert_(app.selected_dupes[0] is objects[4])
self.assert_(app.selected_dupes[1] is objects[2])
self.assert_(app.selected_dupes[2] is objects[1])
def test_selectPowerMarkerRows_out_of_range(self):
app = self.app
app.SelectPowerMarkerNodePaths(r2np([0,1,2,3]))
self.assertEqual(3,len(app.selected_dupes))
def test_toggleSelectedMark(self):
app = self.app
objects = self.objects
app.ToggleSelectedMarkState()
self.assertEqual(0,app.results.mark_count)
app.SelectPowerMarkerNodePaths(r2np([0,2]))
app.ToggleSelectedMarkState()
self.assertEqual(2,app.results.mark_count)
self.assert_(not app.results.is_marked(objects[0]))
self.assert_(app.results.is_marked(objects[1]))
self.assert_(not app.results.is_marked(objects[2]))
self.assert_(not app.results.is_marked(objects[3]))
self.assert_(app.results.is_marked(objects[4]))
def test_refreshDetailsWithSelected(self):
def mock_refresh(dupe,group):
self.called = True
if self.app.selected_dupes:
self.assert_(dupe is self.app.selected_dupes[0])
self.assert_(group is self.app.results.get_group_of_duplicate(dupe))
else:
self.assert_(dupe is None)
self.assert_(group is None)
self.app.RefreshDetailsTable = mock_refresh
self.called = False
self.app.SelectPowerMarkerNodePaths(r2np([0,2]))
self.app.RefreshDetailsWithSelected()
self.assert_(self.called)
self.called = False
self.app.SelectPowerMarkerNodePaths([])
self.app.RefreshDetailsWithSelected()
self.assert_(self.called)
def test_makeSelectedReference(self):
app = self.app
objects = self.objects
groups = self.groups
app.SelectPowerMarkerNodePaths(r2np([0,2]))
app.MakeSelectedReference()
self.assert_(groups[0].ref is objects[1])
self.assert_(groups[1].ref is objects[4])
def test_makeSelectedReference_by_selecting_two_dupes_in_the_same_group(self):
app = self.app
objects = self.objects
groups = self.groups
app.SelectPowerMarkerNodePaths(r2np([0,1,2]))
#Only 0 and 2 must go ref, not 1 because it is a part of the same group
app.MakeSelectedReference()
self.assert_(groups[0].ref is objects[1])
self.assert_(groups[1].ref is objects[4])
def test_removeSelected(self):
app = self.app
app.SelectPowerMarkerNodePaths(r2np([0,2]))
app.RemoveSelected()
self.assertEqual(1,len(app.results.dupes))
app.RemoveSelected()
self.assertEqual(1,len(app.results.dupes))
app.SelectPowerMarkerNodePaths(r2np([0,2]))
app.RemoveSelected()
self.assertEqual(0,len(app.results.dupes))
def test_addDirectory_simple(self):
app = self.app
self.assertEqual(0,app.add_directory(self.datadirpath()))
self.assertEqual(1,len(app.directories))
def test_addDirectory_already_there(self):
app = self.app
self.assertEqual(0,app.add_directory(self.datadirpath()))
self.assertEqual(1,app.add_directory(self.datadirpath()))
def test_addDirectory_does_not_exist(self):
app = self.app
self.assertEqual(2,app.add_directory('/does_not_exist'))
def test_ignore(self):
app = self.app
app.SelectPowerMarkerNodePaths(r2np([2])) #The dupe of the second, 2 sized group
app.AddSelectedToIgnoreList()
self.assertEqual(1,len(app.scanner.ignore_list))
app.SelectPowerMarkerNodePaths(r2np([0])) #first dupe of the 3 dupes group
app.AddSelectedToIgnoreList()
#BOTH the ref and the other dupe should have been added
self.assertEqual(3,len(app.scanner.ignore_list))
def test_purgeIgnoreList(self):
app = self.app
p1 = self.filepath('zerofile')
p2 = self.filepath('zerofill')
dne = '/does_not_exist'
app.scanner.ignore_list.Ignore(dne,p1)
app.scanner.ignore_list.Ignore(p2,dne)
app.scanner.ignore_list.Ignore(p1,p2)
app.PurgeIgnoreList()
self.assertEqual(1,len(app.scanner.ignore_list))
self.assert_(app.scanner.ignore_list.AreIgnored(p1,p2))
self.assert_(not app.scanner.ignore_list.AreIgnored(dne,p1))
def test_only_unicode_is_added_to_ignore_list(self):
def FakeIgnore(first,second):
if not isinstance(first,unicode):
self.fail()
if not isinstance(second,unicode):
self.fail()
app = self.app
app.scanner.ignore_list.Ignore = FakeIgnore
app.SelectPowerMarkerNodePaths(r2np([2])) #The dupe of the second, 2 sized group
app.AddSelectedToIgnoreList()
def test_dirclass(self):
self.assert_(self.app.directories.dirclass is DGDirectory)
class TCDupeGuru_renameSelected(TestCase):
def setUp(self):
p = Path(tempfile.mkdtemp())
fp = open(str(p + 'foo bar 1'),mode='w')
fp.close()
fp = open(str(p + 'foo bar 2'),mode='w')
fp.close()
fp = open(str(p + 'foo bar 3'),mode='w')
fp.close()
refdir = hsfs.phys.Directory(None,str(p))
matches = engine.MatchFactory().getmatches(refdir.files)
groups = engine.get_groups(matches)
g = groups[0]
g.prioritize(lambda x:x.name)
app = DupeGuru()
app.results.groups = groups
self.app = app
self.groups = groups
self.p = p
self.refdir = refdir
def tearDown(self):
shutil.rmtree(str(self.p))
def test_simple(self):
app = self.app
refdir = self.refdir
g = self.groups[0]
app.SelectPowerMarkerNodePaths(r2np([0]))
self.assert_(app.RenameSelected('renamed'))
self.assert_('renamed' in refdir)
self.assert_('foo bar 2' not in refdir)
self.assert_(g.dupes[0] is refdir['renamed'])
self.assert_(g.dupes[0] in refdir)
def test_none_selected(self):
app = self.app
refdir = self.refdir
g = self.groups[0]
app.SelectPowerMarkerNodePaths([])
self.mock(logging, 'warning', log_calls(lambda msg: None))
self.assert_(not app.RenameSelected('renamed'))
msg = logging.warning.calls[0]['msg']
self.assertEqual('dupeGuru Warning: list index out of range', msg)
self.assert_('renamed' not in refdir)
self.assert_('foo bar 2' in refdir)
self.assert_(g.dupes[0] is refdir['foo bar 2'])
def test_name_already_exists(self):
app = self.app
refdir = self.refdir
g = self.groups[0]
app.SelectPowerMarkerNodePaths(r2np([0]))
self.mock(logging, 'warning', log_calls(lambda msg: None))
self.assert_(not app.RenameSelected('foo bar 1'))
msg = logging.warning.calls[0]['msg']
self.assert_(msg.startswith('dupeGuru Warning: \'foo bar 2\' already exists in'))
self.assert_('foo bar 1' in refdir)
self.assert_('foo bar 2' in refdir)
self.assert_(g.dupes[0] is refdir['foo bar 2'])

129
py/tests/app_test.py Normal file
View File

@@ -0,0 +1,129 @@
# Unit Name: dupeguru.tests.app_test
# Created By: Virgil Dupras
# Created On: 2007-06-23
# $Id$
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
import os
from hsutil.testcase import TestCase
from hsutil import io
from hsutil.path import Path
from hsutil.decorators import log_calls
import hsfs as fs
import hsfs.phys
import hsutil.files
from hsutil.job import nulljob
from .. import data, app
from ..app import DupeGuru as DupeGuruBase
class DupeGuru(DupeGuruBase):
def __init__(self):
DupeGuruBase.__init__(self, data, '/tmp', appid=4)
def _start_job(self, jobid, func):
func(nulljob)
class TCDupeGuru(TestCase):
cls_tested_module = app
def test_apply_filter_calls_results_apply_filter(self):
app = DupeGuru()
self.mock(app.results, 'apply_filter', log_calls(app.results.apply_filter))
app.apply_filter('foo')
self.assertEqual(2, len(app.results.apply_filter.calls))
call = app.results.apply_filter.calls[0]
self.assert_(call['filter_str'] is None)
call = app.results.apply_filter.calls[1]
self.assertEqual('foo', call['filter_str'])
def test_apply_filter_escapes_regexp(self):
app = DupeGuru()
self.mock(app.results, 'apply_filter', log_calls(app.results.apply_filter))
app.apply_filter('()[]\\.|+?^abc')
call = app.results.apply_filter.calls[1]
self.assertEqual('\\(\\)\\[\\]\\\\\\.\\|\\+\\?\\^abc', call['filter_str'])
app.apply_filter('(*)') # In "simple mode", we want the * to behave as a wilcard
call = app.results.apply_filter.calls[3]
self.assertEqual('\(.*\)', call['filter_str'])
app.options['escape_filter_regexp'] = False
app.apply_filter('(abc)')
call = app.results.apply_filter.calls[5]
self.assertEqual('(abc)', call['filter_str'])
def test_copy_or_move(self):
# The goal here is just to have a test for a previous blowup I had. I know my test coverage
# for this unit is pathetic. What's done is done. My approach now is to add tests for
# every change I want to make. The blowup was caused by a missing import.
dupe_parent = fs.Directory(None, 'foo')
dupe = fs.File(dupe_parent, 'bar')
dupe.copy = log_calls(lambda dest, newname: None)
self.mock(hsutil.files, 'copy', log_calls(lambda source_path, dest_path: None))
self.mock(os, 'makedirs', lambda path: None) # We don't want the test to create that fake directory
self.mock(fs.phys, 'Directory', fs.Directory) # We don't want an error because makedirs didn't work
app = DupeGuru()
app.copy_or_move(dupe, True, 'some_destination', 0)
self.assertEqual(1, len(hsutil.files.copy.calls))
call = hsutil.files.copy.calls[0]
self.assertEqual('some_destination', call['dest_path'])
self.assertEqual(dupe.path, call['source_path'])
def test_copy_or_move_clean_empty_dirs(self):
tmppath = Path(self.tmpdir())
sourcepath = tmppath + 'source'
io.mkdir(sourcepath)
io.open(sourcepath + 'myfile', 'w')
tmpdir = hsfs.phys.Directory(None, unicode(tmppath))
myfile = tmpdir['source']['myfile']
app = DupeGuru()
self.mock(app, 'clean_empty_dirs', log_calls(lambda path: None))
app.copy_or_move(myfile, False, tmppath + 'dest', 0)
calls = app.clean_empty_dirs.calls
self.assertEqual(1, len(calls))
self.assertEqual(sourcepath, calls[0]['path'])
def test_Scan_with_objects_evaluating_to_false(self):
# At some point, any() was used in a wrong way that made Scan() wrongly return 1
app = DupeGuru()
f1, f2 = [fs.File(None, 'foo') for i in range(2)]
f1.is_ref, f2.is_ref = (False, False)
assert not (bool(f1) and bool(f2))
app.directories.get_files = lambda: [f1, f2]
app.directories._dirs.append('this is just so Scan() doesnt return 3')
app.start_scanning() # no exception
class TCDupeGuru_clean_empty_dirs(TestCase):
cls_tested_module = app
def setUp(self):
self.mock(hsutil.files, 'delete_if_empty', log_calls(lambda path, files_to_delete=[]: None))
self.app = DupeGuru()
def test_option_off(self):
self.app.clean_empty_dirs(Path('/foo/bar'))
self.assertEqual(0, len(hsutil.files.delete_if_empty.calls))
def test_option_on(self):
self.app.options['clean_empty_dirs'] = True
self.app.clean_empty_dirs(Path('/foo/bar'))
calls = hsutil.files.delete_if_empty.calls
self.assertEqual(1, len(calls))
self.assertEqual(Path('/foo/bar'), calls[0]['path'])
self.assertEqual(['.DS_Store'], calls[0]['files_to_delete'])
def test_recurse_up(self):
# delete_if_empty must be recursively called up in the path until it returns False
@log_calls
def mock_delete_if_empty(path, files_to_delete=[]):
return len(path) > 1
self.mock(hsutil.files, 'delete_if_empty', mock_delete_if_empty)
self.app.options['clean_empty_dirs'] = True
self.app.clean_empty_dirs(Path('not-empty/empty/empty'))
calls = hsutil.files.delete_if_empty.calls
self.assertEqual(3, len(calls))
self.assertEqual(Path('not-empty/empty/empty'), calls[0]['path'])
self.assertEqual(Path('not-empty/empty'), calls[1]['path'])
self.assertEqual(Path('not-empty'), calls[2]['path'])

306
py/tests/block_test.py Normal file
View File

@@ -0,0 +1,306 @@
# Unit Name: tests.picture.block
# Created By: Virgil Dupras
# Created On: 2006/09/01
# $Id$
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
# The commented out tests are tests for function that have been converted to pure C for speed
import unittest
from ..picture.block import *
def my_avgdiff(first, second, limit=768, min_iter=3): # this is so I don't have to re-write every call
return avgdiff(first, second, limit, min_iter)
BLACK = (0,0,0)
RED = (0xff,0,0)
GREEN = (0,0xff,0)
BLUE = (0,0,0xff)
class FakeImage(object):
def __init__(self, size, data):
self.size = size
self.data = data
def getdata(self):
return self.data
def crop(self, box):
pixels = []
for i in range(box[1], box[3]):
for j in range(box[0], box[2]):
pixel = self.data[i * self.size[0] + j]
pixels.append(pixel)
return FakeImage((box[2] - box[0], box[3] - box[1]), pixels)
def empty():
return FakeImage((0,0), [])
def single_pixel(): #one red pixel
return FakeImage((1, 1), [(0xff,0,0)])
def four_pixels():
pixels = [RED,(0,0x80,0xff),(0x80,0,0),(0,0x40,0x80)]
return FakeImage((2, 2), pixels)
class TCgetblock(unittest.TestCase):
def test_single_pixel(self):
im = single_pixel()
[b] = getblocks2(im, 1)
self.assertEqual(RED,b)
def test_no_pixel(self):
im = empty()
self.assertEqual([], getblocks2(im, 1))
def test_four_pixels(self):
im = four_pixels()
[b] = getblocks2(im, 1)
meanred = (0xff + 0x80) // 4
meangreen = (0x80 + 0x40) // 4
meanblue = (0xff + 0x80) // 4
self.assertEqual((meanred,meangreen,meanblue),b)
# class TCdiff(unittest.TestCase):
# def test_diff(self):
# b1 = (10, 20, 30)
# b2 = (1, 2, 3)
# self.assertEqual(9 + 18 + 27,diff(b1,b2))
#
# def test_diff_negative(self):
# b1 = (10, 20, 30)
# b2 = (1, 2, 3)
# self.assertEqual(9 + 18 + 27,diff(b2,b1))
#
# def test_diff_mixed_positive_and_negative(self):
# b1 = (1, 5, 10)
# b2 = (10, 1, 15)
# self.assertEqual(9 + 4 + 5,diff(b1,b2))
#
# class TCgetblocks(unittest.TestCase):
# def test_empty_image(self):
# im = empty()
# blocks = getblocks(im,1)
# self.assertEqual(0,len(blocks))
#
# def test_one_block_image(self):
# im = four_pixels()
# blocks = getblocks2(im, 1)
# self.assertEqual(1,len(blocks))
# block = blocks[0]
# meanred = (0xff + 0x80) // 4
# meangreen = (0x80 + 0x40) // 4
# meanblue = (0xff + 0x80) // 4
# self.assertEqual((meanred,meangreen,meanblue),block)
#
# def test_not_enough_height_to_fit_a_block(self):
# im = FakeImage((2,1), [BLACK, BLACK])
# blocks = getblocks(im,2)
# self.assertEqual(0,len(blocks))
#
# def xtest_dont_include_leftovers(self):
# # this test is disabled because getblocks is not used and getblock in cdeffed
# pixels = [
# RED,(0,0x80,0xff),BLACK,
# (0x80,0,0),(0,0x40,0x80),BLACK,
# BLACK,BLACK,BLACK
# ]
# im = FakeImage((3,3), pixels)
# blocks = getblocks(im,2)
# block = blocks[0]
# #Because the block is smaller than the image, only blocksize must be considered.
# meanred = (0xff + 0x80) // 4
# meangreen = (0x80 + 0x40) // 4
# meanblue = (0xff + 0x80) // 4
# self.assertEqual((meanred,meangreen,meanblue),block)
#
# def xtest_two_blocks(self):
# # this test is disabled because getblocks is not used and getblock in cdeffed
# pixels = [BLACK for i in xrange(4 * 2)]
# pixels[0] = RED
# pixels[1] = (0,0x80,0xff)
# pixels[4] = (0x80,0,0)
# pixels[5] = (0,0x40,0x80)
# im = FakeImage((4, 2), pixels)
# blocks = getblocks(im,2)
# self.assertEqual(2,len(blocks))
# block = blocks[0]
# #Because the block is smaller than the image, only blocksize must be considered.
# meanred = (0xff + 0x80) // 4
# meangreen = (0x80 + 0x40) // 4
# meanblue = (0xff + 0x80) // 4
# self.assertEqual((meanred,meangreen,meanblue),block)
# self.assertEqual(BLACK,blocks[1])
#
# def test_four_blocks(self):
# pixels = [BLACK for i in xrange(4 * 4)]
# pixels[0] = RED
# pixels[1] = (0,0x80,0xff)
# pixels[4] = (0x80,0,0)
# pixels[5] = (0,0x40,0x80)
# im = FakeImage((4, 4), pixels)
# blocks = getblocks2(im, 2)
# self.assertEqual(4,len(blocks))
# block = blocks[0]
# #Because the block is smaller than the image, only blocksize must be considered.
# meanred = (0xff + 0x80) // 4
# meangreen = (0x80 + 0x40) // 4
# meanblue = (0xff + 0x80) // 4
# self.assertEqual((meanred,meangreen,meanblue),block)
# self.assertEqual(BLACK,blocks[1])
# self.assertEqual(BLACK,blocks[2])
# self.assertEqual(BLACK,blocks[3])
#
class TCgetblocks2(unittest.TestCase):
def test_empty_image(self):
im = empty()
blocks = getblocks2(im,1)
self.assertEqual(0,len(blocks))
def test_one_block_image(self):
im = four_pixels()
blocks = getblocks2(im,1)
self.assertEqual(1,len(blocks))
block = blocks[0]
meanred = (0xff + 0x80) // 4
meangreen = (0x80 + 0x40) // 4
meanblue = (0xff + 0x80) // 4
self.assertEqual((meanred,meangreen,meanblue),block)
def test_four_blocks_all_black(self):
im = FakeImage((2, 2), [BLACK, BLACK, BLACK, BLACK])
blocks = getblocks2(im,2)
self.assertEqual(4,len(blocks))
for block in blocks:
self.assertEqual(BLACK,block)
def test_two_pixels_image_horizontal(self):
pixels = [RED,BLUE]
im = FakeImage((2, 1), pixels)
blocks = getblocks2(im,2)
self.assertEqual(4,len(blocks))
self.assertEqual(RED,blocks[0])
self.assertEqual(BLUE,blocks[1])
self.assertEqual(RED,blocks[2])
self.assertEqual(BLUE,blocks[3])
def test_two_pixels_image_vertical(self):
pixels = [RED,BLUE]
im = FakeImage((1, 2), pixels)
blocks = getblocks2(im,2)
self.assertEqual(4,len(blocks))
self.assertEqual(RED,blocks[0])
self.assertEqual(RED,blocks[1])
self.assertEqual(BLUE,blocks[2])
self.assertEqual(BLUE,blocks[3])
class TCavgdiff(unittest.TestCase):
def test_empty(self):
self.assertRaises(NoBlocksError, my_avgdiff, [], [])
def test_two_blocks(self):
im = empty()
b1 = (5,10,15)
b2 = (255,250,245)
b3 = (0,0,0)
b4 = (255,0,255)
blocks1 = [b1,b2]
blocks2 = [b3,b4]
expected1 = 5 + 10 + 15
expected2 = 0 + 250 + 10
expected = (expected1 + expected2) // 2
self.assertEqual(expected, my_avgdiff(blocks1, blocks2))
def test_blocks_not_the_same_size(self):
b = (0,0,0)
self.assertRaises(DifferentBlockCountError,my_avgdiff,[b,b],[b])
def test_first_arg_is_empty_but_not_second(self):
#Don't return 0 (as when the 2 lists are empty), raise!
b = (0,0,0)
self.assertRaises(DifferentBlockCountError,my_avgdiff,[],[b])
def test_limit(self):
ref = (0,0,0)
b1 = (10,10,10) #avg 30
b2 = (20,20,20) #avg 45
b3 = (30,30,30) #avg 60
blocks1 = [ref,ref,ref]
blocks2 = [b1,b2,b3]
self.assertEqual(45,my_avgdiff(blocks1,blocks2,44))
def test_min_iterations(self):
ref = (0,0,0)
b1 = (10,10,10) #avg 30
b2 = (20,20,20) #avg 45
b3 = (10,10,10) #avg 40
blocks1 = [ref,ref,ref]
blocks2 = [b1,b2,b3]
self.assertEqual(40,my_avgdiff(blocks1,blocks2,45 - 1,3))
# Bah, I don't know why this test fails, but I don't think it matters very much
# def test_just_over_the_limit(self):
# #A score just over the limit might return exactly the limit due to truncating. We should
# #ceil() the result in this case.
# ref = (0,0,0)
# b1 = (10,0,0)
# b2 = (11,0,0)
# blocks1 = [ref,ref]
# blocks2 = [b1,b2]
# self.assertEqual(11,my_avgdiff(blocks1,blocks2,10))
#
def test_return_at_least_1_at_the_slightest_difference(self):
ref = (0,0,0)
b1 = (1,0,0)
blocks1 = [ref for i in xrange(250)]
blocks2 = [ref for i in xrange(250)]
blocks2[0] = b1
self.assertEqual(1,my_avgdiff(blocks1,blocks2))
def test_return_0_if_there_is_no_difference(self):
ref = (0,0,0)
blocks1 = [ref,ref]
blocks2 = [ref,ref]
self.assertEqual(0,my_avgdiff(blocks1,blocks2))
# class TCmaxdiff(unittest.TestCase):
# def test_empty(self):
# self.assertRaises(NoBlocksError,maxdiff,[],[])
#
# def test_two_blocks(self):
# b1 = (5,10,15)
# b2 = (255,250,245)
# b3 = (0,0,0)
# b4 = (255,0,255)
# blocks1 = [b1,b2]
# blocks2 = [b3,b4]
# expected1 = 5 + 10 + 15
# expected2 = 0 + 250 + 10
# expected = max(expected1,expected2)
# self.assertEqual(expected,maxdiff(blocks1,blocks2))
#
# def test_blocks_not_the_same_size(self):
# b = (0,0,0)
# self.assertRaises(DifferentBlockCountError,maxdiff,[b,b],[b])
#
# def test_first_arg_is_empty_but_not_second(self):
# #Don't return 0 (as when the 2 lists are empty), raise!
# b = (0,0,0)
# self.assertRaises(DifferentBlockCountError,maxdiff,[],[b])
#
# def test_limit(self):
# b1 = (5,10,15)
# b2 = (255,250,245)
# b3 = (0,0,0)
# b4 = (255,0,255)
# blocks1 = [b1,b2]
# blocks2 = [b3,b4]
# expected1 = 5 + 10 + 15
# expected2 = 0 + 250 + 10
# self.assertEqual(expected1,maxdiff(blocks1,blocks2,expected1 - 1))
#

152
py/tests/cache_test.py Normal file
View File

@@ -0,0 +1,152 @@
# Unit Name: dupeguru.tests.cache_test
# Created By: Virgil Dupras
# Created On: 2006/09/14
# $Id$
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
from StringIO import StringIO
import os.path as op
import os
import threading
from hsutil.testcase import TestCase
from ..picture.cache import *
class TCcolors_to_string(TestCase):
def test_no_color(self):
self.assertEqual('',colors_to_string([]))
def test_single_color(self):
self.assertEqual('000000',colors_to_string([(0,0,0)]))
self.assertEqual('010101',colors_to_string([(1,1,1)]))
self.assertEqual('0a141e',colors_to_string([(10,20,30)]))
def test_two_colors(self):
self.assertEqual('000102030405',colors_to_string([(0,1,2),(3,4,5)]))
class TCstring_to_colors(TestCase):
def test_empty(self):
self.assertEqual([],string_to_colors(''))
def test_single_color(self):
self.assertEqual([(0,0,0)],string_to_colors('000000'))
self.assertEqual([(2,3,4)],string_to_colors('020304'))
self.assertEqual([(10,20,30)],string_to_colors('0a141e'))
def test_two_colors(self):
self.assertEqual([(10,20,30),(40,50,60)],string_to_colors('0a141e28323c'))
def test_incomplete_color(self):
# don't return anything if it's not a complete color
self.assertEqual([],string_to_colors('102'))
class TCCache(TestCase):
def test_empty(self):
c = Cache()
self.assertEqual(0,len(c))
self.assertRaises(KeyError,c.__getitem__,'foo')
def test_set_then_retrieve_blocks(self):
c = Cache()
b = [(0,0,0),(1,2,3)]
c['foo'] = b
self.assertEqual(b,c['foo'])
def test_delitem(self):
c = Cache()
c['foo'] = ''
del c['foo']
self.assert_('foo' not in c)
self.assertRaises(KeyError,c.__delitem__,'foo')
def test_persistance(self):
DBNAME = op.join(self.tmpdir(), 'hstest.db')
c = Cache(DBNAME)
c['foo'] = [(1,2,3)]
del c
c = Cache(DBNAME)
self.assertEqual([(1,2,3)],c['foo'])
del c
os.remove(DBNAME)
def test_filter(self):
c = Cache()
c['foo'] = ''
c['bar'] = ''
c['baz'] = ''
c.filter(lambda p:p != 'bar') #only 'bar' is removed
self.assertEqual(2,len(c))
self.assert_('foo' in c)
self.assert_('baz' in c)
self.assert_('bar' not in c)
def test_clear(self):
c = Cache()
c['foo'] = ''
c['bar'] = ''
c['baz'] = ''
c.clear()
self.assertEqual(0,len(c))
self.assert_('foo' not in c)
self.assert_('baz' not in c)
self.assert_('bar' not in c)
def test_corrupted_db(self):
dbname = op.join(self.tmpdir(), 'foo.db')
fp = open(dbname, 'w')
fp.write('invalid sqlite content')
fp.close()
c = Cache(dbname) # should not raise a DatabaseError
c['foo'] = [(1, 2, 3)]
del c
c = Cache(dbname)
self.assertEqual(c['foo'], [(1, 2, 3)])
def test_by_id(self):
# it's possible to use the cache by referring to the files by their row_id
c = Cache()
b = [(0,0,0),(1,2,3)]
c['foo'] = b
foo_id = c.get_id('foo')
self.assertEqual(c[foo_id], b)
class TCCacheSQLEscape(TestCase):
def test_contains(self):
c = Cache()
self.assert_("foo'bar" not in c)
def test_getitem(self):
c = Cache()
self.assertRaises(KeyError, c.__getitem__, "foo'bar")
def test_setitem(self):
c = Cache()
c["foo'bar"] = []
def test_delitem(self):
c = Cache()
c["foo'bar"] = []
try:
del c["foo'bar"]
except KeyError:
self.fail()
class TCCacheThreaded(TestCase):
def test_access_cache(self):
def thread_run():
try:
c['foo'] = [(1,2,3)]
except sqlite.ProgrammingError:
self.fail()
c = Cache()
t = threading.Thread(target=thread_run)
t.start()
t.join()
self.assertEqual([(1,2,3)], c['foo'])

View File

@@ -0,0 +1,272 @@
# Unit Name: dupeguru.tests.directories_test
# Created By: Virgil Dupras
# Created On: 2006/02/27
# $Id$
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
import os.path as op
import os
import time
import shutil
from hsutil import job, io
from hsutil.path import Path
from hsutil.testcase import TestCase
import hsfs.phys
from hsfs.phys import phys_test
from ..directories import *
testpath = Path(TestCase.datadirpath())
class TCDirectories(TestCase):
def test_empty(self):
d = Directories()
self.assertEqual(0,len(d))
self.assert_('foobar' not in d)
def test_add_path(self):
d = Directories()
p = testpath + 'utils'
added = d.add_path(p)
self.assertEqual(1,len(d))
self.assert_(p in d)
self.assert_((p + 'foobar') in d)
self.assert_(p[:-1] not in d)
self.assertEqual(p,added.path)
self.assert_(d[0] is added)
p = self.tmppath()
d.add_path(p)
self.assertEqual(2,len(d))
self.assert_(p in d)
def test_AddPath_when_path_is_already_there(self):
d = Directories()
p = testpath + 'utils'
d.add_path(p)
self.assertRaises(AlreadyThereError, d.add_path, p)
self.assertRaises(AlreadyThereError, d.add_path, p + 'foobar')
self.assertEqual(1, len(d))
def test_AddPath_containing_paths_already_there(self):
d = Directories()
d.add_path(testpath + 'utils')
self.assertEqual(1, len(d))
added = d.add_path(testpath)
self.assertEqual(1, len(d))
self.assert_(added is d[0])
def test_AddPath_non_latin(self):
p = Path(self.tmpdir())
to_add = p + u'unicode\u201a'
os.mkdir(unicode(to_add))
d = Directories()
try:
d.add_path(to_add)
except UnicodeDecodeError:
self.fail()
def test_del(self):
d = Directories()
d.add_path(testpath + 'utils')
try:
del d[1]
self.fail()
except IndexError:
pass
d.add_path(self.tmppath())
del d[1]
self.assertEqual(1, len(d))
def test_states(self):
d = Directories()
p = testpath + 'utils'
d.add_path(p)
self.assertEqual(STATE_NORMAL,d.GetState(p))
d.SetState(p,STATE_REFERENCE)
self.assertEqual(STATE_REFERENCE,d.GetState(p))
self.assertEqual(STATE_REFERENCE,d.GetState(p + 'dir1'))
self.assertEqual(1,len(d.states))
self.assertEqual(p,d.states.keys()[0])
self.assertEqual(STATE_REFERENCE,d.states[p])
def test_GetState_with_path_not_there(self):
d = Directories()
d.add_path(testpath + 'utils')
self.assertRaises(LookupError,d.GetState,testpath)
def test_states_remain_when_larger_directory_eat_smaller_ones(self):
d = Directories()
p = testpath + 'utils'
d.add_path(p)
d.SetState(p,STATE_EXCLUDED)
d.add_path(testpath)
d.SetState(testpath,STATE_REFERENCE)
self.assertEqual(STATE_EXCLUDED,d.GetState(p))
self.assertEqual(STATE_EXCLUDED,d.GetState(p + 'dir1'))
self.assertEqual(STATE_REFERENCE,d.GetState(testpath))
def test_SetState_keep_state_dict_size_to_minimum(self):
d = Directories()
p = Path(phys_test.create_fake_fs(self.tmpdir()))
d.add_path(p)
d.SetState(p,STATE_REFERENCE)
d.SetState(p + 'dir1',STATE_REFERENCE)
self.assertEqual(1,len(d.states))
self.assertEqual(STATE_REFERENCE,d.GetState(p + 'dir1'))
d.SetState(p + 'dir1',STATE_NORMAL)
self.assertEqual(2,len(d.states))
self.assertEqual(STATE_NORMAL,d.GetState(p + 'dir1'))
d.SetState(p + 'dir1',STATE_REFERENCE)
self.assertEqual(1,len(d.states))
self.assertEqual(STATE_REFERENCE,d.GetState(p + 'dir1'))
def test_get_files(self):
d = Directories()
p = Path(phys_test.create_fake_fs(self.tmpdir()))
d.add_path(p)
d.SetState(p + 'dir1',STATE_REFERENCE)
d.SetState(p + 'dir2',STATE_EXCLUDED)
files = d.get_files()
self.assertEqual(5, len(list(files)))
for f in files:
if f.parent.path == p + 'dir1':
self.assert_(f.is_ref)
else:
self.assert_(not f.is_ref)
def test_get_files_with_inherited_exclusion(self):
d = Directories()
p = testpath + 'utils'
d.add_path(p)
d.SetState(p,STATE_EXCLUDED)
self.assertEqual([], list(d.get_files()))
def test_save_and_load(self):
d1 = Directories()
d2 = Directories()
p1 = self.tmppath()
p2 = self.tmppath()
d1.add_path(p1)
d1.add_path(p2)
d1.SetState(p1, STATE_REFERENCE)
d1.SetState(p1 + 'dir1',STATE_EXCLUDED)
tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
d1.SaveToFile(tmpxml)
d2.LoadFromFile(tmpxml)
self.assertEqual(2, len(d2))
self.assertEqual(STATE_REFERENCE,d2.GetState(p1))
self.assertEqual(STATE_EXCLUDED,d2.GetState(p1 + 'dir1'))
def test_invalid_path(self):
d = Directories()
p = Path('does_not_exist')
self.assertRaises(InvalidPathError, d.add_path, p)
self.assertEqual(0, len(d))
def test_SetState_on_invalid_path(self):
d = Directories()
try:
d.SetState(Path('foobar',),STATE_NORMAL)
except LookupError:
self.fail()
def test_default_dirclass(self):
self.assert_(Directories().dirclass is hsfs.phys.Directory)
def test_dirclass(self):
class MySpecialDirclass(hsfs.phys.Directory): pass
d = Directories()
d.dirclass = MySpecialDirclass
d.add_path(testpath)
self.assert_(isinstance(d[0], MySpecialDirclass))
def test_LoadFromFile_with_invalid_path(self):
#This test simulates a load from file resulting in a
#InvalidPath raise. Other directories must be loaded.
d1 = Directories()
d1.add_path(testpath + 'utils')
#Will raise InvalidPath upon loading
d1.add_path(self.tmppath()).name = 'does_not_exist'
tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
d1.SaveToFile(tmpxml)
d2 = Directories()
d2.LoadFromFile(tmpxml)
self.assertEqual(1, len(d2))
def test_LoadFromFile_with_same_paths(self):
#This test simulates a load from file resulting in a
#AlreadyExists raise. Other directories must be loaded.
d1 = Directories()
p1 = self.tmppath()
p2 = self.tmppath()
d1.add_path(p1)
d1.add_path(p2)
#Will raise AlreadyExists upon loading
d1.add_path(self.tmppath()).name = unicode(p1)
tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
d1.SaveToFile(tmpxml)
d2 = Directories()
d2.LoadFromFile(tmpxml)
self.assertEqual(2, len(d2))
def test_Remove(self):
d = Directories()
d1 = d.add_path(self.tmppath())
d2 = d.add_path(self.tmppath())
d.Remove(d1)
self.assertEqual(1, len(d))
self.assert_(d[0] is d2)
def test_unicode_save(self):
d = Directories()
p1 = self.tmppath() + u'hello\xe9'
io.mkdir(p1)
io.mkdir(p1 + u'foo\xe9')
d.add_path(p1)
d.SetState(d[0][0].path, STATE_EXCLUDED)
tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
try:
d.SaveToFile(tmpxml)
except UnicodeDecodeError:
self.fail()
def test_get_files_refreshes_its_directories(self):
d = Directories()
p = Path(phys_test.create_fake_fs(self.tmpdir()))
d.add_path(p)
files = d.get_files()
self.assertEqual(6, len(list(files)))
time.sleep(1)
os.remove(str(p + ('dir1','file1.test')))
files = d.get_files()
self.assertEqual(5, len(list(files)))
def test_get_files_does_not_choke_on_non_existing_directories(self):
d = Directories()
p = Path(self.tmpdir())
d.add_path(p)
io.rmtree(p)
self.assertEqual([], list(d.get_files()))
def test_GetState_returns_excluded_by_default_for_hidden_directories(self):
d = Directories()
p = Path(self.tmpdir())
hidden_dir_path = p + '.foo'
io.mkdir(p + '.foo')
d.add_path(p)
self.assertEqual(d.GetState(hidden_dir_path), STATE_EXCLUDED)
# But it can be overriden
d.SetState(hidden_dir_path, STATE_NORMAL)
self.assertEqual(d.GetState(hidden_dir_path), STATE_NORMAL)
def test_special_dirclasses(self):
# if a path is in special_dirclasses, use this class instead
class MySpecialDirclass(hsfs.phys.Directory): pass
d = Directories()
p1 = self.tmppath()
p2 = self.tmppath()
d.special_dirclasses[p1] = MySpecialDirclass
self.assert_(isinstance(d.add_path(p2), hsfs.phys.Directory))
self.assert_(isinstance(d.add_path(p1), MySpecialDirclass))

814
py/tests/engine_test.py Normal file
View File

@@ -0,0 +1,814 @@
# Unit Name: dupeguru.tests.engine_test
# Created By: Virgil Dupras
# Created On: 2006/01/29
# $Id$
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
import sys
from hsutil import job
from hsutil.decorators import log_calls
from hsutil.testcase import TestCase
from .. import engine
from ..engine import *
class NamedObject(object):
def __init__(self, name="foobar", with_words=False):
self.name = name
if with_words:
self.words = getwords(name)
def get_match_triangle():
o1 = NamedObject(with_words=True)
o2 = NamedObject(with_words=True)
o3 = NamedObject(with_words=True)
m1 = get_match(o1,o2)
m2 = get_match(o1,o3)
m3 = get_match(o2,o3)
return [m1, m2, m3]
def get_test_group():
m1, m2, m3 = get_match_triangle()
result = Group()
result.add_match(m1)
result.add_match(m2)
result.add_match(m3)
return result
class TCgetwords(TestCase):
def test_spaces(self):
self.assertEqual(['a', 'b', 'c', 'd'], getwords("a b c d"))
self.assertEqual(['a', 'b', 'c', 'd'], getwords(" a b c d "))
def test_splitter_chars(self):
self.assertEqual(
[chr(i) for i in xrange(ord('a'),ord('z')+1)],
getwords("a-b_c&d+e(f)g;h\\i[j]k{l}m:n.o,p<q>r/s?t~u!v@w#x$y*z")
)
def test_joiner_chars(self):
self.assertEqual(["aec"], getwords(u"a'e\u0301c"))
def test_empty(self):
self.assertEqual([], getwords(''))
def test_returns_lowercase(self):
self.assertEqual(['foo', 'bar'], getwords('FOO BAR'))
def test_decompose_unicode(self):
self.assertEqual(getwords(u'foo\xe9bar'), ['fooebar'])
class TCgetfields(TestCase):
def test_simple(self):
self.assertEqual([['a', 'b'], ['c', 'd', 'e']], getfields('a b - c d e'))
def test_empty(self):
self.assertEqual([], getfields(''))
def test_cleans_empty_fields(self):
expected = [['a', 'bc', 'def']]
actual = getfields(' - a bc def')
self.assertEqual(expected, actual)
expected = [['bc', 'def']]
class TCunpack_fields(TestCase):
def test_with_fields(self):
expected = ['a', 'b', 'c', 'd', 'e', 'f']
actual = unpack_fields([['a'], ['b', 'c'], ['d', 'e', 'f']])
self.assertEqual(expected, actual)
def test_without_fields(self):
expected = ['a', 'b', 'c', 'd', 'e', 'f']
actual = unpack_fields(['a', 'b', 'c', 'd', 'e', 'f'])
self.assertEqual(expected, actual)
def test_empty(self):
self.assertEqual([], unpack_fields([]))
class TCWordCompare(TestCase):
def test_list(self):
self.assertEqual(100, compare(['a', 'b', 'c', 'd'],['a', 'b', 'c', 'd']))
self.assertEqual(86, compare(['a', 'b', 'c', 'd'],['a', 'b', 'c']))
def test_unordered(self):
#Sometimes, users don't want fuzzy matching too much When they set the slider
#to 100, they don't expect a filename with the same words, but not the same order, to match.
#Thus, we want to return 99 in that case.
self.assertEqual(99, compare(['a', 'b', 'c', 'd'], ['d', 'b', 'c', 'a']))
def test_word_occurs_twice(self):
#if a word occurs twice in first, but once in second, we want the word to be only counted once
self.assertEqual(89, compare(['a', 'b', 'c', 'd', 'a'], ['d', 'b', 'c', 'a']))
def test_uses_copy_of_lists(self):
first = ['foo', 'bar']
second = ['bar', 'bleh']
compare(first, second)
self.assertEqual(['foo', 'bar'], first)
self.assertEqual(['bar', 'bleh'], second)
def test_word_weight(self):
self.assertEqual(int((6.0 / 13.0) * 100), compare(['foo', 'bar'], ['bar', 'bleh'], (WEIGHT_WORDS, )))
def test_similar_words(self):
self.assertEqual(100, compare(['the', 'white', 'stripes'],['the', 'whites', 'stripe'], (MATCH_SIMILAR_WORDS, )))
def test_empty(self):
self.assertEqual(0, compare([], []))
def test_with_fields(self):
self.assertEqual(67, compare([['a', 'b'], ['c', 'd', 'e']], [['a', 'b'], ['c', 'd', 'f']]))
def test_propagate_flags_with_fields(self):
def mock_compare(first, second, flags):
self.assertEqual((0, 1, 2, 3, 5), flags)
self.mock(engine, 'compare_fields', mock_compare)
compare([['a']], [['a']], (0, 1, 2, 3, 5))
class TCWordCompareWithFields(TestCase):
def test_simple(self):
self.assertEqual(67, compare_fields([['a', 'b'], ['c', 'd', 'e']], [['a', 'b'], ['c', 'd', 'f']]))
def test_empty(self):
self.assertEqual(0, compare_fields([], []))
def test_different_length(self):
self.assertEqual(0, compare_fields([['a'], ['b']], [['a'], ['b'], ['c']]))
def test_propagates_flags(self):
def mock_compare(first, second, flags):
self.assertEqual((0, 1, 2, 3, 5), flags)
self.mock(engine, 'compare_fields', mock_compare)
compare_fields([['a']], [['a']],(0, 1, 2, 3, 5))
def test_order(self):
first = [['a', 'b'], ['c', 'd', 'e']]
second = [['c', 'd', 'f'], ['a', 'b']]
self.assertEqual(0, compare_fields(first, second))
def test_no_order(self):
first = [['a','b'],['c','d','e']]
second = [['c','d','f'],['a','b']]
self.assertEqual(67, compare_fields(first, second, (NO_FIELD_ORDER, )))
first = [['a','b'],['a','b']] #a field can only be matched once.
second = [['c','d','f'],['a','b']]
self.assertEqual(0, compare_fields(first, second, (NO_FIELD_ORDER, )))
first = [['a','b'],['a','b','c']]
second = [['c','d','f'],['a','b']]
self.assertEqual(33, compare_fields(first, second, (NO_FIELD_ORDER, )))
def test_compare_fields_without_order_doesnt_alter_fields(self):
#The NO_ORDER comp type altered the fields!
first = [['a','b'],['c','d','e']]
second = [['c','d','f'],['a','b']]
self.assertEqual(67, compare_fields(first, second, (NO_FIELD_ORDER, )))
self.assertEqual([['a','b'],['c','d','e']],first)
self.assertEqual([['c','d','f'],['a','b']],second)
class TCbuild_word_dict(TestCase):
def test_with_standard_words(self):
l = [NamedObject('foo bar',True)]
l.append(NamedObject('bar baz',True))
l.append(NamedObject('baz bleh foo',True))
d = build_word_dict(l)
self.assertEqual(4,len(d))
self.assertEqual(2,len(d['foo']))
self.assert_(l[0] in d['foo'])
self.assert_(l[2] in d['foo'])
self.assertEqual(2,len(d['bar']))
self.assert_(l[0] in d['bar'])
self.assert_(l[1] in d['bar'])
self.assertEqual(2,len(d['baz']))
self.assert_(l[1] in d['baz'])
self.assert_(l[2] in d['baz'])
self.assertEqual(1,len(d['bleh']))
self.assert_(l[2] in d['bleh'])
def test_unpack_fields(self):
o = NamedObject('')
o.words = [['foo','bar'],['baz']]
d = build_word_dict([o])
self.assertEqual(3,len(d))
self.assertEqual(1,len(d['foo']))
def test_words_are_unaltered(self):
o = NamedObject('')
o.words = [['foo','bar'],['baz']]
d = build_word_dict([o])
self.assertEqual([['foo','bar'],['baz']],o.words)
def test_object_instances_can_only_be_once_in_words_object_list(self):
o = NamedObject('foo foo',True)
d = build_word_dict([o])
self.assertEqual(1,len(d['foo']))
def test_job(self):
def do_progress(p,d=''):
self.log.append(p)
return True
j = job.Job(1,do_progress)
self.log = []
s = "foo bar"
build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j)
self.assertEqual(0,self.log[0])
self.assertEqual(33,self.log[1])
self.assertEqual(66,self.log[2])
self.assertEqual(100,self.log[3])
class TCmerge_similar_words(TestCase):
def test_some_similar_words(self):
d = {
'foobar':set([1]),
'foobar1':set([2]),
'foobar2':set([3]),
}
merge_similar_words(d)
self.assertEqual(1,len(d))
self.assertEqual(3,len(d['foobar']))
class TCreduce_common_words(TestCase):
def test_typical(self):
d = {
'foo': set([NamedObject('foo bar',True) for i in range(50)]),
'bar': set([NamedObject('foo bar',True) for i in range(49)])
}
reduce_common_words(d, 50)
self.assert_('foo' not in d)
self.assertEqual(49,len(d['bar']))
def test_dont_remove_objects_with_only_common_words(self):
d = {
'common': set([NamedObject("common uncommon",True) for i in range(50)] + [NamedObject("common",True)]),
'uncommon': set([NamedObject("common uncommon",True)])
}
reduce_common_words(d, 50)
self.assertEqual(1,len(d['common']))
self.assertEqual(1,len(d['uncommon']))
def test_values_still_are_set_instances(self):
d = {
'common': set([NamedObject("common uncommon",True) for i in range(50)] + [NamedObject("common",True)]),
'uncommon': set([NamedObject("common uncommon",True)])
}
reduce_common_words(d, 50)
self.assert_(isinstance(d['common'],set))
self.assert_(isinstance(d['uncommon'],set))
def test_dont_raise_KeyError_when_a_word_has_been_removed(self):
#If a word has been removed by the reduce, an object in a subsequent common word that
#contains the word that has been removed would cause a KeyError.
d = {
'foo': set([NamedObject('foo bar baz',True) for i in range(50)]),
'bar': set([NamedObject('foo bar baz',True) for i in range(50)]),
'baz': set([NamedObject('foo bar baz',True) for i in range(49)])
}
try:
reduce_common_words(d, 50)
except KeyError:
self.fail()
def test_unpack_fields(self):
#object.words may be fields.
def create_it():
o = NamedObject('')
o.words = [['foo','bar'],['baz']]
return o
d = {
'foo': set([create_it() for i in range(50)])
}
try:
reduce_common_words(d, 50)
except TypeError:
self.fail("must support fields.")
def test_consider_a_reduced_common_word_common_even_after_reduction(self):
#There was a bug in the code that causeda word that has already been reduced not to
#be counted as a common word for subsequent words. For example, if 'foo' is processed
#as a common word, keeping a "foo bar" file in it, and the 'bar' is processed, "foo bar"
#would not stay in 'bar' because 'foo' is not a common word anymore.
only_common = NamedObject('foo bar',True)
d = {
'foo': set([NamedObject('foo bar baz',True) for i in range(49)] + [only_common]),
'bar': set([NamedObject('foo bar baz',True) for i in range(49)] + [only_common]),
'baz': set([NamedObject('foo bar baz',True) for i in range(49)])
}
reduce_common_words(d, 50)
self.assertEqual(1,len(d['foo']))
self.assertEqual(1,len(d['bar']))
self.assertEqual(49,len(d['baz']))
class TCget_match(TestCase):
def test_simple(self):
o1 = NamedObject("foo bar",True)
o2 = NamedObject("bar bleh",True)
m = get_match(o1,o2)
self.assertEqual(50,m.percentage)
self.assertEqual(['foo','bar'],m.first.words)
self.assertEqual(['bar','bleh'],m.second.words)
self.assert_(m.first is o1)
self.assert_(m.second is o2)
def test_in(self):
o1 = NamedObject("foo",True)
o2 = NamedObject("bar",True)
m = get_match(o1,o2)
self.assert_(o1 in m)
self.assert_(o2 in m)
self.assert_(object() not in m)
def test_word_weight(self):
self.assertEqual(int((6.0 / 13.0) * 100),get_match(NamedObject("foo bar",True),NamedObject("bar bleh",True),(WEIGHT_WORDS,)).percentage)
class TCMatchFactory(TestCase):
def test_empty(self):
self.assertEqual([],MatchFactory().getmatches([]))
def test_defaults(self):
mf = MatchFactory()
self.assertEqual(50,mf.common_word_threshold)
self.assertEqual(False,mf.weight_words)
self.assertEqual(False,mf.match_similar_words)
self.assertEqual(False,mf.no_field_order)
self.assertEqual(0,mf.min_match_percentage)
def test_simple(self):
l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("a b c foo")]
r = MatchFactory().getmatches(l)
self.assertEqual(2,len(r))
seek = [m for m in r if m.percentage == 50] #"foo bar" and "bar bleh"
m = seek[0]
self.assertEqual(['foo','bar'],m.first.words)
self.assertEqual(['bar','bleh'],m.second.words)
seek = [m for m in r if m.percentage == 33] #"foo bar" and "a b c foo"
m = seek[0]
self.assertEqual(['foo','bar'],m.first.words)
self.assertEqual(['a','b','c','foo'],m.second.words)
def test_null_and_unrelated_objects(self):
l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject(""),NamedObject("unrelated object")]
r = MatchFactory().getmatches(l)
self.assertEqual(1,len(r))
m = r[0]
self.assertEqual(50,m.percentage)
self.assertEqual(['foo','bar'],m.first.words)
self.assertEqual(['bar','bleh'],m.second.words)
def test_twice_the_same_word(self):
l = [NamedObject("foo foo bar"),NamedObject("bar bleh")]
r = MatchFactory().getmatches(l)
self.assertEqual(1,len(r))
def test_twice_the_same_word_when_preworded(self):
l = [NamedObject("foo foo bar",True),NamedObject("bar bleh",True)]
r = MatchFactory().getmatches(l)
self.assertEqual(1,len(r))
def test_two_words_match(self):
l = [NamedObject("foo bar"),NamedObject("foo bar bleh")]
r = MatchFactory().getmatches(l)
self.assertEqual(1,len(r))
def test_match_files_with_only_common_words(self):
#If a word occurs more than 50 times, it is excluded from the matching process
#The problem with the common_word_threshold is that the files containing only common
#words will never be matched together. We *should* match them.
mf = MatchFactory()
mf.common_word_threshold = 50
l = [NamedObject("foo") for i in range(50)]
r = mf.getmatches(l)
self.assertEqual(1225,len(r))
def test_use_words_already_there_if_there(self):
o1 = NamedObject('foo')
o2 = NamedObject('bar')
o2.words = ['foo']
self.assertEqual(1,len(MatchFactory().getmatches([o1,o2])))
def test_job(self):
def do_progress(p,d=''):
self.log.append(p)
return True
j = job.Job(1,do_progress)
self.log = []
s = "foo bar"
MatchFactory().getmatches([NamedObject(s),NamedObject(s),NamedObject(s)],j)
self.assert_(len(self.log) > 2)
self.assertEqual(0,self.log[0])
self.assertEqual(100,self.log[-1])
def test_weight_words(self):
mf = MatchFactory()
mf.weight_words = True
l = [NamedObject("foo bar"),NamedObject("bar bleh")]
m = mf.getmatches(l)[0]
self.assertEqual(int((6.0 / 13.0) * 100),m.percentage)
def test_similar_word(self):
mf = MatchFactory()
mf.match_similar_words = True
l = [NamedObject("foobar"),NamedObject("foobars")]
self.assertEqual(1,len(mf.getmatches(l)))
self.assertEqual(100,mf.getmatches(l)[0].percentage)
l = [NamedObject("foobar"),NamedObject("foo")]
self.assertEqual(0,len(mf.getmatches(l))) #too far
l = [NamedObject("bizkit"),NamedObject("bizket")]
self.assertEqual(1,len(mf.getmatches(l)))
l = [NamedObject("foobar"),NamedObject("foosbar")]
self.assertEqual(1,len(mf.getmatches(l)))
def test_single_object_with_similar_words(self):
mf = MatchFactory()
mf.match_similar_words = True
l = [NamedObject("foo foos")]
self.assertEqual(0,len(mf.getmatches(l)))
def test_double_words_get_counted_only_once(self):
mf = MatchFactory()
l = [NamedObject("foo bar foo bleh"),NamedObject("foo bar bleh bar")]
m = mf.getmatches(l)[0]
self.assertEqual(75,m.percentage)
def test_with_fields(self):
mf = MatchFactory()
o1 = NamedObject("foo bar - foo bleh")
o2 = NamedObject("foo bar - bleh bar")
o1.words = getfields(o1.name)
o2.words = getfields(o2.name)
m = mf.getmatches([o1, o2])[0]
self.assertEqual(50, m.percentage)
def test_with_fields_no_order(self):
mf = MatchFactory()
mf.no_field_order = True
o1 = NamedObject("foo bar - foo bleh")
o2 = NamedObject("bleh bang - foo bar")
o1.words = getfields(o1.name)
o2.words = getfields(o2.name)
m = mf.getmatches([o1, o2])[0]
self.assertEqual(50 ,m.percentage)
def test_only_match_similar_when_the_option_is_set(self):
mf = MatchFactory()
mf.match_similar_words = False
l = [NamedObject("foobar"),NamedObject("foobars")]
self.assertEqual(0,len(mf.getmatches(l)))
def test_dont_recurse_do_match(self):
# with nosetests, the stack is increased. The number has to be high enough not to be failing falsely
sys.setrecursionlimit(100)
mf = MatchFactory()
files = [NamedObject('foo bar') for i in range(101)]
try:
mf.getmatches(files)
except RuntimeError:
self.fail()
finally:
sys.setrecursionlimit(1000)
def test_min_match_percentage(self):
l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("a b c foo")]
mf = MatchFactory()
mf.min_match_percentage = 50
r = mf.getmatches(l)
self.assertEqual(1,len(r)) #Only "foo bar" / "bar bleh" should match
def test_limit(self):
l = [NamedObject(),NamedObject(),NamedObject()]
mf = MatchFactory()
mf.limit = 2
r = mf.getmatches(l)
self.assertEqual(2,len(r))
def test_MemoryError(self):
@log_calls
def mocked_match(first, second, flags):
if len(mocked_match.calls) > 42:
raise MemoryError()
return Match(first, second, 0)
objects = [NamedObject() for i in range(10)] # results in 45 matches
self.mock(engine, 'get_match', mocked_match)
mf = MatchFactory()
try:
r = mf.getmatches(objects)
except MemoryError:
self.fail('MemorryError must be handled')
self.assertEqual(42, len(r))
class TCGroup(TestCase):
def test_empy(self):
g = Group()
self.assertEqual(None,g.ref)
self.assertEqual([],g.dupes)
self.assertEqual(0,len(g.matches))
def test_add_match(self):
g = Group()
m = get_match(NamedObject("foo",True),NamedObject("bar",True))
g.add_match(m)
self.assert_(g.ref is m.first)
self.assertEqual([m.second],g.dupes)
self.assertEqual(1,len(g.matches))
self.assert_(m in g.matches)
def test_multiple_add_match(self):
g = Group()
o1 = NamedObject("a",True)
o2 = NamedObject("b",True)
o3 = NamedObject("c",True)
o4 = NamedObject("d",True)
g.add_match(get_match(o1,o2))
self.assert_(g.ref is o1)
self.assertEqual([o2],g.dupes)
self.assertEqual(1,len(g.matches))
g.add_match(get_match(o1,o3))
self.assertEqual([o2],g.dupes)
self.assertEqual(2,len(g.matches))
g.add_match(get_match(o2,o3))
self.assertEqual([o2,o3],g.dupes)
self.assertEqual(3,len(g.matches))
g.add_match(get_match(o1,o4))
self.assertEqual([o2,o3],g.dupes)
self.assertEqual(4,len(g.matches))
g.add_match(get_match(o2,o4))
self.assertEqual([o2,o3],g.dupes)
self.assertEqual(5,len(g.matches))
g.add_match(get_match(o3,o4))
self.assertEqual([o2,o3,o4],g.dupes)
self.assertEqual(6,len(g.matches))
def test_len(self):
g = Group()
self.assertEqual(0,len(g))
g.add_match(get_match(NamedObject("foo",True),NamedObject("bar",True)))
self.assertEqual(2,len(g))
def test_add_same_match_twice(self):
g = Group()
m = get_match(NamedObject("foo",True),NamedObject("foo",True))
g.add_match(m)
self.assertEqual(2,len(g))
self.assertEqual(1,len(g.matches))
g.add_match(m)
self.assertEqual(2,len(g))
self.assertEqual(1,len(g.matches))
def test_in(self):
g = Group()
o1 = NamedObject("foo",True)
o2 = NamedObject("bar",True)
self.assert_(o1 not in g)
g.add_match(get_match(o1,o2))
self.assert_(o1 in g)
self.assert_(o2 in g)
def test_remove(self):
g = Group()
o1 = NamedObject("foo",True)
o2 = NamedObject("bar",True)
o3 = NamedObject("bleh",True)
g.add_match(get_match(o1,o2))
g.add_match(get_match(o1,o3))
g.add_match(get_match(o2,o3))
self.assertEqual(3,len(g.matches))
self.assertEqual(3,len(g))
g.remove_dupe(o3)
self.assertEqual(1,len(g.matches))
self.assertEqual(2,len(g))
g.remove_dupe(o1)
self.assertEqual(0,len(g.matches))
self.assertEqual(0,len(g))
def test_remove_with_ref_dupes(self):
g = Group()
o1 = NamedObject("foo",True)
o2 = NamedObject("bar",True)
o3 = NamedObject("bleh",True)
g.add_match(get_match(o1,o2))
g.add_match(get_match(o1,o3))
g.add_match(get_match(o2,o3))
o1.is_ref = True
o2.is_ref = True
g.remove_dupe(o3)
self.assertEqual(0,len(g))
def test_switch_ref(self):
o1 = NamedObject(with_words=True)
o2 = NamedObject(with_words=True)
g = Group()
g.add_match(get_match(o1,o2))
self.assert_(o1 is g.ref)
g.switch_ref(o2)
self.assert_(o2 is g.ref)
self.assertEqual([o1],g.dupes)
g.switch_ref(o2)
self.assert_(o2 is g.ref)
g.switch_ref(NamedObject('',True))
self.assert_(o2 is g.ref)
def test_get_match_of(self):
g = Group()
for m in get_match_triangle():
g.add_match(m)
o = g.dupes[0]
m = g.get_match_of(o)
self.assert_(g.ref in m)
self.assert_(o in m)
self.assert_(g.get_match_of(NamedObject('',True)) is None)
self.assert_(g.get_match_of(g.ref) is None)
def test_percentage(self):
#percentage should return the avg percentage in relation to the ref
m1,m2,m3 = get_match_triangle()
m1 = Match(m1[0], m1[1], 100)
m2 = Match(m2[0], m2[1], 50)
m3 = Match(m3[0], m3[1], 33)
g = Group()
g.add_match(m1)
g.add_match(m2)
g.add_match(m3)
self.assertEqual(75,g.percentage)
g.switch_ref(g.dupes[0])
self.assertEqual(66,g.percentage)
g.remove_dupe(g.dupes[0])
self.assertEqual(33,g.percentage)
g.add_match(m1)
g.add_match(m2)
self.assertEqual(66,g.percentage)
def test_percentage_on_empty_group(self):
g = Group()
self.assertEqual(0,g.percentage)
def test_prioritize(self):
m1,m2,m3 = get_match_triangle()
o1 = m1.first
o2 = m1.second
o3 = m2.second
o1.name = 'c'
o2.name = 'b'
o3.name = 'a'
g = Group()
g.add_match(m1)
g.add_match(m2)
g.add_match(m3)
self.assert_(o1 is g.ref)
g.prioritize(lambda x:x.name)
self.assert_(o3 is g.ref)
def test_prioritize_with_tie_breaker(self):
# if the ref has the same key as one or more of the dupe, run the tie_breaker func among them
g = get_test_group()
o1, o2, o3 = g.ordered
tie_breaker = lambda ref, dupe: dupe is o3
g.prioritize(lambda x:0, tie_breaker)
self.assertTrue(g.ref is o3)
def test_prioritize_with_tie_breaker_runs_on_all_dupes(self):
# Even if a dupe is chosen to switch with ref with a tie breaker, we still run the tie breaker
# with other dupes and the newly chosen ref
g = get_test_group()
o1, o2, o3 = g.ordered
o1.foo = 1
o2.foo = 2
o3.foo = 3
tie_breaker = lambda ref, dupe: dupe.foo > ref.foo
g.prioritize(lambda x:0, tie_breaker)
self.assertTrue(g.ref is o3)
def test_prioritize_with_tie_breaker_runs_only_on_tie_dupes(self):
# The tie breaker only runs on dupes that had the same value for the key_func
g = get_test_group()
o1, o2, o3 = g.ordered
o1.foo = 2
o2.foo = 2
o3.foo = 1
o1.bar = 1
o2.bar = 2
o3.bar = 3
key_func = lambda x: -x.foo
tie_breaker = lambda ref, dupe: dupe.bar > ref.bar
g.prioritize(key_func, tie_breaker)
self.assertTrue(g.ref is o2)
def test_list_like(self):
g = Group()
o1,o2 = (NamedObject("foo",True),NamedObject("bar",True))
g.add_match(get_match(o1,o2))
self.assert_(g[0] is o1)
self.assert_(g[1] is o2)
def test_clean_matches(self):
g = Group()
o1,o2,o3 = (NamedObject("foo",True),NamedObject("bar",True),NamedObject("baz",True))
g.add_match(get_match(o1,o2))
g.add_match(get_match(o1,o3))
g.clean_matches()
self.assertEqual(1,len(g.matches))
self.assertEqual(0,len(g.candidates))
class TCget_groups(TestCase):
def test_empty(self):
r = get_groups([])
self.assertEqual([],r)
def test_simple(self):
l = [NamedObject("foo bar"),NamedObject("bar bleh")]
matches = MatchFactory().getmatches(l)
m = matches[0]
r = get_groups(matches)
self.assertEqual(1,len(r))
g = r[0]
self.assert_(g.ref is m.first)
self.assertEqual([m.second],g.dupes)
def test_group_with_multiple_matches(self):
#This results in 3 matches
l = [NamedObject("foo"),NamedObject("foo"),NamedObject("foo")]
matches = MatchFactory().getmatches(l)
r = get_groups(matches)
self.assertEqual(1,len(r))
g = r[0]
self.assertEqual(3,len(g))
def test_must_choose_a_group(self):
l = [NamedObject("a b"),NamedObject("a b"),NamedObject("b c"),NamedObject("c d"),NamedObject("c d")]
#There will be 2 groups here: group "a b" and group "c d"
#"b c" can go either of them, but not both.
matches = MatchFactory().getmatches(l)
r = get_groups(matches)
self.assertEqual(2,len(r))
self.assertEqual(5,len(r[0])+len(r[1]))
def test_should_all_go_in_the_same_group(self):
l = [NamedObject("a b"),NamedObject("a b"),NamedObject("a b"),NamedObject("a b")]
#There will be 2 groups here: group "a b" and group "c d"
#"b c" can fit in both, but it must be in only one of them
matches = MatchFactory().getmatches(l)
r = get_groups(matches)
self.assertEqual(1,len(r))
def test_give_priority_to_matches_with_higher_percentage(self):
o1 = NamedObject(with_words=True)
o2 = NamedObject(with_words=True)
o3 = NamedObject(with_words=True)
m1 = Match(o1, o2, 1)
m2 = Match(o2, o3, 2)
r = get_groups([m1,m2])
self.assertEqual(1,len(r))
g = r[0]
self.assertEqual(2,len(g))
self.assert_(o1 not in g)
self.assert_(o2 in g)
self.assert_(o3 in g)
def test_four_sized_group(self):
l = [NamedObject("foobar") for i in xrange(4)]
m = MatchFactory().getmatches(l)
r = get_groups(m)
self.assertEqual(1,len(r))
self.assertEqual(4,len(r[0]))
def test_referenced_by_ref2(self):
o1 = NamedObject(with_words=True)
o2 = NamedObject(with_words=True)
o3 = NamedObject(with_words=True)
m1 = get_match(o1,o2)
m2 = get_match(o3,o1)
m3 = get_match(o3,o2)
r = get_groups([m1,m2,m3])
self.assertEqual(3,len(r[0]))
def test_job(self):
def do_progress(p,d=''):
self.log.append(p)
return True
self.log = []
j = job.Job(1,do_progress)
m1,m2,m3 = get_match_triangle()
#101%: To make sure it is processed first so the job test works correctly
m4 = Match(NamedObject('a',True), NamedObject('a',True), 101)
get_groups([m1,m2,m3,m4],j)
self.assertEqual(0,self.log[0])
self.assertEqual(100,self.log[-1])

83
py/tests/export_test.py Normal file
View File

@@ -0,0 +1,83 @@
# Unit Name: dupeguru.tests.export_test
# Created By: Virgil Dupras
# Created On: 2006/09/16
# $Id$
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
from xml.dom import minidom
from StringIO import StringIO
from hsutil.testcase import TestCase
from .. import export
from ..export import *
class TCoutput_columns_xml(TestCase):
def test_empty_columns(self):
f = StringIO()
output_column_xml(f,[])
f.seek(0)
doc = minidom.parse(f)
root = doc.documentElement
self.assertEqual('columns',root.nodeName)
self.assertEqual(0,len(root.childNodes))
def test_some_columns(self):
f = StringIO()
output_column_xml(f,[('foo',True),('bar',False),('baz',True)])
f.seek(0)
doc = minidom.parse(f)
columns = doc.getElementsByTagName('column')
self.assertEqual(3,len(columns))
c1,c2,c3 = columns
self.assertEqual('foo',c1.getAttribute('display'))
self.assertEqual('bar',c2.getAttribute('display'))
self.assertEqual('baz',c3.getAttribute('display'))
self.assertEqual('y',c1.getAttribute('enabled'))
self.assertEqual('n',c2.getAttribute('enabled'))
self.assertEqual('y',c3.getAttribute('enabled'))
class TCmerge_css_into_xhtml(TestCase):
def test_main(self):
css = StringIO()
css.write('foobar')
css.seek(0)
xhtml = StringIO()
xhtml.write("""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>dupeGuru - Duplicate file scanner</title>
<link rel="SHORTCUT ICON" href="/favicon.ico" />
<link rel="stylesheet" href="../hardcoded.css" type="text/css" />
</head>
<body>
</body>
</html>""")
xhtml.seek(0)
self.assert_(merge_css_into_xhtml(xhtml,css))
xhtml.seek(0)
doc = minidom.parse(xhtml)
head = doc.getElementsByTagName('head')[0]
#A style node should have been added in head.
styles = head.getElementsByTagName('style')
self.assertEqual(1,len(styles))
style = styles[0]
self.assertEqual('text/css',style.getAttribute('type'))
self.assertEqual('foobar',style.firstChild.nodeValue.strip())
#all <link rel="stylesheet"> should be removed
self.assertEqual(1,len(head.getElementsByTagName('link')))
def test_empty(self):
self.assert_(not merge_css_into_xhtml(StringIO(),StringIO()))
def test_malformed(self):
xhtml = StringIO()
xhtml.write("""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">""")
xhtml.seek(0)
self.assert_(not merge_css_into_xhtml(xhtml,StringIO()))

149
py/tests/ignore_test.py Normal file
View File

@@ -0,0 +1,149 @@
# Unit Name: dupeguru.tests.ignore_test
# Created By: Virgil Dupras
# Created On: 2006/05/02
# $Id$
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
import cStringIO
import xml.dom.minidom
from nose.tools import eq_
from ..ignore import *
def test_empty():
il = IgnoreList()
eq_(0,len(il))
assert not il.AreIgnored('foo','bar')
def test_simple():
il = IgnoreList()
il.Ignore('foo','bar')
assert il.AreIgnored('foo','bar')
assert il.AreIgnored('bar','foo')
assert not il.AreIgnored('foo','bleh')
assert not il.AreIgnored('bleh','bar')
eq_(1,len(il))
def test_multiple():
il = IgnoreList()
il.Ignore('foo','bar')
il.Ignore('foo','bleh')
il.Ignore('bleh','bar')
il.Ignore('aybabtu','bleh')
assert il.AreIgnored('foo','bar')
assert il.AreIgnored('bar','foo')
assert il.AreIgnored('foo','bleh')
assert il.AreIgnored('bleh','bar')
assert not il.AreIgnored('aybabtu','bar')
eq_(4,len(il))
def test_clear():
il = IgnoreList()
il.Ignore('foo','bar')
il.Clear()
assert not il.AreIgnored('foo','bar')
assert not il.AreIgnored('bar','foo')
eq_(0,len(il))
def test_add_same_twice():
il = IgnoreList()
il.Ignore('foo','bar')
il.Ignore('bar','foo')
eq_(1,len(il))
def test_save_to_xml():
il = IgnoreList()
il.Ignore('foo','bar')
il.Ignore('foo','bleh')
il.Ignore('bleh','bar')
f = cStringIO.StringIO()
il.save_to_xml(f)
f.seek(0)
doc = xml.dom.minidom.parse(f)
root = doc.documentElement
eq_('ignore_list',root.nodeName)
children = [c for c in root.childNodes if c.localName]
eq_(2,len(children))
eq_(2,len([c for c in children if c.nodeName == 'file']))
f1,f2 = children
subchildren = [c for c in f1.childNodes if c.localName == 'file'] +\
[c for c in f2.childNodes if c.localName == 'file']
eq_(3,len(subchildren))
def test_SaveThenLoad():
il = IgnoreList()
il.Ignore('foo','bar')
il.Ignore('foo','bleh')
il.Ignore('bleh','bar')
il.Ignore(u'\u00e9','bar')
f = cStringIO.StringIO()
il.save_to_xml(f)
f.seek(0)
il = IgnoreList()
il.load_from_xml(f)
eq_(4,len(il))
assert il.AreIgnored(u'\u00e9','bar')
def test_LoadXML_with_empty_file_tags():
f = cStringIO.StringIO()
f.write('<?xml version="1.0" encoding="utf-8"?><ignore_list><file><file/></file></ignore_list>')
f.seek(0)
il = IgnoreList()
il.load_from_xml(f)
eq_(0,len(il))
def test_AreIgnore_works_when_a_child_is_a_key_somewhere_else():
il = IgnoreList()
il.Ignore('foo','bar')
il.Ignore('bar','baz')
assert il.AreIgnored('bar','foo')
def test_no_dupes_when_a_child_is_a_key_somewhere_else():
il = IgnoreList()
il.Ignore('foo','bar')
il.Ignore('bar','baz')
il.Ignore('bar','foo')
eq_(2,len(il))
def test_iterate():
#It must be possible to iterate through ignore list
il = IgnoreList()
expected = [('foo','bar'),('bar','baz'),('foo','baz')]
for i in expected:
il.Ignore(i[0],i[1])
for i in il:
expected.remove(i) #No exception should be raised
assert not expected #expected should be empty
def test_filter():
il = IgnoreList()
il.Ignore('foo','bar')
il.Ignore('bar','baz')
il.Ignore('foo','baz')
il.Filter(lambda f,s: f == 'bar')
eq_(1,len(il))
assert not il.AreIgnored('foo','bar')
assert il.AreIgnored('bar','baz')
def test_save_with_non_ascii_non_unicode_items():
il = IgnoreList()
il.Ignore('\xac','\xbf')
f = cStringIO.StringIO()
try:
il.save_to_xml(f)
except Exception as e:
raise AssertionError(unicode(e))
def test_len():
il = IgnoreList()
eq_(0,len(il))
il.Ignore('foo','bar')
eq_(1,len(il))
def test_nonzero():
il = IgnoreList()
assert not il
il.Ignore('foo','bar')
assert il

734
py/tests/results_test.py Normal file
View File

@@ -0,0 +1,734 @@
# Unit Name: dupeguru.tests.results_test
# Created By: Virgil Dupras
# Created On: 2006/02/23
# $Id$
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
import unittest
import StringIO
import xml.dom.minidom
import os.path as op
from hsutil.path import Path
from hsutil.testcase import TestCase
from hsutil.misc import first
from . import engine_test
from .. import data, engine
from ..results import *
class NamedObject(engine_test.NamedObject):
size = 1
path = property(lambda x:Path('basepath') + x.name)
is_ref = False
def __nonzero__(self):
return False #Make sure that operations are made correctly when the bool value of files is false.
# Returns a group set that looks like that:
# "foo bar" (1)
# "bar bleh" (1024)
# "foo bleh" (1)
# "ibabtu" (1)
# "ibabtu" (1)
def GetTestGroups():
objects = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("foo bleh"),NamedObject("ibabtu"),NamedObject("ibabtu")]
objects[1].size = 1024
matches = engine.MatchFactory().getmatches(objects) #we should have 5 matches
groups = engine.get_groups(matches) #We should have 2 groups
for g in groups:
g.prioritize(lambda x:objects.index(x)) #We want the dupes to be in the same order as the list is
groups.sort(key=len, reverse=True) # We want the group with 3 members to be first.
return (objects,matches,groups)
class TCResultsEmpty(TestCase):
def setUp(self):
self.results = Results(data)
def test_stat_line(self):
self.assertEqual("0 / 0 (0.00 B / 0.00 B) duplicates marked.",self.results.stat_line)
def test_groups(self):
self.assertEqual(0,len(self.results.groups))
def test_get_group_of_duplicate(self):
self.assert_(self.results.get_group_of_duplicate('foo') is None)
def test_save_to_xml(self):
f = StringIO.StringIO()
self.results.save_to_xml(f)
f.seek(0)
doc = xml.dom.minidom.parse(f)
root = doc.documentElement
self.assertEqual('results',root.nodeName)
class TCResultsWithSomeGroups(TestCase):
def setUp(self):
self.results = Results(data)
self.objects,self.matches,self.groups = GetTestGroups()
self.results.groups = self.groups
def test_stat_line(self):
self.assertEqual("0 / 3 (0.00 B / 1.01 KB) duplicates marked.",self.results.stat_line)
def test_groups(self):
self.assertEqual(2,len(self.results.groups))
def test_get_group_of_duplicate(self):
for o in self.objects:
g = self.results.get_group_of_duplicate(o)
self.assert_(isinstance(g, engine.Group))
self.assert_(o in g)
self.assert_(self.results.get_group_of_duplicate(self.groups[0]) is None)
def test_remove_duplicates(self):
g1,g2 = self.results.groups
self.results.remove_duplicates([g1.dupes[0]])
self.assertEqual(2,len(g1))
self.assert_(g1 in self.results.groups)
self.results.remove_duplicates([g1.ref])
self.assertEqual(2,len(g1))
self.assert_(g1 in self.results.groups)
self.results.remove_duplicates([g1.dupes[0]])
self.assertEqual(0,len(g1))
self.assert_(g1 not in self.results.groups)
self.results.remove_duplicates([g2.dupes[0]])
self.assertEqual(0,len(g2))
self.assert_(g2 not in self.results.groups)
self.assertEqual(0,len(self.results.groups))
def test_remove_duplicates_with_ref_files(self):
g1,g2 = self.results.groups
self.objects[0].is_ref = True
self.objects[1].is_ref = True
self.results.remove_duplicates([self.objects[2]])
self.assertEqual(0,len(g1))
self.assert_(g1 not in self.results.groups)
def test_make_ref(self):
g = self.results.groups[0]
d = g.dupes[0]
self.results.make_ref(d)
self.assert_(d is g.ref)
def test_sort_groups(self):
self.results.make_ref(self.objects[1]) #We want to make the 1024 sized object to go ref.
g1,g2 = self.groups
self.results.sort_groups(2) #2 is the key for size
self.assert_(self.results.groups[0] is g2)
self.assert_(self.results.groups[1] is g1)
self.results.sort_groups(2,False)
self.assert_(self.results.groups[0] is g1)
self.assert_(self.results.groups[1] is g2)
def test_set_groups_when_sorted(self):
self.results.make_ref(self.objects[1]) #We want to make the 1024 sized object to go ref.
self.results.sort_groups(2)
objects,matches,groups = GetTestGroups()
g1,g2 = groups
g1.switch_ref(objects[1])
self.results.groups = groups
self.assert_(self.results.groups[0] is g2)
self.assert_(self.results.groups[1] is g1)
def test_get_dupe_list(self):
self.assertEqual([self.objects[1],self.objects[2],self.objects[4]],self.results.dupes)
def test_dupe_list_is_cached(self):
self.assert_(self.results.dupes is self.results.dupes)
def test_dupe_list_cache_is_invalidated_when_needed(self):
o1,o2,o3,o4,o5 = self.objects
self.assertEqual([o2,o3,o5],self.results.dupes)
self.results.make_ref(o2)
self.assertEqual([o1,o3,o5],self.results.dupes)
objects,matches,groups = GetTestGroups()
o1,o2,o3,o4,o5 = objects
self.results.groups = groups
self.assertEqual([o2,o3,o5],self.results.dupes)
def test_dupe_list_sort(self):
o1,o2,o3,o4,o5 = self.objects
o1.size = 5
o2.size = 4
o3.size = 3
o4.size = 2
o5.size = 1
self.results.sort_dupes(2)
self.assertEqual([o5,o3,o2],self.results.dupes)
self.results.sort_dupes(2,False)
self.assertEqual([o2,o3,o5],self.results.dupes)
def test_dupe_list_remember_sort(self):
o1,o2,o3,o4,o5 = self.objects
o1.size = 5
o2.size = 4
o3.size = 3
o4.size = 2
o5.size = 1
self.results.sort_dupes(2)
self.results.make_ref(o2)
self.assertEqual([o5,o3,o1],self.results.dupes)
def test_dupe_list_sort_delta_values(self):
o1,o2,o3,o4,o5 = self.objects
o1.size = 10
o2.size = 2 #-8
o3.size = 3 #-7
o4.size = 20
o5.size = 1 #-19
self.results.sort_dupes(2,delta=True)
self.assertEqual([o5,o2,o3],self.results.dupes)
def test_sort_empty_list(self):
#There was an infinite loop when sorting an empty list.
r = Results(data)
r.sort_dupes(0)
self.assertEqual([],r.dupes)
def test_dupe_list_update_on_remove_duplicates(self):
o1,o2,o3,o4,o5 = self.objects
self.assertEqual(3,len(self.results.dupes))
self.results.remove_duplicates([o2])
self.assertEqual(2,len(self.results.dupes))
class TCResultsMarkings(TestCase):
def setUp(self):
self.results = Results(data)
self.objects,self.matches,self.groups = GetTestGroups()
self.results.groups = self.groups
def test_stat_line(self):
self.assertEqual("0 / 3 (0.00 B / 1.01 KB) duplicates marked.",self.results.stat_line)
self.results.mark(self.objects[1])
self.assertEqual("1 / 3 (1.00 KB / 1.01 KB) duplicates marked.",self.results.stat_line)
self.results.mark_invert()
self.assertEqual("2 / 3 (2.00 B / 1.01 KB) duplicates marked.",self.results.stat_line)
self.results.mark_invert()
self.results.unmark(self.objects[1])
self.results.mark(self.objects[2])
self.results.mark(self.objects[4])
self.assertEqual("2 / 3 (2.00 B / 1.01 KB) duplicates marked.",self.results.stat_line)
self.results.mark(self.objects[0]) #this is a ref, it can't be counted
self.assertEqual("2 / 3 (2.00 B / 1.01 KB) duplicates marked.",self.results.stat_line)
self.results.groups = self.groups
self.assertEqual("0 / 3 (0.00 B / 1.01 KB) duplicates marked.",self.results.stat_line)
def test_with_ref_duplicate(self):
self.objects[1].is_ref = True
self.results.groups = self.groups
self.assert_(not self.results.mark(self.objects[1]))
self.results.mark(self.objects[2])
self.assertEqual("1 / 2 (1.00 B / 2.00 B) duplicates marked.",self.results.stat_line)
def test_perform_on_marked(self):
def log_object(o):
log.append(o)
return True
log = []
self.results.mark_all()
self.results.perform_on_marked(log_object,False)
self.assert_(self.objects[1] in log)
self.assert_(self.objects[2] in log)
self.assert_(self.objects[4] in log)
self.assertEqual(3,len(log))
log = []
self.results.mark_none()
self.results.mark(self.objects[4])
self.results.perform_on_marked(log_object,True)
self.assertEqual(1,len(log))
self.assert_(self.objects[4] in log)
self.assertEqual(1,len(self.results.groups))
def test_perform_on_marked_with_problems(self):
def log_object(o):
log.append(o)
return o is not self.objects[1]
log = []
self.results.mark_all()
self.assert_(self.results.is_marked(self.objects[1]))
self.assertEqual(1,self.results.perform_on_marked(log_object, True))
self.assertEqual(3,len(log))
self.assertEqual(1,len(self.results.groups))
self.assertEqual(2,len(self.results.groups[0]))
self.assert_(self.objects[1] in self.results.groups[0])
self.assert_(not self.results.is_marked(self.objects[2]))
self.assert_(self.results.is_marked(self.objects[1]))
def test_perform_on_marked_with_ref(self):
def log_object(o):
log.append(o)
return True
log = []
self.objects[0].is_ref = True
self.objects[1].is_ref = True
self.results.mark_all()
self.results.perform_on_marked(log_object,True)
self.assert_(self.objects[1] not in log)
self.assert_(self.objects[2] in log)
self.assert_(self.objects[4] in log)
self.assertEqual(2,len(log))
self.assertEqual(0,len(self.results.groups))
def test_perform_on_marked_remove_objects_only_at_the_end(self):
def check_groups(o):
self.assertEqual(3,len(g1))
self.assertEqual(2,len(g2))
return True
g1,g2 = self.results.groups
self.results.mark_all()
self.results.perform_on_marked(check_groups,True)
self.assertEqual(0,len(g1))
self.assertEqual(0,len(g2))
self.assertEqual(0,len(self.results.groups))
def test_remove_duplicates(self):
g1 = self.results.groups[0]
g2 = self.results.groups[1]
self.results.mark(g1.dupes[0])
self.assertEqual("1 / 3 (1.00 KB / 1.01 KB) duplicates marked.",self.results.stat_line)
self.results.remove_duplicates([g1.dupes[1]])
self.assertEqual("1 / 2 (1.00 KB / 1.01 KB) duplicates marked.",self.results.stat_line)
self.results.remove_duplicates([g1.dupes[0]])
self.assertEqual("0 / 1 (0.00 B / 1.00 B) duplicates marked.",self.results.stat_line)
def test_make_ref(self):
g = self.results.groups[0]
d = g.dupes[0]
self.results.mark(d)
self.assertEqual("1 / 3 (1.00 KB / 1.01 KB) duplicates marked.",self.results.stat_line)
self.results.make_ref(d)
self.assertEqual("0 / 3 (0.00 B / 3.00 B) duplicates marked.",self.results.stat_line)
self.results.make_ref(d)
self.assertEqual("0 / 3 (0.00 B / 3.00 B) duplicates marked.",self.results.stat_line)
def test_SaveXML(self):
self.results.mark(self.objects[1])
self.results.mark_invert()
f = StringIO.StringIO()
self.results.save_to_xml(f)
f.seek(0)
doc = xml.dom.minidom.parse(f)
root = doc.documentElement
g1,g2 = root.getElementsByTagName('group')
d1,d2,d3 = g1.getElementsByTagName('file')
self.assertEqual('n',d1.getAttributeNode('marked').nodeValue)
self.assertEqual('n',d2.getAttributeNode('marked').nodeValue)
self.assertEqual('y',d3.getAttributeNode('marked').nodeValue)
d1,d2 = g2.getElementsByTagName('file')
self.assertEqual('n',d1.getAttributeNode('marked').nodeValue)
self.assertEqual('y',d2.getAttributeNode('marked').nodeValue)
def test_LoadXML(self):
def get_file(path):
return [f for f in self.objects if str(f.path) == path][0]
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
self.results.mark(self.objects[1])
self.results.mark_invert()
f = StringIO.StringIO()
self.results.save_to_xml(f)
f.seek(0)
r = Results(data)
r.load_from_xml(f,get_file)
self.assert_(not r.is_marked(self.objects[0]))
self.assert_(not r.is_marked(self.objects[1]))
self.assert_(r.is_marked(self.objects[2]))
self.assert_(not r.is_marked(self.objects[3]))
self.assert_(r.is_marked(self.objects[4]))
class TCResultsXML(TestCase):
def setUp(self):
self.results = Results(data)
self.objects, self.matches, self.groups = GetTestGroups()
self.results.groups = self.groups
def get_file(self, path): # use this as a callback for load_from_xml
return [o for o in self.objects if o.path == path][0]
def test_save_to_xml(self):
self.objects[0].is_ref = True
self.objects[0].words = [['foo','bar']]
f = StringIO.StringIO()
self.results.save_to_xml(f)
f.seek(0)
doc = xml.dom.minidom.parse(f)
root = doc.documentElement
self.assertEqual('results',root.nodeName)
children = [c for c in root.childNodes if c.localName]
self.assertEqual(2,len(children))
self.assertEqual(2,len([c for c in children if c.nodeName == 'group']))
g1,g2 = children
children = [c for c in g1.childNodes if c.localName]
self.assertEqual(6,len(children))
self.assertEqual(3,len([c for c in children if c.nodeName == 'file']))
self.assertEqual(3,len([c for c in children if c.nodeName == 'match']))
d1,d2,d3 = [c for c in children if c.nodeName == 'file']
self.assertEqual(op.join('basepath','foo bar'),d1.getAttributeNode('path').nodeValue)
self.assertEqual(op.join('basepath','bar bleh'),d2.getAttributeNode('path').nodeValue)
self.assertEqual(op.join('basepath','foo bleh'),d3.getAttributeNode('path').nodeValue)
self.assertEqual('y',d1.getAttributeNode('is_ref').nodeValue)
self.assertEqual('n',d2.getAttributeNode('is_ref').nodeValue)
self.assertEqual('n',d3.getAttributeNode('is_ref').nodeValue)
self.assertEqual('foo,bar',d1.getAttributeNode('words').nodeValue)
self.assertEqual('bar,bleh',d2.getAttributeNode('words').nodeValue)
self.assertEqual('foo,bleh',d3.getAttributeNode('words').nodeValue)
children = [c for c in g2.childNodes if c.localName]
self.assertEqual(3,len(children))
self.assertEqual(2,len([c for c in children if c.nodeName == 'file']))
self.assertEqual(1,len([c for c in children if c.nodeName == 'match']))
d1,d2 = [c for c in children if c.nodeName == 'file']
self.assertEqual(op.join('basepath','ibabtu'),d1.getAttributeNode('path').nodeValue)
self.assertEqual(op.join('basepath','ibabtu'),d2.getAttributeNode('path').nodeValue)
self.assertEqual('n',d1.getAttributeNode('is_ref').nodeValue)
self.assertEqual('n',d2.getAttributeNode('is_ref').nodeValue)
self.assertEqual('ibabtu',d1.getAttributeNode('words').nodeValue)
self.assertEqual('ibabtu',d2.getAttributeNode('words').nodeValue)
def test_save_to_xml_with_columns(self):
class FakeDataModule:
def GetDisplayInfo(self,dupe,group):
return [str(dupe.size),dupe.foo.upper()]
for i,object in enumerate(self.objects):
object.size = i
object.foo = u'bar\u00e9'
f = StringIO.StringIO()
self.results.data = FakeDataModule()
self.results.save_to_xml(f,True)
f.seek(0)
doc = xml.dom.minidom.parse(f)
root = doc.documentElement
g1,g2 = root.getElementsByTagName('group')
d1,d2,d3 = g1.getElementsByTagName('file')
d4,d5 = g2.getElementsByTagName('file')
self.assertEqual('0',d1.getElementsByTagName('data')[0].getAttribute('value'))
self.assertEqual(u'BAR\u00c9',d1.getElementsByTagName('data')[1].getAttribute('value')) #\u00c9 is upper of \u00e9
self.assertEqual('1',d2.getElementsByTagName('data')[0].getAttribute('value'))
self.assertEqual('2',d3.getElementsByTagName('data')[0].getAttribute('value'))
self.assertEqual('3',d4.getElementsByTagName('data')[0].getAttribute('value'))
self.assertEqual('4',d5.getElementsByTagName('data')[0].getAttribute('value'))
def test_LoadXML(self):
def get_file(path):
return [f for f in self.objects if str(f.path) == path][0]
self.objects[0].is_ref = True
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
f = StringIO.StringIO()
self.results.save_to_xml(f)
f.seek(0)
r = Results(data)
r.load_from_xml(f,get_file)
self.assertEqual(2,len(r.groups))
g1,g2 = r.groups
self.assertEqual(3,len(g1))
self.assert_(g1[0].is_ref)
self.assert_(not g1[1].is_ref)
self.assert_(not g1[2].is_ref)
self.assert_(g1[0] is self.objects[0])
self.assert_(g1[1] is self.objects[1])
self.assert_(g1[2] is self.objects[2])
self.assertEqual(['foo','bar'],g1[0].words)
self.assertEqual(['bar','bleh'],g1[1].words)
self.assertEqual(['foo','bleh'],g1[2].words)
self.assertEqual(2,len(g2))
self.assert_(not g2[0].is_ref)
self.assert_(not g2[1].is_ref)
self.assert_(g2[0] is self.objects[3])
self.assert_(g2[1] is self.objects[4])
self.assertEqual(['ibabtu'],g2[0].words)
self.assertEqual(['ibabtu'],g2[1].words)
def test_LoadXML_with_filename(self):
def get_file(path):
return [f for f in self.objects if str(f.path) == path][0]
filename = op.join(self.tmpdir(), 'dupeguru_results.xml')
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
self.results.save_to_xml(filename)
r = Results(data)
r.load_from_xml(filename,get_file)
self.assertEqual(2,len(r.groups))
def test_LoadXML_with_some_files_that_dont_exist_anymore(self):
def get_file(path):
if path.endswith('ibabtu 2'):
return None
return [f for f in self.objects if str(f.path) == path][0]
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
f = StringIO.StringIO()
self.results.save_to_xml(f)
f.seek(0)
r = Results(data)
r.load_from_xml(f,get_file)
self.assertEqual(1,len(r.groups))
self.assertEqual(3,len(r.groups[0]))
def test_LoadXML_missing_attributes_and_bogus_elements(self):
def get_file(path):
return [f for f in self.objects if str(f.path) == path][0]
doc = xml.dom.minidom.Document()
root = doc.appendChild(doc.createElement('foobar')) #The root element shouldn't matter, really.
group_node = root.appendChild(doc.createElement('group'))
dupe_node = group_node.appendChild(doc.createElement('file')) #Perfectly correct file
dupe_node.setAttribute('path',op.join('basepath','foo bar'))
dupe_node.setAttribute('is_ref','y')
dupe_node.setAttribute('words','foo,bar')
dupe_node = group_node.appendChild(doc.createElement('file')) #is_ref missing, default to 'n'
dupe_node.setAttribute('path',op.join('basepath','foo bleh'))
dupe_node.setAttribute('words','foo,bleh')
dupe_node = group_node.appendChild(doc.createElement('file')) #words are missing, invalid.
dupe_node.setAttribute('path',op.join('basepath','bar bleh'))
dupe_node = group_node.appendChild(doc.createElement('file')) #path is missing, invalid.
dupe_node.setAttribute('words','foo,bleh')
dupe_node = group_node.appendChild(doc.createElement('foobar')) #Invalid element name
dupe_node.setAttribute('path',op.join('basepath','bar bleh'))
dupe_node.setAttribute('is_ref','y')
dupe_node.setAttribute('words','bar,bleh')
match_node = group_node.appendChild(doc.createElement('match')) # match pointing to a bad index
match_node.setAttribute('first', '42')
match_node.setAttribute('second', '45')
match_node = group_node.appendChild(doc.createElement('match')) # match with missing attrs
match_node = group_node.appendChild(doc.createElement('match')) # match with non-int values
match_node.setAttribute('first', 'foo')
match_node.setAttribute('second', 'bar')
match_node.setAttribute('percentage', 'baz')
group_node = root.appendChild(doc.createElement('foobar')) #invalid group
group_node = root.appendChild(doc.createElement('group')) #empty group
f = StringIO.StringIO()
doc.writexml(f,'\t','\t','\n',encoding='utf-8')
f.seek(0)
r = Results(data)
r.load_from_xml(f,get_file)
self.assertEqual(1,len(r.groups))
self.assertEqual(2,len(r.groups[0]))
def test_xml_non_ascii(self):
def get_file(path):
if path == op.join('basepath',u'\xe9foo bar'):
return objects[0]
if path == op.join('basepath',u'bar bleh'):
return objects[1]
objects = [NamedObject(u"\xe9foo bar",True),NamedObject("bar bleh",True)]
matches = engine.MatchFactory().getmatches(objects) #we should have 5 matches
groups = engine.get_groups(matches) #We should have 2 groups
for g in groups:
g.prioritize(lambda x:objects.index(x)) #We want the dupes to be in the same order as the list is
results = Results(data)
results.groups = groups
f = StringIO.StringIO()
results.save_to_xml(f)
f.seek(0)
r = Results(data)
r.load_from_xml(f,get_file)
g = r.groups[0]
self.assertEqual(u"\xe9foo bar",g[0].name)
self.assertEqual(['efoo','bar'],g[0].words)
def test_load_invalid_xml(self):
f = StringIO.StringIO()
f.write('<this is invalid')
f.seek(0)
r = Results(data)
r.load_from_xml(f,None)
self.assertEqual(0,len(r.groups))
def test_load_non_existant_xml(self):
r = Results(data)
try:
r.load_from_xml('does_not_exist.xml', None)
except IOError:
self.fail()
self.assertEqual(0,len(r.groups))
def test_remember_match_percentage(self):
group = self.groups[0]
d1, d2, d3 = group
fake_matches = set()
fake_matches.add(engine.Match(d1, d2, 42))
fake_matches.add(engine.Match(d1, d3, 43))
fake_matches.add(engine.Match(d2, d3, 46))
group.matches = fake_matches
f = StringIO.StringIO()
results = self.results
results.save_to_xml(f)
f.seek(0)
results = Results(data)
results.load_from_xml(f, self.get_file)
group = results.groups[0]
d1, d2, d3 = group
match = group.get_match_of(d2) #d1 - d2
self.assertEqual(42, match[2])
match = group.get_match_of(d3) #d1 - d3
self.assertEqual(43, match[2])
group.switch_ref(d2)
match = group.get_match_of(d3) #d2 - d3
self.assertEqual(46, match[2])
def test_save_and_load(self):
# previously, when reloading matches, they wouldn't be reloaded as namedtuples
f = StringIO.StringIO()
self.results.save_to_xml(f)
f.seek(0)
self.results.load_from_xml(f, self.get_file)
first(self.results.groups[0].matches).percentage
class TCResultsFilter(TestCase):
def setUp(self):
self.results = Results(data)
self.objects, self.matches, self.groups = GetTestGroups()
self.results.groups = self.groups
self.results.apply_filter(r'foo')
def test_groups(self):
self.assertEqual(1, len(self.results.groups))
self.assert_(self.results.groups[0] is self.groups[0])
def test_dupes(self):
# There are 2 objects matching. The first one is ref. Only the 3rd one is supposed to be in dupes.
self.assertEqual(1, len(self.results.dupes))
self.assert_(self.results.dupes[0] is self.objects[2])
def test_cancel_filter(self):
self.results.apply_filter(None)
self.assertEqual(3, len(self.results.dupes))
self.assertEqual(2, len(self.results.groups))
def test_dupes_reconstructed_filtered(self):
# make_ref resets self.__dupes to None. When it's reconstructed, we want it filtered
dupe = self.results.dupes[0] #3rd object
self.results.make_ref(dupe)
self.assertEqual(1, len(self.results.dupes))
self.assert_(self.results.dupes[0] is self.objects[0])
def test_include_ref_dupes_in_filter(self):
# When only the ref of a group match the filter, include it in the group
self.results.apply_filter(None)
self.results.apply_filter(r'foo bar')
self.assertEqual(1, len(self.results.groups))
self.assertEqual(0, len(self.results.dupes))
def test_filters_build_on_one_another(self):
self.results.apply_filter(r'bar')
self.assertEqual(1, len(self.results.groups))
self.assertEqual(0, len(self.results.dupes))
def test_stat_line(self):
expected = '0 / 1 (0.00 B / 1.00 B) duplicates marked. filter: foo'
self.assertEqual(expected, self.results.stat_line)
self.results.apply_filter(r'bar')
expected = '0 / 0 (0.00 B / 0.00 B) duplicates marked. filter: foo --> bar'
self.assertEqual(expected, self.results.stat_line)
self.results.apply_filter(None)
expected = '0 / 3 (0.00 B / 1.01 KB) duplicates marked.'
self.assertEqual(expected, self.results.stat_line)
def test_mark_count_is_filtered_as_well(self):
self.results.apply_filter(None)
# We don't want to perform mark_all() because we want the mark list to contain objects
for dupe in self.results.dupes:
self.results.mark(dupe)
self.results.apply_filter(r'foo')
expected = '1 / 1 (1.00 B / 1.00 B) duplicates marked. filter: foo'
self.assertEqual(expected, self.results.stat_line)
def test_sort_groups(self):
self.results.apply_filter(None)
self.results.make_ref(self.objects[1]) # to have the 1024 b obkect as ref
g1,g2 = self.groups
self.results.apply_filter('a') # Matches both group
self.results.sort_groups(2) #2 is the key for size
self.assert_(self.results.groups[0] is g2)
self.assert_(self.results.groups[1] is g1)
self.results.apply_filter(None)
self.assert_(self.results.groups[0] is g2)
self.assert_(self.results.groups[1] is g1)
self.results.sort_groups(2, False)
self.results.apply_filter('a')
self.assert_(self.results.groups[1] is g2)
self.assert_(self.results.groups[0] is g1)
def test_set_group(self):
#We want the new group to be filtered
self.objects, self.matches, self.groups = GetTestGroups()
self.results.groups = self.groups
self.assertEqual(1, len(self.results.groups))
self.assert_(self.results.groups[0] is self.groups[0])
def test_load_cancels_filter(self):
def get_file(path):
return [f for f in self.objects if str(f.path) == path][0]
filename = op.join(self.tmpdir(), 'dupeguru_results.xml')
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
self.results.save_to_xml(filename)
r = Results(data)
r.apply_filter('foo')
r.load_from_xml(filename,get_file)
self.assertEqual(2,len(r.groups))
def test_remove_dupe(self):
self.results.remove_duplicates([self.results.dupes[0]])
self.results.apply_filter(None)
self.assertEqual(2,len(self.results.groups))
self.assertEqual(2,len(self.results.dupes))
self.results.apply_filter('ibabtu')
self.results.remove_duplicates([self.results.dupes[0]])
self.results.apply_filter(None)
self.assertEqual(1,len(self.results.groups))
self.assertEqual(1,len(self.results.dupes))
def test_filter_is_case_insensitive(self):
self.results.apply_filter(None)
self.results.apply_filter('FOO')
self.assertEqual(1, len(self.results.dupes))
def test_make_ref_on_filtered_out_doesnt_mess_stats(self):
# When filtered, a group containing filtered out dupes will display them as being reference.
# When calling make_ref on such a dupe, the total size and dupecount stats gets messed up
# because they are *not* counted in the stats in the first place.
g1, g2 = self.groups
bar_bleh = g1[1] # The "bar bleh" dupe is filtered out
self.results.make_ref(bar_bleh)
# Now the stats should display *2* markable dupes (instead of 1)
expected = '0 / 2 (0.00 B / 2.00 B) duplicates marked. filter: foo'
self.assertEqual(expected, self.results.stat_line)
self.results.apply_filter(None) # Now let's make sure our unfiltered results aren't fucked up
expected = '0 / 3 (0.00 B / 3.00 B) duplicates marked.'
self.assertEqual(expected, self.results.stat_line)
class TCResultsRefFile(TestCase):
def setUp(self):
self.results = Results(data)
self.objects, self.matches, self.groups = GetTestGroups()
self.objects[0].is_ref = True
self.objects[1].is_ref = True
self.results.groups = self.groups
def test_stat_line(self):
expected = '0 / 2 (0.00 B / 2.00 B) duplicates marked.'
self.assertEqual(expected, self.results.stat_line)
def test_make_ref(self):
d = self.results.groups[0].dupes[1] #non-ref
r = self.results.groups[0].ref
self.results.make_ref(d)
expected = '0 / 1 (0.00 B / 1.00 B) duplicates marked.'
self.assertEqual(expected, self.results.stat_line)
self.results.make_ref(r)
expected = '0 / 2 (0.00 B / 2.00 B) duplicates marked.'
self.assertEqual(expected, self.results.stat_line)

459
py/tests/scanner_test.py Normal file
View File

@@ -0,0 +1,459 @@
# Unit Name: dupeguru.tests.scanner_test
# Created By: Virgil Dupras
# Created On: 2006/03/03
# $Id$
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
from hsutil import job
from hsutil.path import Path
from hsutil.testcase import TestCase
from ..engine import getwords, Match
from ..ignore import IgnoreList
from ..scanner import *
class NamedObject(object):
def __init__(self, name="foobar", size=1):
self.name = name
self.size = size
self.path = Path('')
self.words = getwords(name)
no = NamedObject
class TCScanner(TestCase):
def test_empty(self):
s = Scanner()
r = s.GetDupeGroups([])
self.assertEqual([],r)
def test_default_settings(self):
s = Scanner()
self.assertEqual(80,s.min_match_percentage)
self.assertEqual(SCAN_TYPE_FILENAME,s.scan_type)
self.assertEqual(True,s.mix_file_kind)
self.assertEqual(False,s.word_weighting)
self.assertEqual(False,s.match_similar_words)
self.assert_(isinstance(s.ignore_list,IgnoreList))
def test_simple_with_default_settings(self):
s = Scanner()
f = [no('foo bar'),no('foo bar'),no('foo bleh')]
r = s.GetDupeGroups(f)
self.assertEqual(1,len(r))
g = r[0]
#'foo bleh' cannot be in the group because the default min match % is 80
self.assertEqual(2,len(g))
self.assert_(g.ref in f[:2])
self.assert_(g.dupes[0] in f[:2])
def test_simple_with_lower_min_match(self):
s = Scanner()
s.min_match_percentage = 50
f = [no('foo bar'),no('foo bar'),no('foo bleh')]
r = s.GetDupeGroups(f)
self.assertEqual(1,len(r))
g = r[0]
self.assertEqual(3,len(g))
def test_trim_all_ref_groups(self):
s = Scanner()
f = [no('foo'),no('foo'),no('bar'),no('bar')]
f[2].is_ref = True
f[3].is_ref = True
r = s.GetDupeGroups(f)
self.assertEqual(1,len(r))
def test_priorize(self):
s = Scanner()
f = [no('foo'),no('foo'),no('bar'),no('bar')]
f[1].size = 2
f[2].size = 3
f[3].is_ref = True
r = s.GetDupeGroups(f)
g1,g2 = r
self.assert_(f[1] in (g1.ref,g2.ref))
self.assert_(f[0] in (g1.dupes[0],g2.dupes[0]))
self.assert_(f[3] in (g1.ref,g2.ref))
self.assert_(f[2] in (g1.dupes[0],g2.dupes[0]))
def test_content_scan(self):
s = Scanner()
s.scan_type = SCAN_TYPE_CONTENT
f = [no('foo'), no('bar'), no('bleh')]
f[0].md5 = 'foobar'
f[1].md5 = 'foobar'
f[2].md5 = 'bleh'
r = s.GetDupeGroups(f)
self.assertEqual(len(r), 1)
self.assertEqual(len(r[0]), 2)
self.assertEqual(s.discarded_file_count, 0) # don't count the different md5 as discarded!
def test_content_scan_compare_sizes_first(self):
class MyFile(no):
def get_md5(file):
self.fail()
md5 = property(get_md5)
s = Scanner()
s.scan_type = SCAN_TYPE_CONTENT
f = [MyFile('foo',1),MyFile('bar',2)]
self.assertEqual(0,len(s.GetDupeGroups(f)))
def test_min_match_perc_doesnt_matter_for_content_scan(self):
s = Scanner()
s.scan_type = SCAN_TYPE_CONTENT
f = [no('foo'),no('bar'),no('bleh')]
f[0].md5 = 'foobar'
f[1].md5 = 'foobar'
f[2].md5 = 'bleh'
s.min_match_percentage = 101
r = s.GetDupeGroups(f)
self.assertEqual(1,len(r))
self.assertEqual(2,len(r[0]))
s.min_match_percentage = 0
r = s.GetDupeGroups(f)
self.assertEqual(1,len(r))
self.assertEqual(2,len(r[0]))
def test_content_scan_puts_md5_in_words_at_the_end(self):
s = Scanner()
s.scan_type = SCAN_TYPE_CONTENT
f = [no('foo'),no('bar')]
f[0].md5 = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
f[1].md5 = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
r = s.GetDupeGroups(f)
g = r[0]
self.assertEqual(['--'],g.ref.words)
self.assertEqual(['--'],g.dupes[0].words)
def test_extension_is_not_counted_in_filename_scan(self):
s = Scanner()
s.min_match_percentage = 100
f = [no('foo.bar'),no('foo.bleh')]
r = s.GetDupeGroups(f)
self.assertEqual(1,len(r))
self.assertEqual(2,len(r[0]))
def test_job(self):
def do_progress(progress,desc=''):
log.append(progress)
return True
s = Scanner()
log = []
f = [no('foo bar'),no('foo bar'),no('foo bleh')]
r = s.GetDupeGroups(f, job.Job(1,do_progress))
self.assertEqual(0,log[0])
self.assertEqual(100,log[-1])
def test_mix_file_kind(self):
s = Scanner()
s.mix_file_kind = False
f = [no('foo.1'),no('foo.2')]
r = s.GetDupeGroups(f)
self.assertEqual(0,len(r))
def test_word_weighting(self):
s = Scanner()
s.min_match_percentage = 75
s.word_weighting = True
f = [no('foo bar'),no('foo bar bleh')]
r = s.GetDupeGroups(f)
self.assertEqual(1,len(r))
g = r[0]
m = g.get_match_of(g.dupes[0])
self.assertEqual(75,m.percentage) # 16 letters, 12 matching
def test_similar_words(self):
s = Scanner()
s.match_similar_words = True
f = [no('The White Stripes'),no('The Whites Stripe'),no('Limp Bizkit'),no('Limp Bizkitt')]
r = s.GetDupeGroups(f)
self.assertEqual(2,len(r))
def test_fields(self):
s = Scanner()
s.scan_type = SCAN_TYPE_FIELDS
f = [no('The White Stripes - Little Ghost'),no('The White Stripes - Little Acorn')]
r = s.GetDupeGroups(f)
self.assertEqual(0,len(r))
def test_fields_no_order(self):
s = Scanner()
s.scan_type = SCAN_TYPE_FIELDS_NO_ORDER
f = [no('The White Stripes - Little Ghost'),no('Little Ghost - The White Stripes')]
r = s.GetDupeGroups(f)
self.assertEqual(1,len(r))
def test_tag_scan(self):
s = Scanner()
s.scan_type = SCAN_TYPE_TAG
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes'
o1.title = 'The Air Near My Fingers'
o2.artist = 'The White Stripes'
o2.title = 'The Air Near My Fingers'
r = s.GetDupeGroups([o1,o2])
self.assertEqual(1,len(r))
def test_tag_with_album_scan(self):
s = Scanner()
s.scan_type = SCAN_TYPE_TAG_WITH_ALBUM
o1 = no('foo')
o2 = no('bar')
o3 = no('bleh')
o1.artist = 'The White Stripes'
o1.title = 'The Air Near My Fingers'
o1.album = 'Elephant'
o2.artist = 'The White Stripes'
o2.title = 'The Air Near My Fingers'
o2.album = 'Elephant'
o3.artist = 'The White Stripes'
o3.title = 'The Air Near My Fingers'
o3.album = 'foobar'
r = s.GetDupeGroups([o1,o2,o3])
self.assertEqual(1,len(r))
def test_that_dash_in_tags_dont_create_new_fields(self):
s = Scanner()
s.scan_type = SCAN_TYPE_TAG_WITH_ALBUM
s.min_match_percentage = 50
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes - a'
o1.title = 'The Air Near My Fingers - a'
o1.album = 'Elephant - a'
o2.artist = 'The White Stripes - b'
o2.title = 'The Air Near My Fingers - b'
o2.album = 'Elephant - b'
r = s.GetDupeGroups([o1,o2])
self.assertEqual(1,len(r))
def test_tag_scan_with_different_scanned(self):
s = Scanner()
s.scan_type = SCAN_TYPE_TAG
s.scanned_tags = set(['track', 'year'])
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes'
o1.title = 'some title'
o1.track = 'foo'
o1.year = 'bar'
o2.artist = 'The White Stripes'
o2.title = 'another title'
o2.track = 'foo'
o2.year = 'bar'
r = s.GetDupeGroups([o1, o2])
self.assertEqual(1, len(r))
def test_tag_scan_only_scans_existing_tags(self):
s = Scanner()
s.scan_type = SCAN_TYPE_TAG
s.scanned_tags = set(['artist', 'foo'])
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes'
o1.foo = 'foo'
o2.artist = 'The White Stripes'
o2.foo = 'bar'
r = s.GetDupeGroups([o1, o2])
self.assertEqual(1, len(r)) # Because 'foo' is not scanned, they match
def test_tag_scan_converts_to_str(self):
s = Scanner()
s.scan_type = SCAN_TYPE_TAG
s.scanned_tags = set(['track'])
o1 = no('foo')
o2 = no('bar')
o1.track = 42
o2.track = 42
try:
r = s.GetDupeGroups([o1, o2])
except TypeError:
self.fail()
self.assertEqual(1, len(r))
def test_tag_scan_non_ascii(self):
s = Scanner()
s.scan_type = SCAN_TYPE_TAG
s.scanned_tags = set(['title'])
o1 = no('foo')
o2 = no('bar')
o1.title = u'foobar\u00e9'
o2.title = u'foobar\u00e9'
try:
r = s.GetDupeGroups([o1, o2])
except UnicodeEncodeError:
self.fail()
self.assertEqual(1, len(r))
def test_audio_content_scan(self):
s = Scanner()
s.scan_type = SCAN_TYPE_CONTENT_AUDIO
f = [no('foo'),no('bar'),no('bleh')]
f[0].md5 = 'foo'
f[1].md5 = 'bar'
f[2].md5 = 'bleh'
f[0].md5partial = 'foo'
f[1].md5partial = 'foo'
f[2].md5partial = 'bleh'
f[0].audiosize = 1
f[1].audiosize = 1
f[2].audiosize = 1
r = s.GetDupeGroups(f)
self.assertEqual(1,len(r))
self.assertEqual(2,len(r[0]))
def test_audio_content_scan_compare_sizes_first(self):
class MyFile(no):
def get_md5(file):
self.fail()
md5partial = property(get_md5)
s = Scanner()
s.scan_type = SCAN_TYPE_CONTENT_AUDIO
f = [MyFile('foo'),MyFile('bar')]
f[0].audiosize = 1
f[1].audiosize = 2
self.assertEqual(0,len(s.GetDupeGroups(f)))
def test_ignore_list(self):
s = Scanner()
f1 = no('foobar')
f2 = no('foobar')
f3 = no('foobar')
f1.path = Path('dir1/foobar')
f2.path = Path('dir2/foobar')
f3.path = Path('dir3/foobar')
s.ignore_list.Ignore(str(f1.path),str(f2.path))
s.ignore_list.Ignore(str(f1.path),str(f3.path))
r = s.GetDupeGroups([f1,f2,f3])
self.assertEqual(1,len(r))
g = r[0]
self.assertEqual(1,len(g.dupes))
self.assert_(f1 not in g)
self.assert_(f2 in g)
self.assert_(f3 in g)
# Ignored matches are not counted as discarded
self.assertEqual(s.discarded_file_count, 0)
def test_ignore_list_checks_for_unicode(self):
#scanner was calling path_str for ignore list checks. Since the Path changes, it must
#be unicode(path)
s = Scanner()
f1 = no('foobar')
f2 = no('foobar')
f3 = no('foobar')
f1.path = Path(u'foo1\u00e9')
f2.path = Path(u'foo2\u00e9')
f3.path = Path(u'foo3\u00e9')
s.ignore_list.Ignore(unicode(f1.path),unicode(f2.path))
s.ignore_list.Ignore(unicode(f1.path),unicode(f3.path))
r = s.GetDupeGroups([f1,f2,f3])
self.assertEqual(1,len(r))
g = r[0]
self.assertEqual(1,len(g.dupes))
self.assert_(f1 not in g)
self.assert_(f2 in g)
self.assert_(f3 in g)
def test_custom_match_factory(self):
class MatchFactory(object):
def getmatches(self,objects,j=None):
return [Match(objects[0], objects[1], 420)]
s = Scanner()
s.match_factory = MatchFactory()
o1,o2 = no('foo'),no('bar')
groups = s.GetDupeGroups([o1,o2])
self.assertEqual(1,len(groups))
g = groups[0]
self.assertEqual(2,len(g))
g.switch_ref(o1)
m = g.get_match_of(o2)
self.assertEqual((o1,o2,420),m)
def test_file_evaluates_to_false(self):
# A very wrong way to use any() was added at some point, causing resulting group list
# to be empty.
class FalseNamedObject(NamedObject):
def __nonzero__(self):
return False
s = Scanner()
f1 = FalseNamedObject('foobar')
f2 = FalseNamedObject('foobar')
r = s.GetDupeGroups([f1,f2])
self.assertEqual(1,len(r))
def test_size_threshold(self):
# Only file equal or higher than the size_threshold in size are scanned
s = Scanner()
f1 = no('foo', 1)
f2 = no('foo', 2)
f3 = no('foo', 3)
s.size_threshold = 2
groups = s.GetDupeGroups([f1,f2,f3])
self.assertEqual(len(groups), 1)
[group] = groups
self.assertEqual(len(group), 2)
self.assertTrue(f1 not in group)
self.assertTrue(f2 in group)
self.assertTrue(f3 in group)
def test_tie_breaker_path_deepness(self):
# If there is a tie in prioritization, path deepness is used as a tie breaker
s = Scanner()
o1, o2 = no('foo'), no('foo')
o1.path = Path('foo')
o2.path = Path('foo/bar')
[group] = s.GetDupeGroups([o1, o2])
self.assertTrue(group.ref is o2)
def test_tie_breaker_copy(self):
# if copy is in the words used (even if it has a deeper path), it becomes a dupe
s = Scanner()
o1, o2 = no('foo bar Copy'), no('foo bar')
o1.path = Path('deeper/path')
o2.path = Path('foo')
[group] = s.GetDupeGroups([o1, o2])
self.assertTrue(group.ref is o2)
def test_tie_breaker_same_name_plus_digit(self):
# if ref has the same words as dupe, but has some just one extra word which is a digit, it
# becomes a dupe
s = Scanner()
o1, o2 = no('foo bar 42'), no('foo bar')
o1.path = Path('deeper/path')
o2.path = Path('foo')
[group] = s.GetDupeGroups([o1, o2])
self.assertTrue(group.ref is o2)
def test_partial_group_match(self):
# Count the number od discarded matches (when a file doesn't match all other dupes of the
# group) in Scanner.discarded_file_count
s = Scanner()
o1, o2, o3 = no('a b'), no('a'), no('b')
s.min_match_percentage = 50
[group] = s.GetDupeGroups([o1, o2, o3])
self.assertEqual(len(group), 2)
self.assertTrue(o1 in group)
self.assertTrue(o2 in group)
self.assertTrue(o3 not in group)
self.assertEqual(s.discarded_file_count, 1)
class TCScannerME(TestCase):
def test_priorize(self):
# in ScannerME, bitrate goes first (right after is_ref) in priorization
s = ScannerME()
o1, o2 = no('foo'), no('foo')
o1.bitrate = 1
o2.bitrate = 2
[group] = s.GetDupeGroups([o1, o2])
self.assertTrue(group.ref is o2)