mirror of
https://github.com/arsenetar/dupeguru.git
synced 2024-11-16 20:29:02 +00:00
838f8ae352
--HG-- rename : base/cocoa/AppDelegate.h => cocoa/base/AppDelegate.h rename : base/cocoa/AppDelegate.m => cocoa/base/AppDelegate.m rename : base/cocoa/Consts.h => cocoa/base/Consts.h rename : base/cocoa/DetailsPanel.h => cocoa/base/DetailsPanel.h rename : base/cocoa/DetailsPanel.m => cocoa/base/DetailsPanel.m rename : base/cocoa/DirectoryPanel.h => cocoa/base/DirectoryPanel.h rename : base/cocoa/DirectoryPanel.m => cocoa/base/DirectoryPanel.m rename : base/cocoa/PyDupeGuru.h => cocoa/base/PyDupeGuru.h rename : base/cocoa/ResultWindow.h => cocoa/base/ResultWindow.h rename : base/cocoa/ResultWindow.m => cocoa/base/ResultWindow.m rename : base/cocoa/dsa_pub.pem => cocoa/base/dsa_pub.pem rename : base/cocoa/xib/DetailsPanel.xib => cocoa/base/xib/DetailsPanel.xib rename : base/cocoa/xib/DirectoryPanel.xib => cocoa/base/xib/DirectoryPanel.xib rename : base/cocoa/xib/MainMenu.xib => cocoa/base/xib/MainMenu.xib rename : me/cocoa/AppDelegate.h => cocoa/me/AppDelegate.h rename : me/cocoa/AppDelegate.m => cocoa/me/AppDelegate.m rename : me/cocoa/Consts.h => cocoa/me/Consts.h rename : me/cocoa/DetailsPanel.h => cocoa/me/DetailsPanel.h rename : me/cocoa/DetailsPanel.m => cocoa/me/DetailsPanel.m rename : me/cocoa/DirectoryPanel.h => cocoa/me/DirectoryPanel.h rename : me/cocoa/DirectoryPanel.m => cocoa/me/DirectoryPanel.m rename : me/cocoa/Info.plist => cocoa/me/Info.plist rename : me/cocoa/PyDupeGuru.h => cocoa/me/PyDupeGuru.h rename : me/cocoa/ResultWindow.h => cocoa/me/ResultWindow.h rename : me/cocoa/ResultWindow.m => cocoa/me/ResultWindow.m rename : me/cocoa/dupeguru.icns => cocoa/me/dupeguru.icns rename : me/cocoa/dupeguru.xcodeproj/project.pbxproj => cocoa/me/dupeguru.xcodeproj/project.pbxproj rename : me/cocoa/gen.py => cocoa/me/gen.py rename : me/cocoa/main.m => cocoa/me/main.m rename : me/cocoa/py/dg_cocoa.py => cocoa/me/py/dg_cocoa.py rename : me/cocoa/py/setup.py => cocoa/me/py/setup.py rename : me/cocoa/xib/Preferences.xib => cocoa/me/xib/Preferences.xib rename : 
pe/cocoa/AppDelegate.h => cocoa/pe/AppDelegate.h rename : pe/cocoa/AppDelegate.m => cocoa/pe/AppDelegate.m rename : pe/cocoa/Consts.h => cocoa/pe/Consts.h rename : pe/cocoa/DetailsPanel.h => cocoa/pe/DetailsPanel.h rename : pe/cocoa/DetailsPanel.m => cocoa/pe/DetailsPanel.m rename : pe/cocoa/DirectoryPanel.h => cocoa/pe/DirectoryPanel.h rename : pe/cocoa/DirectoryPanel.m => cocoa/pe/DirectoryPanel.m rename : pe/cocoa/Info.plist => cocoa/pe/Info.plist rename : pe/cocoa/PictureBlocks.h => cocoa/pe/PictureBlocks.h rename : pe/cocoa/PictureBlocks.m => cocoa/pe/PictureBlocks.m rename : pe/cocoa/PyDupeGuru.h => cocoa/pe/PyDupeGuru.h rename : pe/cocoa/ResultWindow.h => cocoa/pe/ResultWindow.h rename : pe/cocoa/ResultWindow.m => cocoa/pe/ResultWindow.m rename : pe/cocoa/dupeguru.icns => cocoa/pe/dupeguru.icns rename : pe/cocoa/dupeguru.xcodeproj/project.pbxproj => cocoa/pe/dupeguru.xcodeproj/project.pbxproj rename : pe/cocoa/gen.py => cocoa/pe/gen.py rename : pe/cocoa/main.m => cocoa/pe/main.m rename : pe/cocoa/py/dg_cocoa.py => cocoa/pe/py/dg_cocoa.py rename : pe/cocoa/py/setup.py => cocoa/pe/py/setup.py rename : pe/cocoa/xib/DetailsPanel.xib => cocoa/pe/xib/DetailsPanel.xib rename : pe/cocoa/xib/Preferences.xib => cocoa/pe/xib/Preferences.xib rename : se/cocoa/AppDelegate.h => cocoa/se/AppDelegate.h rename : se/cocoa/AppDelegate.m => cocoa/se/AppDelegate.m rename : se/cocoa/Consts.h => cocoa/se/Consts.h rename : se/cocoa/DetailsPanel.h => cocoa/se/DetailsPanel.h rename : se/cocoa/DetailsPanel.m => cocoa/se/DetailsPanel.m rename : se/cocoa/DirectoryPanel.h => cocoa/se/DirectoryPanel.h rename : se/cocoa/DirectoryPanel.m => cocoa/se/DirectoryPanel.m rename : se/cocoa/Info.plist => cocoa/se/Info.plist rename : se/cocoa/PyDupeGuru.h => cocoa/se/PyDupeGuru.h rename : se/cocoa/ResultWindow.h => cocoa/se/ResultWindow.h rename : se/cocoa/ResultWindow.m => cocoa/se/ResultWindow.m rename : se/cocoa/dupeguru.icns => cocoa/se/dupeguru.icns rename : 
se/cocoa/dupeguru.xcodeproj/project.pbxproj => cocoa/se/dupeguru.xcodeproj/project.pbxproj rename : se/cocoa/gen.py => cocoa/se/gen.py rename : se/cocoa/main.m => cocoa/se/main.m rename : se/cocoa/py/dg_cocoa.py => cocoa/se/py/dg_cocoa.py rename : se/cocoa/py/setup.py => cocoa/se/py/setup.py rename : se/cocoa/xib/Preferences.xib => cocoa/se/xib/Preferences.xib rename : base/core/LICENSE => core/LICENSE rename : base/core/__init__.py => core/__init__.py rename : base/core/app.py => core/app.py rename : base/core/app_cocoa.py => core/app_cocoa.py rename : base/core/data.py => core/data.py rename : base/core/directories.py => core/directories.py rename : base/core/engine.py => core/engine.py rename : base/core/export.py => core/export.py rename : base/core/fs.py => core/fs.py rename : base/core/ignore.py => core/ignore.py rename : base/core/results.py => core/results.py rename : base/core/scanner.py => core/scanner.py rename : base/core/tests/__init__.py => core/tests/__init__.py rename : base/core/tests/app_cocoa_test.py => core/tests/app_cocoa_test.py rename : base/core/tests/app_test.py => core/tests/app_test.py rename : base/core/tests/data.py => core/tests/data.py rename : base/core/tests/directories_test.py => core/tests/directories_test.py rename : base/core/tests/engine_test.py => core/tests/engine_test.py rename : base/core/tests/ignore_test.py => core/tests/ignore_test.py rename : base/core/tests/results_test.py => core/tests/results_test.py rename : base/core/tests/scanner_test.py => core/tests/scanner_test.py rename : me/core/__init__.py => core_me/__init__.py rename : me/core/app_cocoa.py => core_me/app_cocoa.py rename : me/core/data.py => core_me/data.py rename : me/core/fs.py => core_me/fs.py rename : me/core/scanner.py => core_me/scanner.py rename : me/core/tests/__init__.py => core_me/tests/__init__.py rename : me/core/tests/scanner_test.py => core_me/tests/scanner_test.py rename : pe/core/LICENSE => core_pe/LICENSE rename : pe/core/__init__.py => 
core_pe/__init__.py rename : pe/core/app_cocoa.py => core_pe/app_cocoa.py rename : pe/core/block.py => core_pe/block.py rename : pe/core/cache.py => core_pe/cache.py rename : pe/core/data.py => core_pe/data.py rename : pe/core/gen.py => core_pe/gen.py rename : pe/core/matchbase.py => core_pe/matchbase.py rename : pe/core/modules/block/block.pyx => core_pe/modules/block/block.pyx rename : pe/core/modules/block/setup.py => core_pe/modules/block/setup.py rename : pe/core/modules/cache/cache.pyx => core_pe/modules/cache/cache.pyx rename : pe/core/modules/cache/setup.py => core_pe/modules/cache/setup.py rename : pe/core/scanner.py => core_pe/scanner.py rename : pe/core/tests/__init__.py => core_pe/tests/__init__.py rename : pe/core/tests/block_test.py => core_pe/tests/block_test.py rename : pe/core/tests/cache_test.py => core_pe/tests/cache_test.py rename : se/core/LICENSE => core_se/LICENSE rename : se/core/__init__.py => core_se/__init__.py rename : se/core/app_cocoa.py => core_se/app_cocoa.py rename : se/core/data.py => core_se/data.py rename : se/core/fs.py => core_se/fs.py rename : se/core/tests/__init__.py => core_se/tests/__init__.py rename : se/core/tests/fs_test.py => core_se/tests/fs_test.py rename : me/help/LICENSE => help_me/LICENSE rename : me/help/__init__.py => help_me/__init__.py rename : me/help/changelog.yaml => help_me/changelog.yaml rename : me/help/gen.py => help_me/gen.py rename : me/help/skeleton/hardcoded.css => help_me/skeleton/hardcoded.css rename : me/help/skeleton/images/hs_title.png => help_me/skeleton/images/hs_title.png rename : me/help/templates/base_dg.mako => help_me/templates/base_dg.mako rename : me/help/templates/credits.mako => help_me/templates/credits.mako rename : me/help/templates/directories.mako => help_me/templates/directories.mako rename : me/help/templates/faq.mako => help_me/templates/faq.mako rename : me/help/templates/intro.mako => help_me/templates/intro.mako rename : me/help/templates/power_marker.mako => 
help_me/templates/power_marker.mako rename : me/help/templates/preferences.mako => help_me/templates/preferences.mako rename : me/help/templates/quick_start.mako => help_me/templates/quick_start.mako rename : me/help/templates/results.mako => help_me/templates/results.mako rename : me/help/templates/versions.mako => help_me/templates/versions.mako rename : pe/help/LICENSE => help_pe/LICENSE rename : pe/help/__init__.py => help_pe/__init__.py rename : pe/help/changelog.yaml => help_pe/changelog.yaml rename : pe/help/gen.py => help_pe/gen.py rename : pe/help/skeleton/hardcoded.css => help_pe/skeleton/hardcoded.css rename : pe/help/skeleton/images/hs_title.png => help_pe/skeleton/images/hs_title.png rename : pe/help/templates/base_dg.mako => help_pe/templates/base_dg.mako rename : pe/help/templates/credits.mako => help_pe/templates/credits.mako rename : pe/help/templates/directories.mako => help_pe/templates/directories.mako rename : pe/help/templates/faq.mako => help_pe/templates/faq.mako rename : pe/help/templates/intro.mako => help_pe/templates/intro.mako rename : pe/help/templates/power_marker.mako => help_pe/templates/power_marker.mako rename : pe/help/templates/preferences.mako => help_pe/templates/preferences.mako rename : pe/help/templates/quick_start.mako => help_pe/templates/quick_start.mako rename : pe/help/templates/results.mako => help_pe/templates/results.mako rename : pe/help/templates/versions.mako => help_pe/templates/versions.mako rename : se/help/LICENSE => help_se/LICENSE rename : se/help/changelog.yaml => help_se/changelog.yaml rename : se/help/gen.py => help_se/gen.py rename : se/help/skeleton/hardcoded.css => help_se/skeleton/hardcoded.css rename : se/help/skeleton/images/hs_title.png => help_se/skeleton/images/hs_title.png rename : se/help/templates/base_dg.mako => help_se/templates/base_dg.mako rename : se/help/templates/credits.mako => help_se/templates/credits.mako rename : se/help/templates/directories.mako => 
help_se/templates/directories.mako rename : se/help/templates/faq.mako => help_se/templates/faq.mako rename : se/help/templates/intro.mako => help_se/templates/intro.mako rename : se/help/templates/power_marker.mako => help_se/templates/power_marker.mako rename : se/help/templates/preferences.mako => help_se/templates/preferences.mako rename : se/help/templates/quick_start.mako => help_se/templates/quick_start.mako rename : se/help/templates/results.mako => help_se/templates/results.mako rename : se/help/templates/versions.mako => help_se/templates/versions.mako rename : base/qt/WARNING => qt/WARNING rename : base/qt/__init__.py => qt/base/__init__.py rename : base/qt/app.py => qt/base/app.py rename : base/qt/details_table.py => qt/base/details_table.py rename : base/qt/dg.qrc => qt/base/dg.qrc rename : base/qt/directories_dialog.py => qt/base/directories_dialog.py rename : base/qt/directories_dialog.ui => qt/base/directories_dialog.ui rename : base/qt/directories_model.py => qt/base/directories_model.py rename : base/qt/main_window.py => qt/base/main_window.py rename : base/qt/main_window.ui => qt/base/main_window.ui rename : base/qt/platform.py => qt/base/platform.py rename : base/qt/platform_osx.py => qt/base/platform_osx.py rename : base/qt/platform_win.py => qt/base/platform_win.py rename : base/qt/preferences.py => qt/base/preferences.py rename : base/qt/results_model.py => qt/base/results_model.py rename : me/qt/app.py => qt/me/app.py rename : me/qt/build.py => qt/me/build.py rename : me/qt/details_dialog.py => qt/me/details_dialog.py rename : me/qt/details_dialog.ui => qt/me/details_dialog.ui rename : me/qt/dgme.spec => qt/me/dgme.spec rename : me/qt/gen.py => qt/me/gen.py rename : me/qt/installer.aip => qt/me/installer.aip rename : me/qt/preferences.py => qt/me/preferences.py rename : me/qt/preferences_dialog.py => qt/me/preferences_dialog.py rename : me/qt/preferences_dialog.ui => qt/me/preferences_dialog.ui rename : me/qt/profile.py => qt/me/profile.py 
rename : me/qt/start.py => qt/me/start.py rename : me/qt/verinfo => qt/me/verinfo rename : pe/qt/app.py => qt/pe/app.py rename : pe/qt/block.py => qt/pe/block.py rename : pe/qt/build.py => qt/pe/build.py rename : pe/qt/details_dialog.py => qt/pe/details_dialog.py rename : pe/qt/details_dialog.ui => qt/pe/details_dialog.ui rename : pe/qt/dgpe.spec => qt/pe/dgpe.spec rename : pe/qt/gen.py => qt/pe/gen.py rename : pe/qt/installer.aip => qt/pe/installer.aip rename : pe/qt/main_window.py => qt/pe/main_window.py rename : pe/qt/modules/block/block.pyx => qt/pe/modules/block/block.pyx rename : pe/qt/modules/block/setup.py => qt/pe/modules/block/setup.py rename : pe/qt/preferences.py => qt/pe/preferences.py rename : pe/qt/preferences_dialog.py => qt/pe/preferences_dialog.py rename : pe/qt/preferences_dialog.ui => qt/pe/preferences_dialog.ui rename : pe/qt/profile.py => qt/pe/profile.py rename : pe/qt/start.py => qt/pe/start.py rename : pe/qt/verinfo => qt/pe/verinfo rename : se/qt/app.py => qt/se/app.py rename : se/qt/build.py => qt/se/build.py rename : se/qt/details_dialog.py => qt/se/details_dialog.py rename : se/qt/details_dialog.ui => qt/se/details_dialog.ui rename : se/qt/dgse.spec => qt/se/dgse.spec rename : se/qt/gen.py => qt/se/gen.py rename : se/qt/installer.aip => qt/se/installer.aip rename : se/qt/preferences.py => qt/se/preferences.py rename : se/qt/preferences_dialog.py => qt/se/preferences_dialog.py rename : se/qt/preferences_dialog.ui => qt/se/preferences_dialog.ui rename : se/qt/profile.py => qt/se/profile.py rename : se/qt/start.py => qt/se/start.py rename : se/qt/verinfo => qt/se/verinfo extra : convert_revision : svn%3Ac306627e-7827-47d3-bdf0-9a457c9553a1/trunk%40285
467 lines
15 KiB
Python
467 lines
15 KiB
Python
# Created By: Virgil Dupras
|
|
# Created On: 2006/03/03
|
|
# $Id$
|
|
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
|
|
#
|
|
# This software is licensed under the "HS" License as described in the "LICENSE" file,
|
|
# which should be included with this package. The terms are also available at
|
|
# http://www.hardcoded.net/licenses/hs_license
|
|
|
|
from nose.tools import eq_
|
|
|
|
from hsutil import job, io
|
|
from hsutil.path import Path
|
|
from hsutil.testcase import TestCase
|
|
|
|
from .. import fs
|
|
from ..engine import getwords, Match
|
|
from ..ignore import IgnoreList
|
|
from ..scanner import *
|
|
|
|
class NamedObject(object):
    """Minimal fake file used as scanner input in these tests.

    It only carries the attributes assigned below: ``name``, ``size``, an empty ``Path`` as
    ``path`` and ``words``, which is computed from ``name`` with ``getwords``. Tests attach
    any further attributes they need directly on the instance.
    """
    def __init__(self, name="foobar", size=1):
        self.name, self.size = name, size
        self.path = Path('')
        self.words = getwords(name)
|
|
|
|
|
|
# Short alias for NamedObject so fixture lists stay compact, e.g. no('foo bar').
no = NamedObject
|
|
|
|
#--- Scanner
|
|
class ScannerTestFakeFiles(TestCase):
|
|
def setUp(self):
    # Hack: the scanner checks that files exist before grouping matches. Patch io.exists so
    # it always answers True, which keeps the tests written before that check valid.
    def always_exists(_):
        return True

    self.mock(io, 'exists', always_exists)
|
|
|
|
def test_empty(self):
    # Scanning an empty file list gives back an empty list.
    scanner = Scanner()
    groups = scanner.GetDupeGroups([])
    eq_(groups, [])
|
|
|
|
def test_default_settings(self):
    # Pin the attribute values exposed by a freshly constructed Scanner.
    scanner = Scanner()
    expected_defaults = [
        ('min_match_percentage', 80),
        ('scan_type', SCAN_TYPE_FILENAME),
        ('mix_file_kind', True),
        ('word_weighting', False),
        ('match_similar_words', False),
    ]
    for attr_name, expected in expected_defaults:
        eq_(getattr(scanner, attr_name), expected)
    assert isinstance(scanner.ignore_list, IgnoreList)
|
|
|
|
def test_simple_with_default_settings(self):
    scanner = Scanner()
    files = [no('foo bar'), no('foo bar'), no('foo bleh')]
    groups = scanner.GetDupeGroups(files)
    eq_(len(groups), 1)
    group = groups[0]
    # 'foo bleh' cannot be in the group because the default min match % is 80
    eq_(len(group), 2)
    identical_pair = files[:2]
    assert group.ref in identical_pair
    assert group.dupes[0] in identical_pair
|
|
|
|
def test_simple_with_lower_min_match(self):
    # With the threshold lowered to 50, all three files end up in the same group.
    scanner = Scanner()
    scanner.min_match_percentage = 50
    files = [no(name) for name in ('foo bar', 'foo bar', 'foo bleh')]
    groups = scanner.GetDupeGroups(files)
    eq_(len(groups), 1)
    eq_(len(groups[0]), 3)
|
|
|
|
def test_trim_all_ref_groups(self):
    # A group whose files are all refs is left out of the results, and the files of that
    # group must not be counted as discarded either.
    scanner = Scanner()
    foo_a, foo_b, bar_a, bar_b = no('foo'), no('foo'), no('bar'), no('bar')
    bar_a.is_ref = True
    bar_b.is_ref = True
    groups = scanner.GetDupeGroups([foo_a, foo_b, bar_a, bar_b])
    eq_(len(groups), 1)
    eq_(scanner.discarded_file_count, 0)
|
|
|
|
def test_priorize(self):
    scanner = Scanner()
    small_foo, big_foo, big_bar, ref_bar = no('foo'), no('foo'), no('bar'), no('bar')
    big_foo.size = 2
    big_bar.size = 3
    ref_bar.is_ref = True
    first, second = scanner.GetDupeGroups([small_foo, big_foo, big_bar, ref_bar])
    # Of the two 'foo' files, the bigger one is expected as ref.
    refs = (first.ref, second.ref)
    assert big_foo in refs
    first_dupes = (first.dupes[0], second.dupes[0])
    assert small_foo in first_dupes
    # The 'bar' file flagged is_ref is expected as ref even though the other one is bigger.
    assert ref_bar in refs
    assert big_bar in first_dupes
|
|
|
|
def test_content_scan(self):
    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_CONTENT
    files = [no('foo'), no('bar'), no('bleh')]
    for fake_file, digest in zip(files, ('foobar', 'foobar', 'bleh')):
        fake_file.md5 = fake_file.md5partial = digest
    groups = scanner.GetDupeGroups(files)
    eq_(len(groups), 1)
    eq_(len(groups[0]), 2)
    # don't count the different md5 as discarded!
    eq_(scanner.discarded_file_count, 0)
|
|
|
|
def test_content_scan_compare_sizes_first(self):
    # The two files have different sizes. Reading md5 on either of them raises, so the test
    # only passes if the scan yields no group without ever touching that attribute.
    def forbidden_md5(self):
        raise AssertionError()

    class MyFile(no):
        md5 = property(forbidden_md5)

    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_CONTENT
    files = [MyFile('foo', 1), MyFile('bar', 2)]
    groups = scanner.GetDupeGroups(files)
    eq_(len(groups), 0)
|
|
|
|
def test_min_match_perc_doesnt_matter_for_content_scan(self):
    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_CONTENT
    same_a, same_b, other = no('foo'), no('bar'), no('bleh')
    same_a.md5 = same_a.md5partial = 'foobar'
    same_b.md5 = same_b.md5partial = 'foobar'
    other.md5 = other.md5partial = 'bleh'
    files = [same_a, same_b, other]
    # The same single pair is expected at both extremes of the threshold.
    for threshold in (101, 0):
        scanner.min_match_percentage = threshold
        groups = scanner.GetDupeGroups(files)
        eq_(len(groups), 1)
        eq_(len(groups[0]), 2)
|
|
|
|
def test_content_scan_doesnt_put_md5_in_words_at_the_end(self):
    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_CONTENT
    raw_digest = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
    files = [no('foo'), no('bar')]
    for fake_file in files:
        fake_file.md5 = fake_file.md5partial = raw_digest
    groups = scanner.GetDupeGroups(files)
    # No explicit assertion: the test passes when the scan completes and at least one group
    # can be fetched from the result.
    group = groups[0]
|
|
|
|
def test_extension_is_not_counted_in_filename_scan(self):
    # Two names that differ only after the dot still match with a 100% threshold.
    scanner = Scanner()
    scanner.min_match_percentage = 100
    files = [no('foo.bar'), no('foo.bleh')]
    groups = scanner.GetDupeGroups(files)
    eq_(len(groups), 1)
    eq_(len(groups[0]), 2)
|
|
|
|
def test_job(self):
    # The progress values reported through the job start at 0 and end at 100.
    reported = []

    def do_progress(progress, desc=''):
        reported.append(progress)
        return True

    scanner = Scanner()
    files = [no('foo bar'), no('foo bar'), no('foo bleh')]
    scanner.GetDupeGroups(files, job.Job(1, do_progress))
    eq_(reported[0], 0)
    eq_(reported[-1], 100)
|
|
|
|
def test_mix_file_kind(self):
    # With mix_file_kind turned off, 'foo.1' and 'foo.2' are not grouped together.
    scanner = Scanner()
    scanner.mix_file_kind = False
    files = [no('foo.1'), no('foo.2')]
    groups = scanner.GetDupeGroups(files)
    eq_(len(groups), 0)
|
|
|
|
def test_word_weighting(self):
    scanner = Scanner()
    scanner.min_match_percentage = 75
    scanner.word_weighting = True
    files = [no('foo bar'), no('foo bar bleh')]
    groups = scanner.GetDupeGroups(files)
    eq_(len(groups), 1)
    group = groups[0]
    match = group.get_match_of(group.dupes[0])
    # 16 letters, 12 matching
    eq_(match.percentage, 75)
|
|
|
|
def test_similar_words(self):
    # With match_similar_words on, the four slightly misspelled names yield two groups.
    scanner = Scanner()
    scanner.match_similar_words = True
    names = ['The White Stripes', 'The Whites Stripe', 'Limp Bizkit', 'Limp Bizkitt']
    files = [no(name) for name in names]
    groups = scanner.GetDupeGroups(files)
    eq_(len(groups), 2)
|
|
|
|
def test_fields(self):
    # Same text before the dash, different text after it: no group in a fields scan.
    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_FIELDS
    files = [
        no('The White Stripes - Little Ghost'),
        no('The White Stripes - Little Acorn'),
    ]
    groups = scanner.GetDupeGroups(files)
    eq_(len(groups), 0)
|
|
|
|
def test_fields_no_order(self):
    # The same two dash-separated parts in swapped order still form a group.
    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_FIELDS_NO_ORDER
    files = [
        no('The White Stripes - Little Ghost'),
        no('Little Ghost - The White Stripes'),
    ]
    groups = scanner.GetDupeGroups(files)
    eq_(len(groups), 1)
|
|
|
|
def test_tag_scan(self):
    # The file names differ; the artist and title attributes are identical.
    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_TAG
    first, second = no('foo'), no('bar')
    for tagged in (first, second):
        tagged.artist = 'The White Stripes'
        tagged.title = 'The Air Near My Fingers'
    groups = scanner.GetDupeGroups([first, second])
    eq_(len(groups), 1)
|
|
|
|
def test_tag_with_album_scan(self):
    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_TAG
    scanner.scanned_tags = set(['artist', 'album', 'title'])
    # All three files share artist and title; the third one has a different album.
    files = []
    for name, album in (('foo', 'Elephant'), ('bar', 'Elephant'), ('bleh', 'foobar')):
        tagged = no(name)
        tagged.artist = 'The White Stripes'
        tagged.title = 'The Air Near My Fingers'
        tagged.album = album
        files.append(tagged)
    groups = scanner.GetDupeGroups(files)
    eq_(len(groups), 1)
|
|
|
|
def test_that_dash_in_tags_dont_create_new_fields(self):
    # Every tag value ends with ' - a' on one file and ' - b' on the other. The files are
    # still expected to match with a 50% threshold.
    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_TAG
    scanner.scanned_tags = set(['artist', 'album', 'title'])
    scanner.min_match_percentage = 50
    first, second = no('foo'), no('bar')
    first.artist, second.artist = 'The White Stripes - a', 'The White Stripes - b'
    first.title, second.title = 'The Air Near My Fingers - a', 'The Air Near My Fingers - b'
    first.album, second.album = 'Elephant - a', 'Elephant - b'
    groups = scanner.GetDupeGroups([first, second])
    eq_(len(groups), 1)
|
|
|
|
def test_tag_scan_with_different_scanned(self):
    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_TAG
    scanner.scanned_tags = set(['track', 'year'])
    first, second = no('foo'), no('bar')
    first.artist = second.artist = 'The White Stripes'
    first.title, second.title = 'some title', 'another title'
    first.track = second.track = 'foo'
    first.year = second.year = 'bar'
    groups = scanner.GetDupeGroups([first, second])
    # The titles differ, but scanned_tags only lists 'track' and 'year', which are identical.
    eq_(len(groups), 1)
|
|
|
|
def test_tag_scan_only_scans_existing_tags(self):
    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_TAG
    scanner.scanned_tags = set(['artist', 'foo'])
    first, second = no('foo'), no('bar')
    first.artist = second.artist = 'The White Stripes'
    first.foo, second.foo = 'foo', 'bar'
    groups = scanner.GetDupeGroups([first, second])
    # Because 'foo' is not scanned, they match
    eq_(len(groups), 1)
|
|
|
|
def test_tag_scan_converts_to_str(self):
    # Integer tag values must not make the scan raise TypeError.
    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_TAG
    scanner.scanned_tags = set(['track'])
    first, second = no('foo'), no('bar')
    first.track = second.track = 42
    try:
        groups = scanner.GetDupeGroups([first, second])
    except TypeError:
        raise AssertionError()
    else:
        eq_(len(groups), 1)
|
|
|
|
def test_tag_scan_non_ascii(self):
    # Unicode tag values with a non-ascii character must not raise UnicodeEncodeError.
    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_TAG
    scanner.scanned_tags = set(['title'])
    first, second = no('foo'), no('bar')
    first.title = u'foobar\u00e9'
    second.title = u'foobar\u00e9'
    try:
        groups = scanner.GetDupeGroups([first, second])
    except UnicodeEncodeError:
        raise AssertionError()
    else:
        eq_(len(groups), 1)
|
|
|
|
def test_audio_content_scan(self):
    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_CONTENT_AUDIO
    files = [no('foo'), no('bar'), no('bleh')]
    # All md5 values differ; only the first two files share md5partial. audiosize is the
    # same everywhere.
    fixtures = [
        # (md5, md5partial, audiosize)
        ('foo', 'foo', 1),
        ('bar', 'foo', 1),
        ('bleh', 'bleh', 1),
    ]
    for fake_file, (md5, md5partial, audiosize) in zip(files, fixtures):
        fake_file.md5 = md5
        fake_file.md5partial = md5partial
        fake_file.audiosize = audiosize
    groups = scanner.GetDupeGroups(files)
    eq_(len(groups), 1)
    eq_(len(groups[0]), 2)
|
|
|
|
def test_audio_content_scan_compare_sizes_first(self):
    # The two files have different audiosize values. Reading md5partial raises, so the test
    # only passes if the scan yields no group without touching that attribute.
    def forbidden_md5partial(self):
        raise AssertionError()

    class MyFile(no):
        md5partial = property(forbidden_md5partial)

    scanner = Scanner()
    scanner.scan_type = SCAN_TYPE_CONTENT_AUDIO
    first, second = MyFile('foo'), MyFile('bar')
    first.audiosize = 1
    second.audiosize = 2
    groups = scanner.GetDupeGroups([first, second])
    eq_(len(groups), 0)
|
|
|
|
def test_ignore_list(self):
    scanner = Scanner()
    first, second, third = no('foobar'), no('foobar'), no('foobar')
    first.path = Path('dir1/foobar')
    second.path = Path('dir2/foobar')
    third.path = Path('dir3/foobar')
    # The first file is ignored against each of the two others.
    for other in (second, third):
        scanner.ignore_list.Ignore(str(first.path), str(other.path))
    groups = scanner.GetDupeGroups([first, second, third])
    eq_(len(groups), 1)
    group = groups[0]
    eq_(len(group.dupes), 1)
    assert first not in group
    assert second in group
    assert third in group
    # Ignored matches are not counted as discarded
    eq_(scanner.discarded_file_count, 0)
|
|
|
|
def test_ignore_list_checks_for_unicode(self):
    # The scanner used to call path_str for ignore list checks. Since the Path changes, the
    # check must go through unicode(path).
    scanner = Scanner()
    first, second, third = no('foobar'), no('foobar'), no('foobar')
    first.path = Path(u'foo1\u00e9')
    second.path = Path(u'foo2\u00e9')
    third.path = Path(u'foo3\u00e9')
    for other in (second, third):
        scanner.ignore_list.Ignore(unicode(first.path), unicode(other.path))
    groups = scanner.GetDupeGroups([first, second, third])
    eq_(len(groups), 1)
    group = groups[0]
    eq_(len(group.dupes), 1)
    assert first not in group
    assert second in group
    assert third in group
|
|
|
|
def test_file_evaluates_to_false(self):
    # Regression test: a very wrong use of any() once made the resulting group list empty
    # for files that evaluate to False.
    class FalseNamedObject(NamedObject):
        # Truth-value hook: every instance is falsy.
        def __nonzero__(self):
            return False

    scanner = Scanner()
    falsy_files = [FalseNamedObject('foobar'), FalseNamedObject('foobar')]
    groups = scanner.GetDupeGroups(falsy_files)
    eq_(len(groups), 1)
|
|
|
|
def test_size_threshold(self):
    # Only files whose size is equal to or higher than size_threshold are scanned
    scanner = Scanner()
    below, at_threshold, above = no('foo', 1), no('foo', 2), no('foo', 3)
    scanner.size_threshold = 2
    groups = scanner.GetDupeGroups([below, at_threshold, above])
    eq_(len(groups), 1)
    (group,) = groups
    eq_(len(group), 2)
    assert below not in group
    assert at_threshold in group
    assert above in group
|
|
|
|
def test_tie_breaker_path_deepness(self):
    # When prioritization ends in a tie, path deepness breaks it: the file with the deeper
    # path is expected as ref.
    scanner = Scanner()
    shallow, deep = no('foo'), no('foo')
    shallow.path = Path('foo')
    deep.path = Path('foo/bar')
    (group,) = scanner.GetDupeGroups([shallow, deep])
    assert group.ref is deep
|
|
|
|
def test_tie_breaker_copy(self):
    # A file with 'Copy' among its words becomes a dupe, even if it has the deeper path.
    scanner = Scanner()
    copied, original = no('foo bar Copy'), no('foo bar')
    copied.path = Path('deeper/path')
    original.path = Path('foo')
    (group,) = scanner.GetDupeGroups([copied, original])
    assert group.ref is original
|
|
|
|
def test_tie_breaker_same_name_plus_digit(self):
    # A file with the same words as the other one plus a single extra word that is a digit
    # becomes a dupe, even if it has the deeper path.
    scanner = Scanner()
    numbered, plain = no('foo bar 42'), no('foo bar')
    numbered.path = Path('deeper/path')
    plain.path = Path('foo')
    (group,) = scanner.GetDupeGroups([numbered, plain])
    assert group.ref is plain
|
|
|
|
def test_partial_group_match(self):
    # Scanner.discarded_file_count counts the number of discarded matches, i.e. files that
    # don't match all the other dupes of the group.
    scanner = Scanner()
    both, only_a, only_b = no('a b'), no('a'), no('b')
    scanner.min_match_percentage = 50
    (group,) = scanner.GetDupeGroups([both, only_a, only_b])
    eq_(len(group), 2)
    assert both in group
    assert only_a in group
    assert only_b not in group
    eq_(scanner.discarded_file_count, 1)
|
|
|
|
|
|
class ScannerTest(TestCase):
    # Scanner tests that work on real files in a temporary directory.
    def test_dont_group_files_that_dont_exist(self):
        # When creating groups, check that files exist first. It's possible that these files
        # have been moved during the scan by the user.
        # In this test, one of the files has to be deleted between the get_matches() part and
        # the get_groups() part, so _getmatches is replaced by a stub that removes it.
        s = Scanner()
        s.scan_type = SCAN_TYPE_CONTENT
        p = self.tmppath()
        for name in ('file1', 'file2'):
            fp = io.open(p + name, 'w')
            try:
                fp.write('foo')
            finally:
                # Close explicitly so the content is flushed and the handle released before
                # the files are listed, instead of relying on garbage collection.
                fp.close()
        file1, file2 = fs.get_files(p)

        def getmatches(*args, **kw):
            # Remove file2 just before handing back a match that still refers to it.
            io.remove(file2.path)
            return [Match(file1, file2, 100)]

        s._getmatches = getmatches

        assert not s.GetDupeGroups([file1, file2])
|
|
|