[#15 state:fixed] Improved tie breaker in cases where filenames end with digits inside brackets.

This commit is contained in:
Virgil Dupras 2010-08-14 19:32:09 +02:00
parent 531430d44a
commit 5b2d506462
2 changed files with 25 additions and 7 deletions

View File

@ -7,7 +7,7 @@
# http://www.hardcoded.net/licenses/hs_license
import logging
import re
from hscommon import job
from hsutil import io
@ -27,6 +27,15 @@ SCAN_TYPE_CONTENT_AUDIO) = range(7)
SCANNABLE_TAGS = ['track', 'artist', 'album', 'title', 'genre', 'year']
# Matches a run of digits, either bare or wrapped in (), [] or {}.
RE_DIGIT_ENDING = re.compile(r'\d+|\(\d+\)|\[\d+\]|{\d+}')


def is_same_with_digit(name, refname):
    """Return True if `name` is `refname` followed only by a number.

    The number may be bare (``foo 42``) or wrapped in parentheses, square
    brackets or curly braces (``foo (42)``, ``foo [42]``, ``foo {42}``).
    Whitespace around the number is ignored. Returns False when `name` does
    not start with `refname`, when nothing follows `refname`, or when anything
    other than a single (optionally bracketed) number follows it.
    """
    if not name.startswith(refname):
        return False
    end = name[len(refname):].strip()
    match = RE_DIGIT_ENDING.match(end)
    # match() only anchors at the start of the string, so also require that the
    # whole remainder was consumed; otherwise 'foo 42 bar' would be accepted.
    return match is not None and match.end() == len(end)
class Scanner(object):
def __init__(self):
self.ignore_list = IgnoreList()
@ -71,9 +80,9 @@ class Scanner(object):
return False
if 'copy' in refname:
return True
if dupename.startswith(refname) and (dupename[len(refname):].strip().isdigit()):
if is_same_with_digit(dupename, refname):
return False
if refname.startswith(dupename) and (refname[len(dupename):].strip().isdigit()):
if is_same_with_digit(refname, dupename):
return True
return len(dupe.path) > len(ref.path)

View File

@ -429,11 +429,20 @@ class ScannerTestFakeFiles(TestCase):
# if ref has the same words as dupe, but has just one extra word which is a digit, it
# becomes a dupe
s = Scanner()
o1, o2 = no('foo bar 42'), no('foo bar')
o1 = no('foo bar 42')
o2 = no('foo bar [42]')
o3 = no('foo bar (42)')
o4 = no('foo bar {42}')
o5 = no('foo bar')
# all numbered names have deeper paths, so they'll end up ref if the digits aren't correctly
# used as tie breakers
o1.path = Path('deeper/path')
o2.path = Path('foo')
[group] = s.GetDupeGroups([o1, o2])
assert group.ref is o2
o2.path = Path('deeper/path')
o3.path = Path('deeper/path')
o4.path = Path('deeper/path')
o5.path = Path('foo')
[group] = s.GetDupeGroups([o1, o2, o3, o4, o5])
assert group.ref is o5
def test_partial_group_match(self):
# Count the number of discarded matches (when a file doesn't match all other dupes of the