mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-03-10 05:34:36 +00:00
[#15 state:fixed] Improved tie breaker in cases where filenames end with digits inside brackets.
This commit is contained in:
parent
531430d44a
commit
5b2d506462
@ -7,7 +7,7 @@
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
import logging
|
||||
|
||||
import re
|
||||
|
||||
from hscommon import job
|
||||
from hsutil import io
|
||||
@ -27,6 +27,15 @@ SCAN_TYPE_CONTENT_AUDIO) = range(7)
|
||||
|
||||
SCANNABLE_TAGS = ['track', 'artist', 'album', 'title', 'genre', 'year']
|
||||
|
||||
RE_DIGIT_ENDING = re.compile(r'\d+|\(\d+\)|\[\d+\]|{\d+}')
|
||||
|
||||
def is_same_with_digit(name, refname):
|
||||
# Returns True if name is the same as refname, but with digits (with brackets or not) at the end
|
||||
if not name.startswith(refname):
|
||||
return False
|
||||
end = name[len(refname):].strip()
|
||||
return RE_DIGIT_ENDING.match(end) is not None
|
||||
|
||||
class Scanner(object):
|
||||
def __init__(self):
|
||||
self.ignore_list = IgnoreList()
|
||||
@ -71,9 +80,9 @@ class Scanner(object):
|
||||
return False
|
||||
if 'copy' in refname:
|
||||
return True
|
||||
if dupename.startswith(refname) and (dupename[len(refname):].strip().isdigit()):
|
||||
if is_same_with_digit(dupename, refname):
|
||||
return False
|
||||
if refname.startswith(dupename) and (refname[len(dupename):].strip().isdigit()):
|
||||
if is_same_with_digit(refname, dupename):
|
||||
return True
|
||||
return len(dupe.path) > len(ref.path)
|
||||
|
||||
|
@ -429,11 +429,20 @@ class ScannerTestFakeFiles(TestCase):
|
||||
# if ref has the same words as dupe, but has some just one extra word which is a digit, it
|
||||
# becomes a dupe
|
||||
s = Scanner()
|
||||
o1, o2 = no('foo bar 42'), no('foo bar')
|
||||
o1 = no('foo bar 42')
|
||||
o2 = no('foo bar [42]')
|
||||
o3 = no('foo bar (42)')
|
||||
o4 = no('foo bar {42}')
|
||||
o5 = no('foo bar')
|
||||
# all numbered names have deeper paths, so they'll end up ref if the digits aren't correctly
|
||||
# used as tie breakers
|
||||
o1.path = Path('deeper/path')
|
||||
o2.path = Path('foo')
|
||||
[group] = s.GetDupeGroups([o1, o2])
|
||||
assert group.ref is o2
|
||||
o2.path = Path('deeper/path')
|
||||
o3.path = Path('deeper/path')
|
||||
o4.path = Path('deeper/path')
|
||||
o5.path = Path('foo')
|
||||
[group] = s.GetDupeGroups([o1, o2, o3, o4, o5])
|
||||
assert group.ref is o5
|
||||
|
||||
def test_partial_group_match(self):
|
||||
# Count the number od discarded matches (when a file doesn't match all other dupes of the
|
||||
|
Loading…
x
Reference in New Issue
Block a user