mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-03-10 05:34:36 +00:00
[#15 state:fixed] Improved tie breaker in cases where filenames end with digits inside brackets.
This commit is contained in:
parent
531430d44a
commit
5b2d506462
@ -7,7 +7,7 @@
|
|||||||
# http://www.hardcoded.net/licenses/hs_license
|
# http://www.hardcoded.net/licenses/hs_license
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
|
|
||||||
from hscommon import job
|
from hscommon import job
|
||||||
from hsutil import io
|
from hsutil import io
|
||||||
@ -27,6 +27,15 @@ SCAN_TYPE_CONTENT_AUDIO) = range(7)
|
|||||||
|
|
||||||
SCANNABLE_TAGS = ['track', 'artist', 'album', 'title', 'genre', 'year']
|
SCANNABLE_TAGS = ['track', 'artist', 'album', 'title', 'genre', 'year']
|
||||||
|
|
||||||
|
# Matches a run of digits, optionally wrapped in (), [] or {}.
RE_DIGIT_ENDING = re.compile(r'\d+|\(\d+\)|\[\d+\]|{\d+}')


def is_same_with_digit(name, refname):
    """Return True if ``name`` is ``refname`` followed only by a number.

    The number may be bare or wrapped in round, square or curly brackets,
    e.g. ``'foo 42'``, ``'foo (42)'``, ``'foo [42]'`` or ``'foo {42}'`` for a
    ``refname`` of ``'foo'``. Whitespace around the number is ignored.

    Returns False when ``name`` does not start with ``refname``, when nothing
    follows ``refname``, or when anything other than the number follows it.
    """
    if not name.startswith(refname):
        return False
    end = name[len(refname):].strip()
    match = RE_DIGIT_ENDING.match(end)
    # ``match()`` only anchors at the start of the string, so also require
    # that the pattern consumed the whole suffix; otherwise endings such as
    # '42 remix' or '2nd' would wrongly count as a plain numeric suffix.
    return match is not None and match.end() == len(end)
|
||||||
|
|
||||||
class Scanner(object):
|
class Scanner(object):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.ignore_list = IgnoreList()
|
self.ignore_list = IgnoreList()
|
||||||
@ -71,9 +80,9 @@ class Scanner(object):
|
|||||||
return False
|
return False
|
||||||
if 'copy' in refname:
|
if 'copy' in refname:
|
||||||
return True
|
return True
|
||||||
if dupename.startswith(refname) and (dupename[len(refname):].strip().isdigit()):
|
if is_same_with_digit(dupename, refname):
|
||||||
return False
|
return False
|
||||||
if refname.startswith(dupename) and (refname[len(dupename):].strip().isdigit()):
|
if is_same_with_digit(refname, dupename):
|
||||||
return True
|
return True
|
||||||
return len(dupe.path) > len(ref.path)
|
return len(dupe.path) > len(ref.path)
|
||||||
|
|
||||||
|
@ -429,11 +429,20 @@ class ScannerTestFakeFiles(TestCase):
|
|||||||
# if ref has the same words as dupe, but has just one extra word which is a digit, it
|
# if ref has the same words as dupe, but has just one extra word which is a digit, it
|
||||||
# becomes a dupe
|
# becomes a dupe
|
||||||
s = Scanner()
|
s = Scanner()
|
||||||
o1, o2 = no('foo bar 42'), no('foo bar')
|
o1 = no('foo bar 42')
|
||||||
|
o2 = no('foo bar [42]')
|
||||||
|
o3 = no('foo bar (42)')
|
||||||
|
o4 = no('foo bar {42}')
|
||||||
|
o5 = no('foo bar')
|
||||||
|
# all numbered names have deeper paths, so they'll end up ref if the digits aren't correctly
|
||||||
|
# used as tie breakers
|
||||||
o1.path = Path('deeper/path')
|
o1.path = Path('deeper/path')
|
||||||
o2.path = Path('foo')
|
o2.path = Path('deeper/path')
|
||||||
[group] = s.GetDupeGroups([o1, o2])
|
o3.path = Path('deeper/path')
|
||||||
assert group.ref is o2
|
o4.path = Path('deeper/path')
|
||||||
|
o5.path = Path('foo')
|
||||||
|
[group] = s.GetDupeGroups([o1, o2, o3, o4, o5])
|
||||||
|
assert group.ref is o5
|
||||||
|
|
||||||
def test_partial_group_match(self):
|
def test_partial_group_match(self):
|
||||||
# Count the number of discarded matches (when a file doesn't match all other dupes of the
|
# Count the number of discarded matches (when a file doesn't match all other dupes of the
|
||||||
|
Loading…
x
Reference in New Issue
Block a user