[#15 state:fixed] Improved tie breaker in cases where filenames end with digits inside brackets.

2026-03-15 20:51:38 +00:00 · 2010-08-14 19:32:09 +02:00
parent 531430d44a
commit 5b2d506462
2 changed files with 25 additions and 7 deletions
--- a/core/scanner.py
+++ b/core/scanner.py
@@ -7,7 +7,7 @@
 # http://www.hardcoded.net/licenses/hs_license
 import logging
-
+import re
 from hscommon import job
 from hsutil import io
@@ -27,6 +27,15 @@ SCAN_TYPE_CONTENT_AUDIO) = range(7)
 SCANNABLE_TAGS = ['track', 'artist', 'album', 'title', 'genre', 'year']
 # Matches a numeric suffix: bare digits, or digits wrapped in (), [] or {}.
 # The trailing `$` forces the WHOLE remainder to be that suffix; without it, `match()` would
 # accept anything that merely starts with digits (e.g. "42abc").
 RE_DIGIT_ENDING = re.compile(r'(?:\d+|\(\d+\)|\[\d+\]|\{\d+\})$')
 def is_same_with_digit(name, refname):
     """Return True if `name` is `refname` followed only by a number.

     The number may be bare ("foo 42") or enclosed in round, square or curly brackets
     ("foo (42)", "foo [42]", "foo {42}"). Whitespace around the number is ignored.

     Returns False when `name` does not start with `refname`, when nothing follows `refname`,
     or when anything other than the number follows it.
     """
     if not name.startswith(refname):
         return False
     # What is left once the common prefix is removed, without surrounding whitespace.
     end = name[len(refname):].strip()
     return RE_DIGIT_ENDING.match(end) is not None
 class Scanner(object):
    def __init__(self):
        self.ignore_list = IgnoreList()
@@ -71,9 +80,9 @@ class Scanner(object):
            return False
        if 'copy' in refname:
            return True
-        if dupename.startswith(refname) and (dupename[len(refname):].strip().isdigit()):
+        if is_same_with_digit(dupename, refname):
            return False
-        if refname.startswith(dupename) and (refname[len(dupename):].strip().isdigit()):
+        if is_same_with_digit(refname, dupename):
            return True
        return len(dupe.path) > len(ref.path)
--- a/core/tests/scanner_test.py
+++ b/core/tests/scanner_test.py
@@ -429,11 +429,20 @@ class ScannerTestFakeFiles(TestCase):
         # if ref has the same words as dupe, but has just one extra word which is a number (with or
         # without brackets), it becomes a dupe
        s = Scanner()
-        o1, o2 = no('foo bar 42'), no('foo bar')
+        o1 = no('foo bar 42')
        o2 = no('foo bar [42]')
        o3 = no('foo bar (42)')
        o4 = no('foo bar {42}')
        o5 = no('foo bar')
        # all numbered names have deeper paths, so they'll end up ref if the digits aren't correctly
        # used as tie breakers
        o1.path = Path('deeper/path')
-        o2.path = Path('foo')
+        o2.path = Path('deeper/path')
-        [group] = s.GetDupeGroups([o1, o2])
+        o3.path = Path('deeper/path')
-        assert group.ref is o2
+        o4.path = Path('deeper/path')
        o5.path = Path('foo')
        [group] = s.GetDupeGroups([o1, o2, o3, o4, o5])
        assert group.ref is o5
    def test_partial_group_match(self):
         # Count the number of discarded matches (when a file doesn't match all other dupes of the