refactoring: take ignore_list out of Scanner class

`DupeGuru` now holds the ignore list and passes it to `get_dupe_groups()`, the only place where it's actually used in `Scanner`. This will make the SE/ME/PE merge easier by allowing us to instantiate the Scanner on the fly, since it doesn't hold state anymore.
2025-09-11 17:58:17 +00:00 · 2016-05-29 14:13:19 -04:00 · 2016-05-29 14:13:19 -04:00 · 9ed4b7abf0
commit 9ed4b7abf0
parent a0a90e8ef8
5 changed files with 45 additions and 50 deletions
--- a/core/app.py
+++ b/core/app.py
@ -1,6 +1,4 @@
-# Created By: Virgil Dupras
-# Created On: 2006/11/11
-# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
+# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
 #
 # This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
 # which should be included with this package. The terms are also available at
@ -26,6 +24,7 @@ from hscommon.plat import ISWINDOWS
 from hscommon import desktop

 from . import directories, results, scanner, export, fs
+from .ignore import IgnoreList
 from .gui.deletion_options import DeletionOptions
 from .gui.details_panel import DetailsPanel
 from .gui.directory_tree import DirectoryTree
@ -168,6 +167,7 @@ class DupeGuru(Broadcaster):
            os.makedirs(self.appdata)
        self.directories = directories.Directories()
        self.results = results.Results(self)
+        self.ignore_list = IgnoreList()
        self.scanner = self.SCANNER_CLASS()
        self.options = {
            'escape_filter_regexp': True,
@ -373,7 +373,7 @@ class DupeGuru(Broadcaster):
            g = self.results.get_group_of_duplicate(dupe)
            for other in g:
                if other is not dupe:
-                    self.scanner.ignore_list.Ignore(str(other.path), str(dupe.path))
+                    self.ignore_list.Ignore(str(other.path), str(dupe.path))
        self.remove_duplicates(dupes)
        self.ignore_list_dialog.refresh()

@ -531,7 +531,7 @@ class DupeGuru(Broadcaster):
        self.directories.load_from_file(op.join(self.appdata, 'last_directories.xml'))
        self.notify('directories_changed')
        p = op.join(self.appdata, 'ignore_list.xml')
-        self.scanner.ignore_list.load_from_xml(p)
+        self.ignore_list.load_from_xml(p)
        self.ignore_list_dialog.refresh()

    def load_from(self, filename):
@ -620,7 +620,7 @@ class DupeGuru(Broadcaster):
    def purge_ignore_list(self):
        """Remove files that don't exist from :attr:`ignore_list`.
        """
-        self.scanner.ignore_list.Filter(lambda f, s: op.exists(f) and op.exists(s))
+        self.ignore_list.Filter(lambda f, s: op.exists(f) and op.exists(s))
        self.ignore_list_dialog.refresh()

    def remove_directories(self, indexes):
@ -713,7 +713,7 @@ class DupeGuru(Broadcaster):
            os.makedirs(self.appdata)
        self.directories.save_to_file(op.join(self.appdata, 'last_directories.xml'))
        p = op.join(self.appdata, 'ignore_list.xml')
-        self.scanner.ignore_list.save_to_xml(p)
+        self.ignore_list.save_to_xml(p)
        self.notify('save_session')

    def save_as(self, filename):
@ -740,7 +740,7 @@ class DupeGuru(Broadcaster):
            if self.options['ignore_hardlink_matches']:
                files = self._remove_hardlink_dupes(files)
            logging.info('Scanning %d files' % len(files))
-            self.results.groups = self.scanner.get_dupe_groups(files, j)
+            self.results.groups = self.scanner.get_dupe_groups(files, self.ignore_list, j)

        if not self.directories.has_any_file():
            self.view.show_message(tr("The selected directories contain no scannable file."))
--- a/core/gui/ignore_list_dialog.py
+++ b/core/gui/ignore_list_dialog.py
@ -15,7 +15,7 @@ class IgnoreListDialog:

    def __init__(self, app):
        self.app = app
-        self.ignore_list = self.app.scanner.ignore_list
+        self.ignore_list = self.app.ignore_list
        self.ignore_list_table = IgnoreListTable(self)

    def clear(self):
--- a/core/scanner.py
+++ b/core/scanner.py
@ -14,7 +14,6 @@ from hscommon.util import dedupe, rem_file_ext, get_file_ext
 from hscommon.trans import tr

 from . import engine
-from .ignore import IgnoreList

 # It's quite ugly to have scan types from all editions all put in the same class, but because there's
 # there will be some nasty bugs popping up (ScanType is used in core when in should exclusively be
@ -71,7 +70,6 @@ def remove_dupe_paths(files):

 class Scanner:
    def __init__(self):
-        self.ignore_list = IgnoreList()
        self.discarded_file_count = 0

    def _getmatches(self, files, j):
@ -133,7 +131,7 @@ class Scanner:
        """
        raise NotImplementedError()

-    def get_dupe_groups(self, files, j=job.nulljob):
+    def get_dupe_groups(self, files, ignore_list=None, j=job.nulljob):
        j = j.start_subjob([8, 2])
        for f in (f for f in files if not hasattr(f, 'is_ref')):
            f.is_ref = False
@ -163,12 +161,12 @@ class Scanner:
            matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
        matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
        matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
-        if self.ignore_list:
+        if ignore_list:
            j = j.start_subjob(2)
            iter_matches = j.iter_with_progress(matches, tr("Processed %d/%d matches against the ignore list"))
            matches = [
                m for m in iter_matches
-                if not self.ignore_list.AreIgnored(str(m.first.path), str(m.second.path))
+                if not ignore_list.AreIgnored(str(m.first.path), str(m.second.path))
            ]
        logging.info('Grouping matches')
        groups = engine.get_groups(matches, j)
--- a/core/tests/app_test.py
+++ b/core/tests/app_test.py
@ -1,6 +1,4 @@
-# Created By: Virgil Dupras
-# Created On: 2007-06-23
-# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
+# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
 #
 # This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
 # which should be included with this package. The terms are also available at
@ -14,10 +12,10 @@ from pytest import mark
 from hscommon.path import Path
 import hscommon.conflict
 import hscommon.util
-from hscommon.testutil import CallLogger, eq_, log_calls
+from hscommon.testutil import eq_, log_calls
 from hscommon.jobprogress.job import Job

-from .base import DupeGuru, TestApp
+from .base import TestApp
 from .results_test import GetTestGroups
 from .. import app, fs, engine
 from ..scanner import ScanType
@ -347,11 +345,11 @@ class TestCaseDupeGuruWithResults:
        app = self.app
        self.rtable.select([4]) #The dupe of the second, 2 sized group
        app.add_selected_to_ignore_list()
-        eq_(len(app.scanner.ignore_list), 1)
+        eq_(len(app.ignore_list), 1)
        self.rtable.select([1]) #first dupe of the 3 dupes group
        app.add_selected_to_ignore_list()
        #BOTH the ref and the other dupe should have been added
-        eq_(len(app.scanner.ignore_list), 3)
+        eq_(len(app.ignore_list), 3)

    def test_purgeIgnoreList(self, do_setup, tmpdir):
        app = self.app
@ -360,13 +358,13 @@ class TestCaseDupeGuruWithResults:
        open(p1, 'w').close()
        open(p2, 'w').close()
        dne = '/does_not_exist'
-        app.scanner.ignore_list.Ignore(dne,p1)
-        app.scanner.ignore_list.Ignore(p2,dne)
-        app.scanner.ignore_list.Ignore(p1,p2)
+        app.ignore_list.Ignore(dne,p1)
+        app.ignore_list.Ignore(p2,dne)
+        app.ignore_list.Ignore(p1,p2)
        app.purge_ignore_list()
-        eq_(1,len(app.scanner.ignore_list))
-        assert app.scanner.ignore_list.AreIgnored(p1,p2)
-        assert not app.scanner.ignore_list.AreIgnored(dne,p1)
+        eq_(1,len(app.ignore_list))
+        assert app.ignore_list.AreIgnored(p1,p2)
+        assert not app.ignore_list.AreIgnored(dne,p1)

    def test_only_unicode_is_added_to_ignore_list(self, do_setup):
        def FakeIgnore(first,second):
@ -376,7 +374,7 @@ class TestCaseDupeGuruWithResults:
                self.fail()

        app = self.app
-        app.scanner.ignore_list.Ignore = FakeIgnore
+        app.ignore_list.Ignore = FakeIgnore
        self.rtable.select([4])
        app.add_selected_to_ignore_list()

--- a/core/tests/scanner_test.py
+++ b/core/tests/scanner_test.py
@ -1,6 +1,4 @@
-# Created By: Virgil Dupras
-# Created On: 2006/03/03
-# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
+# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
 #
 # This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
 # which should be included with this package. The terms are also available at
@ -13,7 +11,7 @@ from hscommon.testutil import eq_
 from .. import fs
 from ..engine import getwords, Match
 from ..ignore import IgnoreList
-from ..scanner import *
+from ..scanner import Scanner, ScanType

 class NamedObject:
    def __init__(self, name="foobar", size=1, path=None):
@ -50,7 +48,6 @@ def test_default_settings(fake_fileexists):
    eq_(s.mix_file_kind, True)
    eq_(s.word_weighting, False)
    eq_(s.match_similar_words, False)
-    assert isinstance(s.ignore_list, IgnoreList)

 def test_simple_with_default_settings(fake_fileexists):
    s = Scanner()
@ -142,7 +139,7 @@ def test_content_scan_doesnt_put_md5_in_words_at_the_end(fake_fileexists):
    f[0].md5 = f[0].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
    f[1].md5 = f[1].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
    r = s.get_dupe_groups(f)
-    g = r[0]
+    r[0]

 def test_extension_is_not_counted_in_filename_scan(fake_fileexists):
    s = Scanner()
@ -160,7 +157,7 @@ def test_job(fake_fileexists):
    s = Scanner()
    log = []
    f = [no('foo bar'), no('foo bar'), no('foo bleh')]
-    r = s.get_dupe_groups(f, job.Job(1, do_progress))
+    s.get_dupe_groups(f, j=job.Job(1, do_progress))
    eq_(log[0], 0)
    eq_(log[-1], 100)

@ -346,9 +343,10 @@ def test_ignore_list(fake_fileexists):
    f1.path = Path('dir1/foobar')
    f2.path = Path('dir2/foobar')
    f3.path = Path('dir3/foobar')
-    s.ignore_list.Ignore(str(f1.path),str(f2.path))
-    s.ignore_list.Ignore(str(f1.path),str(f3.path))
-    r = s.get_dupe_groups([f1,f2,f3])
+    ignore_list = IgnoreList()
+    ignore_list.Ignore(str(f1.path),str(f2.path))
+    ignore_list.Ignore(str(f1.path),str(f3.path))
+    r = s.get_dupe_groups([f1,f2,f3], ignore_list=ignore_list)
    eq_(len(r), 1)
    g = r[0]
    eq_(len(g.dupes), 1)
@ -368,9 +366,10 @@ def test_ignore_list_checks_for_unicode(fake_fileexists):
    f1.path = Path('foo1\u00e9')
    f2.path = Path('foo2\u00e9')
    f3.path = Path('foo3\u00e9')
-    s.ignore_list.Ignore(str(f1.path),str(f2.path))
-    s.ignore_list.Ignore(str(f1.path),str(f3.path))
-    r = s.get_dupe_groups([f1,f2,f3])
+    ignore_list = IgnoreList()
+    ignore_list.Ignore(str(f1.path),str(f2.path))
+    ignore_list.Ignore(str(f1.path),str(f3.path))
+    r = s.get_dupe_groups([f1,f2,f3], ignore_list=ignore_list)
    eq_(len(r), 1)
    g = r[0]
    eq_(len(g.dupes), 1)