From 9ed4b7abf048c1c970a74fa916c0c19903193100 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Sun, 29 May 2016 14:13:19 -0400 Subject: [PATCH] refactoring: take ignore_list out of Scanner class It's now `DupeGuru` that holds it and passes it to `get_dupe_groups()`, the only place where it's actually used in `Scanner`. This will make the SE/ME/PE merge easier by allowing us to instantiate the Scanner on-the-fly since it doesn't hold state anymore. --- core/app.py | 16 ++++++++-------- core/gui/ignore_list_dialog.py | 20 ++++++++++---------- core/scanner.py | 8 +++----- core/tests/app_test.py | 26 ++++++++++++-------------- core/tests/scanner_test.py | 25 ++++++++++++------------- 5 files changed, 45 insertions(+), 50 deletions(-) diff --git a/core/app.py b/core/app.py index aacdfa8d..9ba88703 100644 --- a/core/app.py +++ b/core/app.py @@ -1,6 +1,4 @@ -# Created By: Virgil Dupras -# Created On: 2006/11/11 -# Copyright 2015 Hardcoded Software (http://www.hardcoded.net) +# Copyright 2016 Hardcoded Software (http://www.hardcoded.net) # # This software is licensed under the "GPLv3" License as described in the "LICENSE" file, # which should be included with this package. The terms are also available at @@ -26,6 +24,7 @@ from hscommon.plat import ISWINDOWS from hscommon import desktop from . import directories, results, scanner, export, fs +from .ignore import IgnoreList from .gui.deletion_options import DeletionOptions from .gui.details_panel import DetailsPanel from .gui.directory_tree import DirectoryTree @@ -168,6 +167,7 @@ class DupeGuru(Broadcaster): os.makedirs(self.appdata) self.directories = directories.Directories() self.results = results.Results(self) + self.ignore_list = IgnoreList() self.scanner = self.SCANNER_CLASS() self.options = { 'escape_filter_regexp': True, @@ -373,7 +373,7 @@ class DupeGuru(Broadcaster): g = self.results.get_group_of_duplicate(dupe) for other in g: if other is not dupe: - self.scanner.ignore_list.Ignore(str(other.path), str(dupe.path)) + self.ignore_list.Ignore(str(other.path), str(dupe.path)) self.remove_duplicates(dupes) self.ignore_list_dialog.refresh() @@ -531,7 +531,7 @@ class DupeGuru(Broadcaster): self.directories.load_from_file(op.join(self.appdata, 'last_directories.xml')) self.notify('directories_changed') p = op.join(self.appdata, 'ignore_list.xml') - self.scanner.ignore_list.load_from_xml(p) + self.ignore_list.load_from_xml(p) self.ignore_list_dialog.refresh() def load_from(self, filename): @@ -620,7 +620,7 @@ class DupeGuru(Broadcaster): def purge_ignore_list(self): """Remove files that don't exist from :attr:`ignore_list`. """ - self.scanner.ignore_list.Filter(lambda f, s: op.exists(f) and op.exists(s)) + self.ignore_list.Filter(lambda f, s: op.exists(f) and op.exists(s)) self.ignore_list_dialog.refresh() def remove_directories(self, indexes): @@ -713,7 +713,7 @@ class DupeGuru(Broadcaster): os.makedirs(self.appdata) self.directories.save_to_file(op.join(self.appdata, 'last_directories.xml')) p = op.join(self.appdata, 'ignore_list.xml') - self.scanner.ignore_list.save_to_xml(p) + self.ignore_list.save_to_xml(p) self.notify('save_session') def save_as(self, filename): @@ -740,7 +740,7 @@ class DupeGuru(Broadcaster): if self.options['ignore_hardlink_matches']: files = self._remove_hardlink_dupes(files) logging.info('Scanning %d files' % len(files)) - self.results.groups = self.scanner.get_dupe_groups(files, j) + self.results.groups = self.scanner.get_dupe_groups(files, self.ignore_list, j) if not self.directories.has_any_file(): self.view.show_message(tr("The selected directories contain no scannable file.")) diff --git a/core/gui/ignore_list_dialog.py b/core/gui/ignore_list_dialog.py index 64b4ffcd..887acc2f 100644 --- a/core/gui/ignore_list_dialog.py +++ b/core/gui/ignore_list_dialog.py @@ -1,8 +1,8 @@ # Created On: 2012/03/13 # Copyright 2015 Hardcoded Software (http://www.hardcoded.net) -# -# This software is licensed under the "GPLv3" License as described in the "LICENSE" file, -# which should be included with this package. The terms are also available at +# +# This software is licensed under the "GPLv3" License as described in the "LICENSE" file, +# which should be included with this package. The terms are also available at # http://www.gnu.org/licenses/gpl-3.0.html from hscommon.trans import tr @@ -12,12 +12,12 @@ class IgnoreListDialog: #--- View interface # show() # - + def __init__(self, app): self.app = app - self.ignore_list = self.app.scanner.ignore_list + self.ignore_list = self.app.ignore_list self.ignore_list_table = IgnoreListTable(self) - + def clear(self): if not self.ignore_list: return @@ -25,15 +25,15 @@ class IgnoreListDialog: if self.app.view.ask_yes_no(msg): self.ignore_list.Clear() self.refresh() - + def refresh(self): self.ignore_list_table.refresh() - + def remove_selected(self): for row in self.ignore_list_table.selected_rows: self.ignore_list.remove(row.path1_original, row.path2_original) self.refresh() - + def show(self): self.view.show() - + diff --git a/core/scanner.py b/core/scanner.py index 32129486..434bb671 100644 --- a/core/scanner.py +++ b/core/scanner.py @@ -14,7 +14,6 @@ from hscommon.util import dedupe, rem_file_ext, get_file_ext from hscommon.trans import tr from . import engine -from .ignore import IgnoreList # It's quite ugly to have scan types from all editions all put in the same class, but because there's # there will be some nasty bugs popping up (ScanType is used in core when in should exclusively be @@ -71,7 +70,6 @@ def remove_dupe_paths(files): class Scanner: def __init__(self): - self.ignore_list = IgnoreList() self.discarded_file_count = 0 def _getmatches(self, files, j): @@ -133,7 +131,7 @@ class Scanner: """ raise NotImplementedError() - def get_dupe_groups(self, files, j=job.nulljob): + def get_dupe_groups(self, files, ignore_list=None, j=job.nulljob): j = j.start_subjob([8, 2]) for f in (f for f in files if not hasattr(f, 'is_ref')): f.is_ref = False @@ -163,12 +161,12 @@ class Scanner: matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)] matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()] matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)] - if self.ignore_list: + if ignore_list: j = j.start_subjob(2) iter_matches = j.iter_with_progress(matches, tr("Processed %d/%d matches against the ignore list")) matches = [ m for m in iter_matches - if not self.ignore_list.AreIgnored(str(m.first.path), str(m.second.path)) + if not ignore_list.AreIgnored(str(m.first.path), str(m.second.path)) ] logging.info('Grouping matches') groups = engine.get_groups(matches, j) diff --git a/core/tests/app_test.py b/core/tests/app_test.py index 9c119d96..cbc39a71 100644 --- a/core/tests/app_test.py +++ b/core/tests/app_test.py @@ -1,6 +1,4 @@ -# Created By: Virgil Dupras -# Created On: 2007-06-23 -# Copyright 2015 Hardcoded Software (http://www.hardcoded.net) +# Copyright 2016 Hardcoded Software (http://www.hardcoded.net) # # This software is licensed under the "GPLv3" License as described in the "LICENSE" file, # which should be included with this package. The terms are also available at @@ -14,10 +12,10 @@ from pytest import mark from hscommon.path import Path import hscommon.conflict import hscommon.util -from hscommon.testutil import CallLogger, eq_, log_calls +from hscommon.testutil import eq_, log_calls from hscommon.jobprogress.job import Job -from .base import DupeGuru, TestApp +from .base import TestApp from .results_test import GetTestGroups from .. import app, fs, engine from ..scanner import ScanType @@ -347,11 +345,11 @@ class TestCaseDupeGuruWithResults: app = self.app self.rtable.select([4]) #The dupe of the second, 2 sized group app.add_selected_to_ignore_list() - eq_(len(app.scanner.ignore_list), 1) + eq_(len(app.ignore_list), 1) self.rtable.select([1]) #first dupe of the 3 dupes group app.add_selected_to_ignore_list() #BOTH the ref and the other dupe should have been added - eq_(len(app.scanner.ignore_list), 3) + eq_(len(app.ignore_list), 3) def test_purgeIgnoreList(self, do_setup, tmpdir): app = self.app @@ -360,13 +358,13 @@ class TestCaseDupeGuruWithResults: open(p1, 'w').close() open(p2, 'w').close() dne = '/does_not_exist' - app.scanner.ignore_list.Ignore(dne,p1) - app.scanner.ignore_list.Ignore(p2,dne) - app.scanner.ignore_list.Ignore(p1,p2) + app.ignore_list.Ignore(dne,p1) + app.ignore_list.Ignore(p2,dne) + app.ignore_list.Ignore(p1,p2) app.purge_ignore_list() - eq_(1,len(app.scanner.ignore_list)) - assert app.scanner.ignore_list.AreIgnored(p1,p2) - assert not app.scanner.ignore_list.AreIgnored(dne,p1) + eq_(1,len(app.ignore_list)) + assert app.ignore_list.AreIgnored(p1,p2) + assert not app.ignore_list.AreIgnored(dne,p1) def test_only_unicode_is_added_to_ignore_list(self, do_setup): def FakeIgnore(first,second): @@ -376,7 +374,7 @@ class TestCaseDupeGuruWithResults: self.fail() app = self.app - app.scanner.ignore_list.Ignore = FakeIgnore + app.ignore_list.Ignore = FakeIgnore self.rtable.select([4]) app.add_selected_to_ignore_list() diff --git a/core/tests/scanner_test.py b/core/tests/scanner_test.py index f8b38333..4fcf7009 100644 --- a/core/tests/scanner_test.py +++ b/core/tests/scanner_test.py @@ -1,6 +1,4 @@ -# Created By: Virgil Dupras -# Created On: 2006/03/03 -# Copyright 2015 Hardcoded Software (http://www.hardcoded.net) +# Copyright 2016 Hardcoded Software (http://www.hardcoded.net) # # This software is licensed under the "GPLv3" License as described in the "LICENSE" file, # which should be included with this package. The terms are also available at @@ -13,7 +11,7 @@ from hscommon.testutil import eq_ from .. import fs from ..engine import getwords, Match from ..ignore import IgnoreList -from ..scanner import * +from ..scanner import Scanner, ScanType class NamedObject: def __init__(self, name="foobar", size=1, path=None): @@ -50,7 +48,6 @@ def test_default_settings(fake_fileexists): eq_(s.mix_file_kind, True) eq_(s.word_weighting, False) eq_(s.match_similar_words, False) - assert isinstance(s.ignore_list, IgnoreList) def test_simple_with_default_settings(fake_fileexists): s = Scanner() @@ -142,7 +139,7 @@ def test_content_scan_doesnt_put_md5_in_words_at_the_end(fake_fileexists): f[0].md5 = f[0].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f' f[1].md5 = f[1].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f' r = s.get_dupe_groups(f) - g = r[0] + r[0] def test_extension_is_not_counted_in_filename_scan(fake_fileexists): s = Scanner() @@ -160,7 +157,7 @@ def test_job(fake_fileexists): s = Scanner() log = [] f = [no('foo bar'), no('foo bar'), no('foo bleh')] - r = s.get_dupe_groups(f, job.Job(1, do_progress)) + s.get_dupe_groups(f, j=job.Job(1, do_progress)) eq_(log[0], 0) eq_(log[-1], 100) @@ -346,9 +343,10 @@ def test_ignore_list(fake_fileexists): f1.path = Path('dir1/foobar') f2.path = Path('dir2/foobar') f3.path = Path('dir3/foobar') - s.ignore_list.Ignore(str(f1.path),str(f2.path)) - s.ignore_list.Ignore(str(f1.path),str(f3.path)) - r = s.get_dupe_groups([f1,f2,f3]) + ignore_list = IgnoreList() + ignore_list.Ignore(str(f1.path),str(f2.path)) + ignore_list.Ignore(str(f1.path),str(f3.path)) + r = s.get_dupe_groups([f1,f2,f3], ignore_list=ignore_list) eq_(len(r), 1) g = r[0] eq_(len(g.dupes), 1) @@ -368,9 +366,10 @@ def test_ignore_list_checks_for_unicode(fake_fileexists): f1.path = Path('foo1\u00e9') f2.path = Path('foo2\u00e9') f3.path = Path('foo3\u00e9') - s.ignore_list.Ignore(str(f1.path),str(f2.path)) - s.ignore_list.Ignore(str(f1.path),str(f3.path)) - r = s.get_dupe_groups([f1,f2,f3]) + ignore_list = IgnoreList() + ignore_list.Ignore(str(f1.path),str(f2.path)) + ignore_list.Ignore(str(f1.path),str(f3.path)) + r = s.get_dupe_groups([f1,f2,f3], ignore_list=ignore_list) eq_(len(r), 1) g = r[0] eq_(len(g.dupes), 1)