refactoring: take ignore_list out of Scanner class

`DupeGuru` now holds the ignore list and passes it to `get_dupe_groups()`,
the only place in `Scanner` where it's actually used.

This will make the SE/ME/PE merge easier by allowing us to instantiate
the Scanner on-the-fly since it doesn't hold state anymore.
This commit is contained in:
Virgil Dupras 2016-05-29 14:13:19 -04:00
parent a0a90e8ef8
commit 9ed4b7abf0
5 changed files with 45 additions and 50 deletions

View File

@ -1,6 +1,4 @@
# Created By: Virgil Dupras
# Created On: 2006/11/11
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
@ -26,6 +24,7 @@ from hscommon.plat import ISWINDOWS
from hscommon import desktop
from . import directories, results, scanner, export, fs
from .ignore import IgnoreList
from .gui.deletion_options import DeletionOptions
from .gui.details_panel import DetailsPanel
from .gui.directory_tree import DirectoryTree
@ -168,6 +167,7 @@ class DupeGuru(Broadcaster):
os.makedirs(self.appdata)
self.directories = directories.Directories()
self.results = results.Results(self)
self.ignore_list = IgnoreList()
self.scanner = self.SCANNER_CLASS()
self.options = {
'escape_filter_regexp': True,
@ -373,7 +373,7 @@ class DupeGuru(Broadcaster):
g = self.results.get_group_of_duplicate(dupe)
for other in g:
if other is not dupe:
self.scanner.ignore_list.Ignore(str(other.path), str(dupe.path))
self.ignore_list.Ignore(str(other.path), str(dupe.path))
self.remove_duplicates(dupes)
self.ignore_list_dialog.refresh()
@ -531,7 +531,7 @@ class DupeGuru(Broadcaster):
self.directories.load_from_file(op.join(self.appdata, 'last_directories.xml'))
self.notify('directories_changed')
p = op.join(self.appdata, 'ignore_list.xml')
self.scanner.ignore_list.load_from_xml(p)
self.ignore_list.load_from_xml(p)
self.ignore_list_dialog.refresh()
def load_from(self, filename):
@ -620,7 +620,7 @@ class DupeGuru(Broadcaster):
def purge_ignore_list(self):
"""Remove files that don't exist from :attr:`ignore_list`.
"""
self.scanner.ignore_list.Filter(lambda f, s: op.exists(f) and op.exists(s))
self.ignore_list.Filter(lambda f, s: op.exists(f) and op.exists(s))
self.ignore_list_dialog.refresh()
def remove_directories(self, indexes):
@ -713,7 +713,7 @@ class DupeGuru(Broadcaster):
os.makedirs(self.appdata)
self.directories.save_to_file(op.join(self.appdata, 'last_directories.xml'))
p = op.join(self.appdata, 'ignore_list.xml')
self.scanner.ignore_list.save_to_xml(p)
self.ignore_list.save_to_xml(p)
self.notify('save_session')
def save_as(self, filename):
@ -740,7 +740,7 @@ class DupeGuru(Broadcaster):
if self.options['ignore_hardlink_matches']:
files = self._remove_hardlink_dupes(files)
logging.info('Scanning %d files' % len(files))
self.results.groups = self.scanner.get_dupe_groups(files, j)
self.results.groups = self.scanner.get_dupe_groups(files, self.ignore_list, j)
if not self.directories.has_any_file():
self.view.show_message(tr("The selected directories contain no scannable file."))

View File

@ -1,8 +1,8 @@
# Created On: 2012/03/13
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
from hscommon.trans import tr
@ -12,12 +12,12 @@ class IgnoreListDialog:
#--- View interface
# show()
#
def __init__(self, app):
self.app = app
self.ignore_list = self.app.scanner.ignore_list
self.ignore_list = self.app.ignore_list
self.ignore_list_table = IgnoreListTable(self)
def clear(self):
if not self.ignore_list:
return
@ -25,15 +25,15 @@ class IgnoreListDialog:
if self.app.view.ask_yes_no(msg):
self.ignore_list.Clear()
self.refresh()
def refresh(self):
self.ignore_list_table.refresh()
def remove_selected(self):
for row in self.ignore_list_table.selected_rows:
self.ignore_list.remove(row.path1_original, row.path2_original)
self.refresh()
def show(self):
self.view.show()

View File

@ -14,7 +14,6 @@ from hscommon.util import dedupe, rem_file_ext, get_file_ext
from hscommon.trans import tr
from . import engine
from .ignore import IgnoreList
# It's quite ugly to have scan types from all editions all put in the same class, but because there's
# there will be some nasty bugs popping up (ScanType is used in core when it should exclusively be
@ -71,7 +70,6 @@ def remove_dupe_paths(files):
class Scanner:
def __init__(self):
self.ignore_list = IgnoreList()
self.discarded_file_count = 0
def _getmatches(self, files, j):
@ -133,7 +131,7 @@ class Scanner:
"""
raise NotImplementedError()
def get_dupe_groups(self, files, j=job.nulljob):
def get_dupe_groups(self, files, ignore_list=None, j=job.nulljob):
j = j.start_subjob([8, 2])
for f in (f for f in files if not hasattr(f, 'is_ref')):
f.is_ref = False
@ -163,12 +161,12 @@ class Scanner:
matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
if self.ignore_list:
if ignore_list:
j = j.start_subjob(2)
iter_matches = j.iter_with_progress(matches, tr("Processed %d/%d matches against the ignore list"))
matches = [
m for m in iter_matches
if not self.ignore_list.AreIgnored(str(m.first.path), str(m.second.path))
if not ignore_list.AreIgnored(str(m.first.path), str(m.second.path))
]
logging.info('Grouping matches')
groups = engine.get_groups(matches, j)

View File

@ -1,6 +1,4 @@
# Created By: Virgil Dupras
# Created On: 2007-06-23
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
@ -14,10 +12,10 @@ from pytest import mark
from hscommon.path import Path
import hscommon.conflict
import hscommon.util
from hscommon.testutil import CallLogger, eq_, log_calls
from hscommon.testutil import eq_, log_calls
from hscommon.jobprogress.job import Job
from .base import DupeGuru, TestApp
from .base import TestApp
from .results_test import GetTestGroups
from .. import app, fs, engine
from ..scanner import ScanType
@ -347,11 +345,11 @@ class TestCaseDupeGuruWithResults:
app = self.app
self.rtable.select([4]) #The dupe of the second, 2 sized group
app.add_selected_to_ignore_list()
eq_(len(app.scanner.ignore_list), 1)
eq_(len(app.ignore_list), 1)
self.rtable.select([1]) #first dupe of the 3 dupes group
app.add_selected_to_ignore_list()
#BOTH the ref and the other dupe should have been added
eq_(len(app.scanner.ignore_list), 3)
eq_(len(app.ignore_list), 3)
def test_purgeIgnoreList(self, do_setup, tmpdir):
app = self.app
@ -360,13 +358,13 @@ class TestCaseDupeGuruWithResults:
open(p1, 'w').close()
open(p2, 'w').close()
dne = '/does_not_exist'
app.scanner.ignore_list.Ignore(dne,p1)
app.scanner.ignore_list.Ignore(p2,dne)
app.scanner.ignore_list.Ignore(p1,p2)
app.ignore_list.Ignore(dne,p1)
app.ignore_list.Ignore(p2,dne)
app.ignore_list.Ignore(p1,p2)
app.purge_ignore_list()
eq_(1,len(app.scanner.ignore_list))
assert app.scanner.ignore_list.AreIgnored(p1,p2)
assert not app.scanner.ignore_list.AreIgnored(dne,p1)
eq_(1,len(app.ignore_list))
assert app.ignore_list.AreIgnored(p1,p2)
assert not app.ignore_list.AreIgnored(dne,p1)
def test_only_unicode_is_added_to_ignore_list(self, do_setup):
def FakeIgnore(first,second):
@ -376,7 +374,7 @@ class TestCaseDupeGuruWithResults:
self.fail()
app = self.app
app.scanner.ignore_list.Ignore = FakeIgnore
app.ignore_list.Ignore = FakeIgnore
self.rtable.select([4])
app.add_selected_to_ignore_list()

View File

@ -1,6 +1,4 @@
# Created By: Virgil Dupras
# Created On: 2006/03/03
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
@ -13,7 +11,7 @@ from hscommon.testutil import eq_
from .. import fs
from ..engine import getwords, Match
from ..ignore import IgnoreList
from ..scanner import *
from ..scanner import Scanner, ScanType
class NamedObject:
def __init__(self, name="foobar", size=1, path=None):
@ -50,7 +48,6 @@ def test_default_settings(fake_fileexists):
eq_(s.mix_file_kind, True)
eq_(s.word_weighting, False)
eq_(s.match_similar_words, False)
assert isinstance(s.ignore_list, IgnoreList)
def test_simple_with_default_settings(fake_fileexists):
s = Scanner()
@ -142,7 +139,7 @@ def test_content_scan_doesnt_put_md5_in_words_at_the_end(fake_fileexists):
f[0].md5 = f[0].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
f[1].md5 = f[1].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
r = s.get_dupe_groups(f)
g = r[0]
r[0]
def test_extension_is_not_counted_in_filename_scan(fake_fileexists):
s = Scanner()
@ -160,7 +157,7 @@ def test_job(fake_fileexists):
s = Scanner()
log = []
f = [no('foo bar'), no('foo bar'), no('foo bleh')]
r = s.get_dupe_groups(f, job.Job(1, do_progress))
s.get_dupe_groups(f, j=job.Job(1, do_progress))
eq_(log[0], 0)
eq_(log[-1], 100)
@ -346,9 +343,10 @@ def test_ignore_list(fake_fileexists):
f1.path = Path('dir1/foobar')
f2.path = Path('dir2/foobar')
f3.path = Path('dir3/foobar')
s.ignore_list.Ignore(str(f1.path),str(f2.path))
s.ignore_list.Ignore(str(f1.path),str(f3.path))
r = s.get_dupe_groups([f1,f2,f3])
ignore_list = IgnoreList()
ignore_list.Ignore(str(f1.path),str(f2.path))
ignore_list.Ignore(str(f1.path),str(f3.path))
r = s.get_dupe_groups([f1,f2,f3], ignore_list=ignore_list)
eq_(len(r), 1)
g = r[0]
eq_(len(g.dupes), 1)
@ -368,9 +366,10 @@ def test_ignore_list_checks_for_unicode(fake_fileexists):
f1.path = Path('foo1\u00e9')
f2.path = Path('foo2\u00e9')
f3.path = Path('foo3\u00e9')
s.ignore_list.Ignore(str(f1.path),str(f2.path))
s.ignore_list.Ignore(str(f1.path),str(f3.path))
r = s.get_dupe_groups([f1,f2,f3])
ignore_list = IgnoreList()
ignore_list.Ignore(str(f1.path),str(f2.path))
ignore_list.Ignore(str(f1.path),str(f3.path))
r = s.get_dupe_groups([f1,f2,f3], ignore_list=ignore_list)
eq_(len(r), 1)
g = r[0]
eq_(len(g.dupes), 1)