dgpe: adjusted to the MatchFactory removal.

--HG-- extra : convert_revision : svn%3Ac306627e-7827-47d3-bdf0-9a457c9553a1/trunk%40193
2025-09-11 17:58:17 +00:00 · 2009-10-18 09:26:04 +00:00 · 2009-10-18 09:26:04 +00:00 · 11977c6533
commit 11977c6533
parent 7228adf433
6 changed files with 107 additions and 101 deletions
--- a/base/py/scanner.py
+++ b/base/py/scanner.py
@ -99,7 +99,6 @@ class Scanner(object):
            g.prioritize(self._key_func, self._tie_breaker)
        return groups
    match_factory        = None
    match_similar_words  = False
    min_match_percentage = 80
    mix_file_kind        = True
--- a/pe/py/app_cocoa.py
+++ b/pe/py/app_cocoa.py
@ -27,8 +27,9 @@ from hsutil.path import Path
 from hsutil.cocoa import as_fetch
 from dupeguru import app_cocoa, directories
-from . import data, matchbase
+from . import data
 from .cache import string_to_colors, Cache
 from .scanner import ScannerPE
 mainBundle = NSBundle.mainBundle()
 PictureBlocks = mainBundle.classNamed_('PictureBlocks')
@ -126,11 +127,11 @@ class IPhotoLibrary(fs.Directory):
 class DupeGuruPE(app_cocoa.DupeGuru):
    def __init__(self):
        app_cocoa.DupeGuru.__init__(self, data, 'dupeGuru Picture Edition', appid=5)
-        self.scanner.match_factory = matchbase.AsyncMatchFactory()
+        self.scanner = ScannerPE()
        self.directories.dirclass = Directory
        self.directories.special_dirclasses[Path('iPhoto Library')] = lambda _, __: self._create_iphoto_library()
        p = op.join(self.appdata, 'cached_pictures.db')
-        self.scanner.match_factory.cached_blocks = Cache(p)
+        self.scanner.cached_blocks = Cache(p)
    def _create_iphoto_library(self):
        ud = NSUserDefaults.standardUserDefaults()
--- a/pe/py/matchbase.py
+++ b/pe/py/matchbase.py
@ -20,47 +20,27 @@ from .block import avgdiff, DifferentBlockCountError, NoBlocksError
 from .cache import Cache
 MIN_ITERATIONS = 3
 BLOCK_COUNT_PER_SIDE = 15
 # Enough so that we're sure that the main thread will not wait after a result.get() call
 # cpucount*2 should be enough to be sure that the spawned process will not wait after the results
 # collection made by the main process.
 RESULTS_QUEUE_LIMIT = multiprocessing.cpu_count() * 2
-def get_match(first,second,percentage):
+def prepare_pictures(pictures, cached_blocks, j=job.nulljob):
    if percentage < 0:
        percentage = 0
    return Match(first,second,percentage)
 class MatchFactory(object):
    cached_blocks = None
    block_count_per_side = 15
    threshold = 75
    match_scaled = False
    def _do_getmatches(self, files, j):
        raise NotImplementedError()
    def getmatches(self, files, j=job.nulljob):
    # The MemoryError handlers in there use logging without first caring about whether or not
    # there is enough memory left to carry on the operation because it is assumed that the
    # MemoryError happens when trying to read an image file, which is freed from memory by the
    # time that MemoryError is raised.
-        j = j.start_subjob([3, 7])
+    prepared = [] # only pictures for which there was no error getting blocks
        logging.info('Preparing %d files' % len(files))
        prepared = self.prepare_files(files, j)
        logging.info('Finished preparing %d files' % len(prepared))
        return self._do_getmatches(prepared, j)
    def prepare_files(self, files, j=job.nulljob):
        prepared = [] # only files for which there was no error getting blocks
    try:
-            for picture in j.iter_with_progress(files, 'Analyzed %d/%d pictures'):
+        for picture in j.iter_with_progress(pictures, 'Analyzed %d/%d pictures'):
            picture.dimensions
            picture.unicode_path = unicode(picture.path)
            try:
-                    if picture.unicode_path not in self.cached_blocks:
+                if picture.unicode_path not in cached_blocks:
-                        blocks = picture.get_blocks(self.block_count_per_side)
+                    blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
-                        self.cached_blocks[picture.unicode_path] = blocks
+                    cached_blocks[picture.unicode_path] = blocks
                prepared.append(picture)
            except IOError as e:
                logging.warning(unicode(e))
@ -69,9 +49,13 @@ class MatchFactory(object):
                if picture.size < 10 * 1024 * 1024: # We're really running out of memory
                    raise
    except MemoryError:
-            logging.warning('Ran out of memory while preparing files')
+        logging.warning('Ran out of memory while preparing pictures')
    return prepared
 def get_match(first, second, percentage):
    if percentage < 0:
        percentage = 0
    return Match(first, second, percentage)
 def async_compare(ref_id, other_ids, dbname, threshold):
    cache = Cache(dbname, threaded=False)
@ -90,8 +74,7 @@ def async_compare(ref_id, other_ids, dbname, threshold):
    cache.con.close()
    return results
-class AsyncMatchFactory(MatchFactory):
+def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job.nulljob):
    def _do_getmatches(self, pictures, j):
    def empty_out_queue(queue, into):
        try:
            while True:
@ -99,15 +82,17 @@ class AsyncMatchFactory(MatchFactory):
        except Empty:
            pass
    j = j.start_subjob([3, 7])
    pictures = prepare_pictures(pictures, cached_blocks, j)
    j = j.start_subjob([9, 1], 'Preparing for matching')
-        cache = self.cached_blocks
+    cache = cached_blocks
    id2picture = {}
    dimensions2pictures = defaultdict(set)
    for picture in pictures:
        try:
            picture.cache_id = cache.get_id(picture.unicode_path)
            id2picture[picture.cache_id] = picture
-                if not self.match_scaled:
+            if not match_scaled:
                dimensions2pictures[picture.dimensions].add(picture)
        except ValueError:
            pass
@ -117,11 +102,11 @@ class AsyncMatchFactory(MatchFactory):
    matches = []
    pictures_copy = set(pictures)
    for ref in j.iter_with_progress(pictures, 'Matched %d/%d pictures'):
-            others = pictures_copy if self.match_scaled else dimensions2pictures[ref.dimensions]
+        others = pictures_copy if match_scaled else dimensions2pictures[ref.dimensions]
        others.remove(ref)
        if others:
            cache_ids = [f.cache_id for f in others]
-                args = (ref.cache_id, cache_ids, self.cached_blocks.dbname, self.threshold)
+            args = (ref.cache_id, cache_ids, cached_blocks.dbname, threshold)
            async_results.append(pool.apply_async(async_compare, args))
        if len(async_results) > RESULTS_QUEUE_LIMIT:
            result = async_results.pop(0)
@ -133,9 +118,8 @@ class AsyncMatchFactory(MatchFactory):
        other = id2picture[other_id]
        if percentage == 100 and ref.md5 != other.md5:
            percentage = 99
-            if percentage >= self.threshold:
+        if percentage >= threshold:
            result.append(get_match(ref, other, percentage))
    return result
 multiprocessing.freeze_support()
--- a/pe/py/scanner.py
+++ b/pe/py/scanner.py
@ -0,0 +1,22 @@
 # -*- coding: utf-8 -*-
 # Created By: Virgil Dupras
 # Created On: 2009-10-18
 # $Id$
 # Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
 # 
 # This software is licensed under the "HS" License as described in the "LICENSE" file, 
 # which should be included with this package. The terms are also available at 
 # http://www.hardcoded.net/licenses/hs_license
 from dupeguru.scanner import Scanner
 from . import matchbase
 class ScannerPE(Scanner):
    """Scanner subclass that delegates matching to ``matchbase.getmatches``.

    The class attributes below are defaults that are forwarded, unchanged, as
    arguments to ``matchbase.getmatches`` every time ``_getmatches`` is called.
    """
    # Forwarded as the ``cached_blocks`` argument. Defaults to None, so it
    # presumably has to be assigned on the instance before a scan -- confirm
    # against the code that creates the scanner.
    cached_blocks = None
    # Forwarded as the ``match_scaled`` argument.
    match_scaled = False
    # Forwarded as the ``threshold`` argument.
    threshold = 75
    def _getmatches(self, files, j):
        """Return the result of ``matchbase.getmatches`` for ``files``.

        ``files`` and the job object ``j`` are passed through as-is, together
        with this scanner's ``cached_blocks``, ``threshold`` and
        ``match_scaled`` settings (in that positional order).
        """
        return matchbase.getmatches(files, self.cached_blocks, self.threshold, self.match_scaled, j)
--- a/pe/qt/app.py
+++ b/pe/qt/app.py
@ -17,7 +17,7 @@ from hsutil.str import get_file_ext
 from dupeguru_pe import data as data_pe
 from dupeguru_pe.cache import Cache
-from dupeguru_pe.matchbase import AsyncMatchFactory
+from dupeguru_pe.scanner import ScannerPE
 from block import getblocks
 from base.app import DupeGuru as DupeGuruBase
@ -63,15 +63,15 @@ class DupeGuru(DupeGuruBase):
        DupeGuruBase.__init__(self, data_pe, appid=5)
    def _setup(self):
-        self.scanner.match_factory = AsyncMatchFactory()
+        self.scanner = ScannerPE()
        self.directories.dirclass = Directory
-        self.scanner.match_factory.cached_blocks = Cache(op.join(self.appdata, 'cached_pictures.db'))
+        self.scanner.cached_blocks = Cache(op.join(self.appdata, 'cached_pictures.db'))
        DupeGuruBase._setup(self)
    def _update_options(self):
        DupeGuruBase._update_options(self)
-        self.scanner.match_factory.match_scaled = self.prefs.match_scaled
+        self.scanner.match_scaled = self.prefs.match_scaled
-        self.scanner.match_factory.threshold = self.prefs.filter_hardness
+        self.scanner.threshold = self.prefs.filter_hardness
    def _create_details_dialog(self, parent):
        return DetailsDialog(parent, self)
--- a/pe/qt/main_window.py
+++ b/pe/qt/main_window.py
@ -23,6 +23,6 @@ class MainWindow(MainWindowBase):
        title = "Clear Picture Cache"
        msg = "Do you really want to remove all your cached picture analysis?"
        if self._confirm(title, msg, QMessageBox.No):
-            self.app.scanner.match_factory.cached_blocks.clear()
+            self.app.scanner.cached_blocks.clear()
            QMessageBox.information(self, title, "Picture cache cleared.")