dgpe: adjusted to the MatchFactory removal.

--HG--
extra : convert_revision : svn%3Ac306627e-7827-47d3-bdf0-9a457c9553a1/trunk%40193
hsoft 2009-10-18 09:26:04 +00:00
parent 7228adf433
commit 11977c6533
6 changed files with 107 additions and 101 deletions

View File

@@ -99,7 +99,6 @@ class Scanner(object):
             g.prioritize(self._key_func, self._tie_breaker)
         return groups
 
-    match_factory = None
     match_similar_words = False
     min_match_percentage = 80
    mix_file_kind = True
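
With match_factory gone from the base Scanner, engine-specific matching moves into Scanner subclasses, which supply their matches through a _getmatches(files, j) hook (see the new pe/py/scanner.py further down). A minimal sketch of that pattern, using simplified stand-ins rather than the real dupeguru classes:

    import itertools

    class Scanner(object):
        # Stand-in for the base class above: options are plain attributes
        # on the scanner itself; there is no factory object anymore.
        min_match_percentage = 80

        def _getmatches(self, files, j):
            raise NotImplementedError()

    class WordScanner(Scanner):
        # Hypothetical engine: override the hook instead of assigning
        # a MatchFactory to scanner.match_factory.
        def _getmatches(self, files, j):
            # files are assumed to carry a 'words' set for this sketch
            return [(a, b) for a, b in itertools.combinations(files, 2)
                    if a.words & b.words]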

View File

@@ -27,8 +27,9 @@ from hsutil.path import Path
 from hsutil.cocoa import as_fetch
 
 from dupeguru import app_cocoa, directories
-from . import data, matchbase
+from . import data
 from .cache import string_to_colors, Cache
+from .scanner import ScannerPE
 
 mainBundle = NSBundle.mainBundle()
 PictureBlocks = mainBundle.classNamed_('PictureBlocks')
@@ -126,11 +127,11 @@ class IPhotoLibrary(fs.Directory):
 
 class DupeGuruPE(app_cocoa.DupeGuru):
     def __init__(self):
         app_cocoa.DupeGuru.__init__(self, data, 'dupeGuru Picture Edition', appid=5)
-        self.scanner.match_factory = matchbase.AsyncMatchFactory()
+        self.scanner = ScannerPE()
         self.directories.dirclass = Directory
         self.directories.special_dirclasses[Path('iPhoto Library')] = lambda _, __: self._create_iphoto_library()
         p = op.join(self.appdata, 'cached_pictures.db')
-        self.scanner.match_factory.cached_blocks = Cache(p)
+        self.scanner.cached_blocks = Cache(p)
     def _create_iphoto_library(self):
         ud = NSUserDefaults.standardUserDefaults()
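
Both GUIs hand the scanner a Cache built from cached_pictures.db. Judging from how matchbase uses it below, cached_blocks acts like a dict keyed by the picture's unicode path, exposes get_id() (raising ValueError for unknown paths) and a dbname that the worker processes reopen, and supports clear(). A rough in-memory stand-in for that interface, for illustration only (the real Cache is sqlite-backed):

    class DictCache(object):
        """Illustrative stand-in for dupeguru_pe.cache.Cache."""
        def __init__(self, dbname):
            self.dbname = dbname  # worker processes reopen the db by name
            self._blocks = {}     # unicode_path -> blocks
            self._ids = {}        # unicode_path -> integer cache_id

        def __contains__(self, path):
            return path in self._blocks

        def __setitem__(self, path, blocks):
            self._blocks[path] = blocks
            self._ids.setdefault(path, len(self._ids) + 1)

        def get_id(self, path):
            # mirror the ValueError that the matching code catches
            try:
                return self._ids[path]
            except KeyError:
                raise ValueError(path)

        def clear(self):
            self._blocks.clear()
            self._ids.clear()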

View File

@@ -20,58 +20,42 @@ from .block import avgdiff, DifferentBlockCountError, NoBlocksError
 from .cache import Cache
 
 MIN_ITERATIONS = 3
+BLOCK_COUNT_PER_SIDE = 15
 
 # Enough so that we're sure that the main thread will not wait after a result.get() call
+# cpucount*2 should be enough to be sure that the spawned process will not wait after the results
+# collection made by the main process.
 RESULTS_QUEUE_LIMIT = multiprocessing.cpu_count() * 2
 
-def get_match(first,second,percentage):
+def prepare_pictures(pictures, cached_blocks, j=job.nulljob):
+    # The MemoryError handlers in there use logging without first caring about whether or not
+    # there is enough memory left to carry on the operation because it is assumed that the
+    # MemoryError happens when trying to read an image file, which is freed from memory by the
+    # time that MemoryError is raised.
+    prepared = [] # only pictures for which there was no error getting blocks
+    try:
+        for picture in j.iter_with_progress(pictures, 'Analyzed %d/%d pictures'):
+            picture.dimensions
+            picture.unicode_path = unicode(picture.path)
+            try:
+                if picture.unicode_path not in cached_blocks:
+                    blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
+                    cached_blocks[picture.unicode_path] = blocks
+                prepared.append(picture)
+            except IOError as e:
+                logging.warning(unicode(e))
+            except MemoryError:
+                logging.warning(u'Ran out of memory while reading %s of size %d' % (picture.unicode_path, picture.size))
+                if picture.size < 10 * 1024 * 1024: # We're really running out of memory
+                    raise
+    except MemoryError:
+        logging.warning('Ran out of memory while preparing pictures')
+    return prepared
+
+def get_match(first, second, percentage):
     if percentage < 0:
         percentage = 0
-    return Match(first,second,percentage)
-
-class MatchFactory(object):
-    cached_blocks = None
-    block_count_per_side = 15
-    threshold = 75
-    match_scaled = False
-
-    def _do_getmatches(self, files, j):
-        raise NotImplementedError()
-
-    def getmatches(self, files, j=job.nulljob):
-        # The MemoryError handlers in there use logging without first caring about whether or not
-        # there is enough memory left to carry on the operation because it is assumed that the
-        # MemoryError happens when trying to read an image file, which is freed from memory by the
-        # time that MemoryError is raised.
-        j = j.start_subjob([3, 7])
-        logging.info('Preparing %d files' % len(files))
-        prepared = self.prepare_files(files, j)
-        logging.info('Finished preparing %d files' % len(prepared))
-        return self._do_getmatches(prepared, j)
-
-    def prepare_files(self, files, j=job.nulljob):
-        prepared = [] # only files for which there was no error getting blocks
-        try:
-            for picture in j.iter_with_progress(files, 'Analyzed %d/%d pictures'):
-                picture.dimensions
-                picture.unicode_path = unicode(picture.path)
-                try:
-                    if picture.unicode_path not in self.cached_blocks:
-                        blocks = picture.get_blocks(self.block_count_per_side)
-                        self.cached_blocks[picture.unicode_path] = blocks
-                    prepared.append(picture)
-                except IOError as e:
-                    logging.warning(unicode(e))
-                except MemoryError:
-                    logging.warning(u'Ran out of memory while reading %s of size %d' % (picture.unicode_path, picture.size))
-                    if picture.size < 10 * 1024 * 1024: # We're really running out of memory
-                        raise
-        except MemoryError:
-            logging.warning('Ran out of memory while preparing files')
-        return prepared
+    return Match(first, second, percentage)
 
 def async_compare(ref_id, other_ids, dbname, threshold):
     cache = Cache(dbname, threaded=False)
@@ -89,53 +73,53 @@ def async_compare(ref_id, other_ids, dbname, threshold):
         results.append((ref_id, other_id, percentage))
     cache.con.close()
     return results
 
-class AsyncMatchFactory(MatchFactory):
-    def _do_getmatches(self, pictures, j):
-        def empty_out_queue(queue, into):
-            try:
-                while True:
-                    into.append(queue.get(block=False))
-            except Empty:
-                pass
-
-        j = j.start_subjob([9, 1], 'Preparing for matching')
-        cache = self.cached_blocks
-        id2picture = {}
-        dimensions2pictures = defaultdict(set)
-        for picture in pictures:
-            try:
-                picture.cache_id = cache.get_id(picture.unicode_path)
-                id2picture[picture.cache_id] = picture
-                if not self.match_scaled:
-                    dimensions2pictures[picture.dimensions].add(picture)
-            except ValueError:
-                pass
-        pictures = [p for p in pictures if hasattr(p, 'cache_id')]
-        pool = multiprocessing.Pool()
-        async_results = []
-        matches = []
-        pictures_copy = set(pictures)
-        for ref in j.iter_with_progress(pictures, 'Matched %d/%d pictures'):
-            others = pictures_copy if self.match_scaled else dimensions2pictures[ref.dimensions]
-            others.remove(ref)
-            if others:
-                cache_ids = [f.cache_id for f in others]
-                args = (ref.cache_id, cache_ids, self.cached_blocks.dbname, self.threshold)
-                async_results.append(pool.apply_async(async_compare, args))
-                if len(async_results) > RESULTS_QUEUE_LIMIT:
-                    result = async_results.pop(0)
-                    matches.extend(result.get())
-
-        result = []
-        for ref_id, other_id, percentage in j.iter_with_progress(matches, 'Verified %d/%d matches', every=10):
-            ref = id2picture[ref_id]
-            other = id2picture[other_id]
-            if percentage == 100 and ref.md5 != other.md5:
-                percentage = 99
-            if percentage >= self.threshold:
-                result.append(get_match(ref, other, percentage))
-        return result
+def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job.nulljob):
+    def empty_out_queue(queue, into):
+        try:
+            while True:
+                into.append(queue.get(block=False))
+        except Empty:
+            pass
+
+    j = j.start_subjob([3, 7])
+    pictures = prepare_pictures(pictures, cached_blocks, j)
+    j = j.start_subjob([9, 1], 'Preparing for matching')
+    cache = cached_blocks
+    id2picture = {}
+    dimensions2pictures = defaultdict(set)
+    for picture in pictures:
+        try:
+            picture.cache_id = cache.get_id(picture.unicode_path)
+            id2picture[picture.cache_id] = picture
+            if not match_scaled:
+                dimensions2pictures[picture.dimensions].add(picture)
+        except ValueError:
+            pass
+    pictures = [p for p in pictures if hasattr(p, 'cache_id')]
+    pool = multiprocessing.Pool()
+    async_results = []
+    matches = []
+    pictures_copy = set(pictures)
+    for ref in j.iter_with_progress(pictures, 'Matched %d/%d pictures'):
+        others = pictures_copy if match_scaled else dimensions2pictures[ref.dimensions]
+        others.remove(ref)
+        if others:
+            cache_ids = [f.cache_id for f in others]
+            args = (ref.cache_id, cache_ids, cached_blocks.dbname, threshold)
+            async_results.append(pool.apply_async(async_compare, args))
+            if len(async_results) > RESULTS_QUEUE_LIMIT:
+                result = async_results.pop(0)
+                matches.extend(result.get())
+
+    result = []
+    for ref_id, other_id, percentage in j.iter_with_progress(matches, 'Verified %d/%d matches', every=10):
+        ref = id2picture[ref_id]
+        other = id2picture[other_id]
+        if percentage == 100 and ref.md5 != other.md5:
+            percentage = 99
+        if percentage >= threshold:
+            result.append(get_match(ref, other, percentage))
+    return result
 
 multiprocessing.freeze_support()
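
The RESULTS_QUEUE_LIMIT comment above describes the scheduling idea: keep submitting comparisons to the pool, but once more than cpucount*2 results are outstanding, block on the oldest one, so the main process never idles and the backlog stays bounded. A self-contained sketch of that bounded apply_async pattern (square() is a stand-in for async_compare):

    import multiprocessing

    RESULTS_QUEUE_LIMIT = multiprocessing.cpu_count() * 2

    def square(n):
        return n * n

    def bounded_map(items):
        pool = multiprocessing.Pool()
        async_results = []
        collected = []
        for item in items:
            async_results.append(pool.apply_async(square, (item,)))
            if len(async_results) > RESULTS_QUEUE_LIMIT:
                # Drain the oldest submission; blocks only if it isn't done yet.
                collected.append(async_results.pop(0).get())
        # Collect whatever is still pending after the last submission.
        collected.extend(r.get() for r in async_results)
        pool.close()
        pool.join()
        return collected

    if __name__ == '__main__':
        print(bounded_map(range(100)))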

pe/py/scanner.py Normal file
View File

@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+# Created By: Virgil Dupras
+# Created On: 2009-10-18
+# $Id$
+# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
+#
+# This software is licensed under the "HS" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
+# http://www.hardcoded.net/licenses/hs_license
+
+from dupeguru.scanner import Scanner
+
+from . import matchbase
+
+class ScannerPE(Scanner):
+    cached_blocks = None
+    match_scaled = False
+    threshold = 75
+
+    def _getmatches(self, files, j):
+        return matchbase.getmatches(files, self.cached_blocks, self.threshold, self.match_scaled, j)
+
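
For orientation, the Cocoa diff above and the Qt diff below wire this scanner up roughly like so (the appdata path here is illustrative, and the call that finally triggers scanning lives in the base Scanner, outside this diff):

    import os.path as op
    from dupeguru_pe.scanner import ScannerPE
    from dupeguru_pe.cache import Cache

    appdata = op.expanduser('~/.dupeguru_pe')  # illustrative location
    scanner = ScannerPE()
    scanner.cached_blocks = Cache(op.join(appdata, 'cached_pictures.db'))
    scanner.threshold = 75        # the Qt app maps prefs.filter_hardness here
    scanner.match_scaled = False  # and prefs.match_scaled here
    # Matching then flows Scanner -> ScannerPE._getmatches -> matchbase.getmatches.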

View File

@@ -17,7 +17,7 @@ from hsutil.str import get_file_ext
 
 from dupeguru_pe import data as data_pe
 from dupeguru_pe.cache import Cache
-from dupeguru_pe.matchbase import AsyncMatchFactory
+from dupeguru_pe.scanner import ScannerPE
 
 from block import getblocks
 from base.app import DupeGuru as DupeGuruBase
@@ -63,15 +63,15 @@ class DupeGuru(DupeGuruBase):
         DupeGuruBase.__init__(self, data_pe, appid=5)
 
     def _setup(self):
-        self.scanner.match_factory = AsyncMatchFactory()
+        self.scanner = ScannerPE()
         self.directories.dirclass = Directory
-        self.scanner.match_factory.cached_blocks = Cache(op.join(self.appdata, 'cached_pictures.db'))
+        self.scanner.cached_blocks = Cache(op.join(self.appdata, 'cached_pictures.db'))
         DupeGuruBase._setup(self)
 
     def _update_options(self):
         DupeGuruBase._update_options(self)
-        self.scanner.match_factory.match_scaled = self.prefs.match_scaled
-        self.scanner.match_factory.threshold = self.prefs.filter_hardness
+        self.scanner.match_scaled = self.prefs.match_scaled
+        self.scanner.threshold = self.prefs.filter_hardness
 
     def _create_details_dialog(self, parent):
         return DetailsDialog(parent, self)

View File

@@ -23,6 +23,6 @@ class MainWindow(MainWindowBase):
         title = "Clear Picture Cache"
         msg = "Do you really want to remove all your cached picture analysis?"
         if self._confirm(title, msg, QMessageBox.No):
-            self.app.scanner.match_factory.cached_blocks.clear()
+            self.app.scanner.cached_blocks.clear()
             QMessageBox.information(self, title, "Picture cache cleared.")