mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-03-10 05:34:36 +00:00
dgpe: adjusted to the MatchFactory removal.
--HG-- extra : convert_revision : svn%3Ac306627e-7827-47d3-bdf0-9a457c9553a1/trunk%40193
This commit is contained in:
parent
7228adf433
commit
11977c6533
@ -99,7 +99,6 @@ class Scanner(object):
|
|||||||
g.prioritize(self._key_func, self._tie_breaker)
|
g.prioritize(self._key_func, self._tie_breaker)
|
||||||
return groups
|
return groups
|
||||||
|
|
||||||
match_factory = None
|
|
||||||
match_similar_words = False
|
match_similar_words = False
|
||||||
min_match_percentage = 80
|
min_match_percentage = 80
|
||||||
mix_file_kind = True
|
mix_file_kind = True
|
||||||
|
@ -27,8 +27,9 @@ from hsutil.path import Path
|
|||||||
from hsutil.cocoa import as_fetch
|
from hsutil.cocoa import as_fetch
|
||||||
|
|
||||||
from dupeguru import app_cocoa, directories
|
from dupeguru import app_cocoa, directories
|
||||||
from . import data, matchbase
|
from . import data
|
||||||
from .cache import string_to_colors, Cache
|
from .cache import string_to_colors, Cache
|
||||||
|
from .scanner import ScannerPE
|
||||||
|
|
||||||
mainBundle = NSBundle.mainBundle()
|
mainBundle = NSBundle.mainBundle()
|
||||||
PictureBlocks = mainBundle.classNamed_('PictureBlocks')
|
PictureBlocks = mainBundle.classNamed_('PictureBlocks')
|
||||||
@ -126,11 +127,11 @@ class IPhotoLibrary(fs.Directory):
|
|||||||
class DupeGuruPE(app_cocoa.DupeGuru):
|
class DupeGuruPE(app_cocoa.DupeGuru):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
app_cocoa.DupeGuru.__init__(self, data, 'dupeGuru Picture Edition', appid=5)
|
app_cocoa.DupeGuru.__init__(self, data, 'dupeGuru Picture Edition', appid=5)
|
||||||
self.scanner.match_factory = matchbase.AsyncMatchFactory()
|
self.scanner = ScannerPE()
|
||||||
self.directories.dirclass = Directory
|
self.directories.dirclass = Directory
|
||||||
self.directories.special_dirclasses[Path('iPhoto Library')] = lambda _, __: self._create_iphoto_library()
|
self.directories.special_dirclasses[Path('iPhoto Library')] = lambda _, __: self._create_iphoto_library()
|
||||||
p = op.join(self.appdata, 'cached_pictures.db')
|
p = op.join(self.appdata, 'cached_pictures.db')
|
||||||
self.scanner.match_factory.cached_blocks = Cache(p)
|
self.scanner.cached_blocks = Cache(p)
|
||||||
|
|
||||||
def _create_iphoto_library(self):
|
def _create_iphoto_library(self):
|
||||||
ud = NSUserDefaults.standardUserDefaults()
|
ud = NSUserDefaults.standardUserDefaults()
|
||||||
|
@ -20,47 +20,27 @@ from .block import avgdiff, DifferentBlockCountError, NoBlocksError
|
|||||||
from .cache import Cache
|
from .cache import Cache
|
||||||
|
|
||||||
MIN_ITERATIONS = 3
|
MIN_ITERATIONS = 3
|
||||||
|
BLOCK_COUNT_PER_SIDE = 15
|
||||||
|
|
||||||
# Enough so that we're sure that the main thread will not wait after a result.get() call
|
# Enough so that we're sure that the main thread will not wait after a result.get() call
|
||||||
# cpucount*2 should be enough to be sure that the spawned process will not wait after the results
|
# cpucount*2 should be enough to be sure that the spawned process will not wait after the results
|
||||||
# collection made by the main process.
|
# collection made by the main process.
|
||||||
RESULTS_QUEUE_LIMIT = multiprocessing.cpu_count() * 2
|
RESULTS_QUEUE_LIMIT = multiprocessing.cpu_count() * 2
|
||||||
|
|
||||||
def get_match(first,second,percentage):
|
def prepare_pictures(pictures, cached_blocks, j=job.nulljob):
|
||||||
if percentage < 0:
|
|
||||||
percentage = 0
|
|
||||||
return Match(first,second,percentage)
|
|
||||||
|
|
||||||
class MatchFactory(object):
|
|
||||||
cached_blocks = None
|
|
||||||
block_count_per_side = 15
|
|
||||||
threshold = 75
|
|
||||||
match_scaled = False
|
|
||||||
|
|
||||||
def _do_getmatches(self, files, j):
|
|
||||||
raise NotImplementedError()
|
|
||||||
|
|
||||||
def getmatches(self, files, j=job.nulljob):
|
|
||||||
# The MemoryError handlers in there use logging without first caring about whether or not
|
# The MemoryError handlers in there use logging without first caring about whether or not
|
||||||
# there is enough memory left to carry on the operation because it is assumed that the
|
# there is enough memory left to carry on the operation because it is assumed that the
|
||||||
# MemoryError happens when trying to read an image file, which is freed from memory by the
|
# MemoryError happens when trying to read an image file, which is freed from memory by the
|
||||||
# time that MemoryError is raised.
|
# time that MemoryError is raised.
|
||||||
j = j.start_subjob([3, 7])
|
prepared = [] # only pictures for which there was no error getting blocks
|
||||||
logging.info('Preparing %d files' % len(files))
|
|
||||||
prepared = self.prepare_files(files, j)
|
|
||||||
logging.info('Finished preparing %d files' % len(prepared))
|
|
||||||
return self._do_getmatches(prepared, j)
|
|
||||||
|
|
||||||
def prepare_files(self, files, j=job.nulljob):
|
|
||||||
prepared = [] # only files for which there was no error getting blocks
|
|
||||||
try:
|
try:
|
||||||
for picture in j.iter_with_progress(files, 'Analyzed %d/%d pictures'):
|
for picture in j.iter_with_progress(pictures, 'Analyzed %d/%d pictures'):
|
||||||
picture.dimensions
|
picture.dimensions
|
||||||
picture.unicode_path = unicode(picture.path)
|
picture.unicode_path = unicode(picture.path)
|
||||||
try:
|
try:
|
||||||
if picture.unicode_path not in self.cached_blocks:
|
if picture.unicode_path not in cached_blocks:
|
||||||
blocks = picture.get_blocks(self.block_count_per_side)
|
blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
|
||||||
self.cached_blocks[picture.unicode_path] = blocks
|
cached_blocks[picture.unicode_path] = blocks
|
||||||
prepared.append(picture)
|
prepared.append(picture)
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
logging.warning(unicode(e))
|
logging.warning(unicode(e))
|
||||||
@ -69,9 +49,13 @@ class MatchFactory(object):
|
|||||||
if picture.size < 10 * 1024 * 1024: # We're really running out of memory
|
if picture.size < 10 * 1024 * 1024: # We're really running out of memory
|
||||||
raise
|
raise
|
||||||
except MemoryError:
|
except MemoryError:
|
||||||
logging.warning('Ran out of memory while preparing files')
|
logging.warning('Ran out of memory while preparing pictures')
|
||||||
return prepared
|
return prepared
|
||||||
|
|
||||||
|
def get_match(first, second, percentage):
|
||||||
|
if percentage < 0:
|
||||||
|
percentage = 0
|
||||||
|
return Match(first, second, percentage)
|
||||||
|
|
||||||
def async_compare(ref_id, other_ids, dbname, threshold):
|
def async_compare(ref_id, other_ids, dbname, threshold):
|
||||||
cache = Cache(dbname, threaded=False)
|
cache = Cache(dbname, threaded=False)
|
||||||
@ -90,8 +74,7 @@ def async_compare(ref_id, other_ids, dbname, threshold):
|
|||||||
cache.con.close()
|
cache.con.close()
|
||||||
return results
|
return results
|
||||||
|
|
||||||
class AsyncMatchFactory(MatchFactory):
|
def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job.nulljob):
|
||||||
def _do_getmatches(self, pictures, j):
|
|
||||||
def empty_out_queue(queue, into):
|
def empty_out_queue(queue, into):
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
@ -99,15 +82,17 @@ class AsyncMatchFactory(MatchFactory):
|
|||||||
except Empty:
|
except Empty:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
j = j.start_subjob([3, 7])
|
||||||
|
pictures = prepare_pictures(pictures, cached_blocks, j)
|
||||||
j = j.start_subjob([9, 1], 'Preparing for matching')
|
j = j.start_subjob([9, 1], 'Preparing for matching')
|
||||||
cache = self.cached_blocks
|
cache = cached_blocks
|
||||||
id2picture = {}
|
id2picture = {}
|
||||||
dimensions2pictures = defaultdict(set)
|
dimensions2pictures = defaultdict(set)
|
||||||
for picture in pictures:
|
for picture in pictures:
|
||||||
try:
|
try:
|
||||||
picture.cache_id = cache.get_id(picture.unicode_path)
|
picture.cache_id = cache.get_id(picture.unicode_path)
|
||||||
id2picture[picture.cache_id] = picture
|
id2picture[picture.cache_id] = picture
|
||||||
if not self.match_scaled:
|
if not match_scaled:
|
||||||
dimensions2pictures[picture.dimensions].add(picture)
|
dimensions2pictures[picture.dimensions].add(picture)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
@ -117,11 +102,11 @@ class AsyncMatchFactory(MatchFactory):
|
|||||||
matches = []
|
matches = []
|
||||||
pictures_copy = set(pictures)
|
pictures_copy = set(pictures)
|
||||||
for ref in j.iter_with_progress(pictures, 'Matched %d/%d pictures'):
|
for ref in j.iter_with_progress(pictures, 'Matched %d/%d pictures'):
|
||||||
others = pictures_copy if self.match_scaled else dimensions2pictures[ref.dimensions]
|
others = pictures_copy if match_scaled else dimensions2pictures[ref.dimensions]
|
||||||
others.remove(ref)
|
others.remove(ref)
|
||||||
if others:
|
if others:
|
||||||
cache_ids = [f.cache_id for f in others]
|
cache_ids = [f.cache_id for f in others]
|
||||||
args = (ref.cache_id, cache_ids, self.cached_blocks.dbname, self.threshold)
|
args = (ref.cache_id, cache_ids, cached_blocks.dbname, threshold)
|
||||||
async_results.append(pool.apply_async(async_compare, args))
|
async_results.append(pool.apply_async(async_compare, args))
|
||||||
if len(async_results) > RESULTS_QUEUE_LIMIT:
|
if len(async_results) > RESULTS_QUEUE_LIMIT:
|
||||||
result = async_results.pop(0)
|
result = async_results.pop(0)
|
||||||
@ -133,9 +118,8 @@ class AsyncMatchFactory(MatchFactory):
|
|||||||
other = id2picture[other_id]
|
other = id2picture[other_id]
|
||||||
if percentage == 100 and ref.md5 != other.md5:
|
if percentage == 100 and ref.md5 != other.md5:
|
||||||
percentage = 99
|
percentage = 99
|
||||||
if percentage >= self.threshold:
|
if percentage >= threshold:
|
||||||
result.append(get_match(ref, other, percentage))
|
result.append(get_match(ref, other, percentage))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
multiprocessing.freeze_support()
|
multiprocessing.freeze_support()
|
22
pe/py/scanner.py
Normal file
22
pe/py/scanner.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Created By: Virgil Dupras
|
||||||
|
# Created On: 2009-10-18
|
||||||
|
# $Id$
|
||||||
|
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
|
||||||
|
#
|
||||||
|
# This software is licensed under the "HS" License as described in the "LICENSE" file,
|
||||||
|
# which should be included with this package. The terms are also available at
|
||||||
|
# http://www.hardcoded.net/licenses/hs_license
|
||||||
|
|
||||||
|
from dupeguru.scanner import Scanner
|
||||||
|
|
||||||
|
from . import matchbase
|
||||||
|
|
||||||
|
class ScannerPE(Scanner):
|
||||||
|
cached_blocks = None
|
||||||
|
match_scaled = False
|
||||||
|
threshold = 75
|
||||||
|
|
||||||
|
def _getmatches(self, files, j):
|
||||||
|
return matchbase.getmatches(files, self.cached_blocks, self.threshold, self.match_scaled, j)
|
||||||
|
|
10
pe/qt/app.py
10
pe/qt/app.py
@ -17,7 +17,7 @@ from hsutil.str import get_file_ext
|
|||||||
|
|
||||||
from dupeguru_pe import data as data_pe
|
from dupeguru_pe import data as data_pe
|
||||||
from dupeguru_pe.cache import Cache
|
from dupeguru_pe.cache import Cache
|
||||||
from dupeguru_pe.matchbase import AsyncMatchFactory
|
from dupeguru_pe.scanner import ScannerPE
|
||||||
|
|
||||||
from block import getblocks
|
from block import getblocks
|
||||||
from base.app import DupeGuru as DupeGuruBase
|
from base.app import DupeGuru as DupeGuruBase
|
||||||
@ -63,15 +63,15 @@ class DupeGuru(DupeGuruBase):
|
|||||||
DupeGuruBase.__init__(self, data_pe, appid=5)
|
DupeGuruBase.__init__(self, data_pe, appid=5)
|
||||||
|
|
||||||
def _setup(self):
|
def _setup(self):
|
||||||
self.scanner.match_factory = AsyncMatchFactory()
|
self.scanner = ScannerPE()
|
||||||
self.directories.dirclass = Directory
|
self.directories.dirclass = Directory
|
||||||
self.scanner.match_factory.cached_blocks = Cache(op.join(self.appdata, 'cached_pictures.db'))
|
self.scanner.cached_blocks = Cache(op.join(self.appdata, 'cached_pictures.db'))
|
||||||
DupeGuruBase._setup(self)
|
DupeGuruBase._setup(self)
|
||||||
|
|
||||||
def _update_options(self):
|
def _update_options(self):
|
||||||
DupeGuruBase._update_options(self)
|
DupeGuruBase._update_options(self)
|
||||||
self.scanner.match_factory.match_scaled = self.prefs.match_scaled
|
self.scanner.match_scaled = self.prefs.match_scaled
|
||||||
self.scanner.match_factory.threshold = self.prefs.filter_hardness
|
self.scanner.threshold = self.prefs.filter_hardness
|
||||||
|
|
||||||
def _create_details_dialog(self, parent):
|
def _create_details_dialog(self, parent):
|
||||||
return DetailsDialog(parent, self)
|
return DetailsDialog(parent, self)
|
||||||
|
@ -23,6 +23,6 @@ class MainWindow(MainWindowBase):
|
|||||||
title = "Clear Picture Cache"
|
title = "Clear Picture Cache"
|
||||||
msg = "Do you really want to remove all your cached picture analysis?"
|
msg = "Do you really want to remove all your cached picture analysis?"
|
||||||
if self._confirm(title, msg, QMessageBox.No):
|
if self._confirm(title, msg, QMessageBox.No):
|
||||||
self.app.scanner.match_factory.cached_blocks.clear()
|
self.app.scanner.cached_blocks.clear()
|
||||||
QMessageBox.information(self, title, "Picture cache cleared.")
|
QMessageBox.information(self, title, "Picture cache cleared.")
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user