mirror of
https://github.com/arsenetar/dupeguru.git
synced 2026-01-23 07:01:39 +00:00
Added tox configuration
... and fixed PEP 8 warnings. Many of them are still ignored, because fixing them all at once would be too big a step to take.
This commit is contained in:
@@ -1,17 +1,17 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2006/09/01
|
||||
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/bsd_license
|
||||
|
||||
from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2
|
||||
from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2 # NOQA
|
||||
|
||||
# Converted to C
|
||||
# def getblock(image):
|
||||
# """Returns a 3 sized tuple containing the mean color of 'image'.
|
||||
#
|
||||
#
|
||||
# image: a PIL image or crop.
|
||||
# """
|
||||
# if image.size[0]:
|
||||
@@ -28,7 +28,7 @@ from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2
|
||||
# This is not used anymore
|
||||
# def getblocks(image,blocksize):
|
||||
# """Returns a list of blocks (3 sized tuples).
|
||||
#
|
||||
#
|
||||
# image: A PIL image to base the blocks on.
|
||||
# blocksize: The size of the blocks to be created. This is a single integer, defining
|
||||
# both width and height (blocks are square).
|
||||
@@ -46,7 +46,7 @@ from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2
|
||||
# Converted to C
|
||||
# def getblocks2(image,block_count_per_side):
|
||||
# """Returns a list of blocks (3 sized tuples).
|
||||
#
|
||||
#
|
||||
# image: A PIL image to base the blocks on.
|
||||
# block_count_per_side: This integer determines the number of blocks the function will return.
|
||||
# If it is 10, for example, 100 blocks will be returned (10 width, 10 height). The blocks will not
|
||||
@@ -73,7 +73,7 @@ from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2
|
||||
# Converted to C
|
||||
# def diff(first, second):
|
||||
# """Returns the difference between the first block and the second.
|
||||
#
|
||||
#
|
||||
# It returns an absolute sum of the 3 differences (RGB).
|
||||
# """
|
||||
# r1, g1, b1 = first
|
||||
@@ -83,7 +83,7 @@ from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2
|
||||
# Converted to C
|
||||
# def avgdiff(first, second, limit=768, min_iterations=1):
|
||||
# """Returns the average diff between first blocks and seconds.
|
||||
#
|
||||
#
|
||||
# If the result surpasses limit, limit + 1 is returned, except if less than min_iterations
|
||||
# iterations have been made in the blocks.
|
||||
# """
|
||||
@@ -106,7 +106,7 @@ from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2
|
||||
# This is not used anymore
|
||||
# def maxdiff(first,second,limit=768):
|
||||
# """Returns the max diff between first blocks and seconds.
|
||||
#
|
||||
#
|
||||
# If the result surpasses limit, the first max being over limit is returned.
|
||||
# """
|
||||
# if len(first) != len(second):
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2006/09/14
|
||||
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/bsd_license
|
||||
|
||||
import os
|
||||
@@ -15,11 +15,11 @@ from ._cache import string_to_colors
|
||||
|
||||
def colors_to_string(colors):
|
||||
"""Transform the 3 sized tuples 'colors' into a hex string.
|
||||
|
||||
|
||||
[(0,100,255)] --> 0064ff
|
||||
[(1,2,3),(4,5,6)] --> 010203040506
|
||||
"""
|
||||
return ''.join(['%02x%02x%02x' % (r,g,b) for r,g,b in colors])
|
||||
return ''.join(['%02x%02x%02x' % (r, g, b) for r, g, b in colors])
|
||||
|
||||
# This function is an important bottleneck of dupeGuru PE. It has been converted to C.
|
||||
# def string_to_colors(s):
|
||||
@@ -38,18 +38,18 @@ class Cache:
|
||||
self.dbname = db
|
||||
self.con = None
|
||||
self._create_con()
|
||||
|
||||
|
||||
def __contains__(self, key):
|
||||
sql = "select count(*) from pictures where path = ?"
|
||||
result = self.con.execute(sql, [key]).fetchall()
|
||||
return result[0][0] > 0
|
||||
|
||||
|
||||
def __delitem__(self, key):
|
||||
if key not in self:
|
||||
raise KeyError(key)
|
||||
sql = "delete from pictures where path = ?"
|
||||
self.con.execute(sql, [key])
|
||||
|
||||
|
||||
# Optimized
|
||||
def __getitem__(self, key):
|
||||
if isinstance(key, int):
|
||||
@@ -62,17 +62,17 @@ class Cache:
|
||||
return result
|
||||
else:
|
||||
raise KeyError(key)
|
||||
|
||||
|
||||
def __iter__(self):
|
||||
sql = "select path from pictures"
|
||||
result = self.con.execute(sql)
|
||||
return (row[0] for row in result)
|
||||
|
||||
|
||||
def __len__(self):
|
||||
sql = "select count(*) from pictures"
|
||||
result = self.con.execute(sql).fetchall()
|
||||
return result[0][0]
|
||||
|
||||
|
||||
def __setitem__(self, path_str, blocks):
|
||||
blocks = colors_to_string(blocks)
|
||||
if op.exists(path_str):
|
||||
@@ -89,15 +89,15 @@ class Cache:
|
||||
logging.warning('Picture cache could not set value for key %r', path_str)
|
||||
except sqlite.DatabaseError as e:
|
||||
logging.warning('DatabaseError while setting value for key %r: %s', path_str, str(e))
|
||||
|
||||
|
||||
def _create_con(self, second_try=False):
|
||||
def create_tables():
|
||||
logging.debug("Creating picture cache tables.")
|
||||
self.con.execute("drop table if exists pictures");
|
||||
self.con.execute("drop index if exists idx_path");
|
||||
self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)");
|
||||
self.con.execute("drop table if exists pictures")
|
||||
self.con.execute("drop index if exists idx_path")
|
||||
self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)")
|
||||
self.con.execute("create index idx_path on pictures (path)")
|
||||
|
||||
|
||||
self.con = sqlite.connect(self.dbname, isolation_level=None)
|
||||
try:
|
||||
self.con.execute("select path, mtime, blocks from pictures where 1=2")
|
||||
@@ -110,23 +110,23 @@ class Cache:
|
||||
self.con.close()
|
||||
os.remove(self.dbname)
|
||||
self._create_con(second_try=True)
|
||||
|
||||
|
||||
def clear(self):
|
||||
self.close()
|
||||
if self.dbname != ':memory:':
|
||||
os.remove(self.dbname)
|
||||
self._create_con()
|
||||
|
||||
|
||||
def close(self):
|
||||
if self.con is not None:
|
||||
self.con.close()
|
||||
self.con = None
|
||||
|
||||
|
||||
def filter(self, func):
|
||||
to_delete = [key for key in self if not func(key)]
|
||||
for key in to_delete:
|
||||
del self[key]
|
||||
|
||||
|
||||
def get_id(self, path):
|
||||
sql = "select rowid from pictures where path = ?"
|
||||
result = self.con.execute(sql, [path]).fetchone()
|
||||
@@ -134,15 +134,15 @@ class Cache:
|
||||
return result[0]
|
||||
else:
|
||||
raise ValueError(path)
|
||||
|
||||
|
||||
def get_multiple(self, rowids):
|
||||
sql = "select rowid, blocks from pictures where rowid in (%s)" % ','.join(map(str, rowids))
|
||||
cur = self.con.execute(sql)
|
||||
return ((rowid, string_to_colors(blocks)) for rowid, blocks in cur)
|
||||
|
||||
|
||||
def purge_outdated(self):
|
||||
"""Go through the cache and purge outdated records.
|
||||
|
||||
|
||||
A record is outdated if the picture doesn't exist or if its mtime is greater than the one in
|
||||
the db.
|
||||
"""
|
||||
@@ -159,4 +159,4 @@ class Cache:
|
||||
if todelete:
|
||||
sql = "delete from pictures where rowid in (%s)" % ','.join(map(str, todelete))
|
||||
self.con.execute(sql)
|
||||
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2011-04-20
|
||||
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/bsd_license
|
||||
|
||||
# Heavily based on http://topo.math.u-psud.fr/~bousch/exifdump.py by Thierry Bousch (Public Domain)
|
||||
@@ -181,7 +181,7 @@ class Fraction:
|
||||
|
||||
def __repr__(self):
|
||||
return '%d/%d' % (self.num, self.den)
|
||||
|
||||
|
||||
|
||||
class TIFF_file:
|
||||
def __init__(self, data):
|
||||
@@ -201,14 +201,14 @@ class TIFF_file:
|
||||
logging.debug(self.endian)
|
||||
logging.debug("Slice for offset %d length %d: %r and value: %d", offset, length, slice, val)
|
||||
return val
|
||||
|
||||
|
||||
def first_IFD(self):
|
||||
return self.s2n(4, 4)
|
||||
|
||||
|
||||
def next_IFD(self, ifd):
|
||||
entries = self.s2n(ifd, 2)
|
||||
return self.s2n(ifd + 2 + 12 * entries, 4)
|
||||
|
||||
|
||||
def list_IFDs(self):
|
||||
i = self.first_IFD()
|
||||
a = []
|
||||
@@ -216,7 +216,7 @@ class TIFF_file:
|
||||
a.append(i)
|
||||
i = self.next_IFD(i)
|
||||
return a
|
||||
|
||||
|
||||
def dump_IFD(self, ifd):
|
||||
entries = self.s2n(ifd, 2)
|
||||
logging.debug("Entries for IFD %d: %d", ifd, entries)
|
||||
@@ -230,7 +230,7 @@ class TIFF_file:
|
||||
type = self.s2n(entry+2, 2)
|
||||
if not 1 <= type <= 10:
|
||||
continue # not handled
|
||||
typelen = [ 1, 1, 2, 4, 8, 1, 1, 2, 4, 8 ] [type-1]
|
||||
typelen = [1, 1, 2, 4, 8, 1, 1, 2, 4, 8][type-1]
|
||||
count = self.s2n(entry+4, 4)
|
||||
if count > MAX_COUNT:
|
||||
logging.debug("Probably corrupt. Aborting.")
|
||||
@@ -247,7 +247,7 @@ class TIFF_file:
|
||||
for j in range(count):
|
||||
if type in {5, 10}:
|
||||
# The type is either 5 or 10
|
||||
value_j = Fraction(self.s2n(offset, 4, signed),
|
||||
value_j = Fraction(self.s2n(offset, 4, signed),
|
||||
self.s2n(offset+4, 4, signed))
|
||||
else:
|
||||
# Not a fraction
|
||||
@@ -255,7 +255,7 @@ class TIFF_file:
|
||||
values.append(value_j)
|
||||
offset = offset + typelen
|
||||
# Now "values" is either a string or an array
|
||||
a.append((tag,type,values))
|
||||
a.append((tag, type, values))
|
||||
return a
|
||||
|
||||
def read_exif_header(fp):
|
||||
@@ -283,13 +283,13 @@ def get_fields(fp):
|
||||
logging.debug("Exif header length: %d bytes", length)
|
||||
data = fp.read(length-8)
|
||||
data_format = data[0]
|
||||
logging.debug("%s format", {INTEL_ENDIAN:'Intel', MOTOROLA_ENDIAN:'Motorola'}[data_format])
|
||||
logging.debug("%s format", {INTEL_ENDIAN: 'Intel', MOTOROLA_ENDIAN: 'Motorola'}[data_format])
|
||||
T = TIFF_file(data)
|
||||
# There may be more than one IFD per file, but we only read the first one because others are
|
||||
# most likely thumbnails.
|
||||
main_IFD_offset = T.first_IFD()
|
||||
result = {}
|
||||
|
||||
|
||||
def add_tag_to_result(tag, values):
|
||||
try:
|
||||
stag = EXIF_TAGS[tag]
|
||||
@@ -298,7 +298,7 @@ def get_fields(fp):
|
||||
if stag in result:
|
||||
return # don't overwrite data
|
||||
result[stag] = values
|
||||
|
||||
|
||||
logging.debug("IFD at offset %d", main_IFD_offset)
|
||||
IFD = T.dump_IFD(main_IFD_offset)
|
||||
exif_off = gps_off = 0
|
||||
|
||||
@@ -93,8 +93,10 @@ def get_chunks(pictures):
|
||||
chunk_count = max(min_chunk_count, chunk_count)
|
||||
chunk_size = (len(pictures) // chunk_count) + 1
|
||||
chunk_size = max(MIN_CHUNK_SIZE, chunk_size)
|
||||
logging.info("Creating %d chunks with a chunk size of %d for %d pictures", chunk_count,
|
||||
chunk_size, len(pictures))
|
||||
logging.info(
|
||||
"Creating %d chunks with a chunk size of %d for %d pictures", chunk_count,
|
||||
chunk_size, len(pictures)
|
||||
)
|
||||
chunks = [pictures[i:i+chunk_size] for i in range(0, len(pictures), chunk_size)]
|
||||
return chunks
|
||||
|
||||
@@ -142,7 +144,7 @@ def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nul
|
||||
|
||||
def collect_results(collect_all=False):
|
||||
# collect results and wait until the queue is small enough to accommodate new results.
|
||||
nonlocal async_results, matches, comparison_count
|
||||
nonlocal async_results, matches, comparison_count, comparisons_to_do
|
||||
limit = 0 if collect_all else RESULTS_QUEUE_LIMIT
|
||||
while len(async_results) > limit:
|
||||
ready, working = extract(lambda r: r.ready(), async_results)
|
||||
@@ -150,7 +152,8 @@ def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nul
|
||||
matches += result.get()
|
||||
async_results.remove(result)
|
||||
comparison_count += 1
|
||||
progress_msg = tr("Performed %d/%d chunk matches") % (comparison_count, len(comparisons_to_do))
|
||||
# About the NOQA below: I think there's a bug in pyflakes. To investigate...
|
||||
progress_msg = tr("Performed %d/%d chunk matches") % (comparison_count, len(comparisons_to_do)) # NOQA
|
||||
j.set_progress(comparison_count, progress_msg)
|
||||
|
||||
j = j.start_subjob([3, 7])
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2011-04-20
|
||||
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/bsd_license
|
||||
|
||||
from collections import defaultdict
|
||||
@@ -27,4 +27,5 @@ def getmatches(files, match_scaled, j):
|
||||
if (not match_scaled) and (p1.dimensions != p2.dimensions):
|
||||
continue
|
||||
matches.append(Match(p1, p2, 100))
|
||||
return matches
|
||||
return matches
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2011-05-29
|
||||
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/bsd_license
|
||||
|
||||
import logging
|
||||
@@ -23,20 +23,20 @@ def get_delta_dimensions(value, ref_value):
|
||||
class Photo(fs.File):
|
||||
INITIAL_INFO = fs.File.INITIAL_INFO.copy()
|
||||
INITIAL_INFO.update({
|
||||
'dimensions': (0,0),
|
||||
'dimensions': (0, 0),
|
||||
'exif_timestamp': '',
|
||||
})
|
||||
__slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())
|
||||
|
||||
|
||||
# These extensions are supported on all platforms
|
||||
HANDLED_EXTS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'tif'}
|
||||
|
||||
|
||||
def _plat_get_dimensions(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
def _plat_get_blocks(self, block_count_per_side, orientation):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
def _get_orientation(self):
|
||||
if not hasattr(self, '_cached_orientation'):
|
||||
try:
|
||||
@@ -48,7 +48,7 @@ class Photo(fs.File):
|
||||
except Exception: # Couldn't read EXIF data, no transforms
|
||||
self._cached_orientation = 0
|
||||
return self._cached_orientation
|
||||
|
||||
|
||||
def _get_exif_timestamp(self):
|
||||
try:
|
||||
with self.path.open('rb') as fp:
|
||||
@@ -57,11 +57,11 @@ class Photo(fs.File):
|
||||
except Exception:
|
||||
logging.info("Couldn't read EXIF of picture: %s", self.path)
|
||||
return ''
|
||||
|
||||
|
||||
@classmethod
|
||||
def can_handle(cls, path):
|
||||
return fs.File.can_handle(path) and get_file_ext(path.name) in cls.HANDLED_EXTS
|
||||
|
||||
|
||||
def get_display_info(self, group, delta):
|
||||
size = self.size
|
||||
mtime = self.mtime
|
||||
@@ -90,7 +90,7 @@ class Photo(fs.File):
|
||||
'percentage': format_perc(percentage),
|
||||
'dupe_count': format_dupe_count(dupe_count),
|
||||
}
|
||||
|
||||
|
||||
def _read_info(self, field):
|
||||
fs.File._read_info(self, field)
|
||||
if field == 'dimensions':
|
||||
@@ -99,7 +99,7 @@ class Photo(fs.File):
|
||||
self.dimensions = (self.dimensions[1], self.dimensions[0])
|
||||
elif field == 'exif_timestamp':
|
||||
self.exif_timestamp = self._get_exif_timestamp()
|
||||
|
||||
|
||||
def get_blocks(self, block_count_per_side):
|
||||
return self._plat_get_blocks(block_count_per_side, self._get_orientation())
|
||||
|
||||
|
||||
|
||||
@@ -1,27 +1,31 @@
|
||||
# Created On: 2011/09/16
|
||||
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/bsd_license
|
||||
|
||||
from hscommon.trans import trget
|
||||
|
||||
from core.prioritize import (KindCategory, FolderCategory, FilenameCategory, NumericalCategory,
|
||||
SizeCategory, MtimeCategory)
|
||||
from core.prioritize import (
|
||||
KindCategory, FolderCategory, FilenameCategory, NumericalCategory,
|
||||
SizeCategory, MtimeCategory
|
||||
)
|
||||
|
||||
coltr = trget('columns')
|
||||
|
||||
class DimensionsCategory(NumericalCategory):
|
||||
NAME = coltr("Dimensions")
|
||||
|
||||
|
||||
def extract_value(self, dupe):
|
||||
return dupe.dimensions
|
||||
|
||||
|
||||
def invert_numerical_value(self, value):
|
||||
width, height = value
|
||||
return (-width, -height)
|
||||
|
||||
def all_categories():
|
||||
return [KindCategory, FolderCategory, FilenameCategory, SizeCategory, DimensionsCategory,
|
||||
MtimeCategory]
|
||||
return [
|
||||
KindCategory, FolderCategory, FilenameCategory, SizeCategory, DimensionsCategory,
|
||||
MtimeCategory
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user