mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-23 07:01:39 +00:00

Added tox configuration

... and fixed pep8 warnings. A lot of them are still ignored, but that's
because fixing them all at once would be too big a step.
Virgil Dupras
2014-10-13 15:08:59 -04:00
parent 24643a9b5d
commit 2166a0996c
46 changed files with 794 additions and 612 deletions
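
The tox.ini this commit adds isn't among the hunks shown below, so the following is only a sketch of what that kind of setup typically looks like: a test environment plus a flake8/pep8 environment whose ignore list keeps the warnings the commit message says are still ignored. Environment names, paths and warning codes here are illustrative, not taken from the commit.

    # tox.ini -- illustrative sketch; the real file isn't reproduced in these hunks
    [tox]
    envlist = py34, flake8

    [testenv]
    deps = pytest
    commands = py.test core hscommon

    [testenv:flake8]
    deps = flake8
    commands = flake8 core hscommon

    [flake8]
    # warnings excluded until they can be fixed incrementally -- these particular
    # codes are examples, not the commit's actual list
    ignore = E501,E302
    max-line-length = 100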

View File

@@ -1,17 +1,17 @@
# Created By: Virgil Dupras
# Created On: 2006/09/01
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2
from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2 # NOQA
# Converted to C
# def getblock(image):
# """Returns a 3 sized tuple containing the mean color of 'image'.
#
#
# image: a PIL image or crop.
# """
# if image.size[0]:
@@ -28,7 +28,7 @@ from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2
# This is not used anymore
# def getblocks(image,blocksize):
# """Returns a list of blocks (3 sized tuples).
#
#
# image: A PIL image to base the blocks on.
# blocksize: The size of the blocks to be created. This is a single integer, defining
# both width and height (blocks are square).
@@ -46,7 +46,7 @@ from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2
# Converted to C
# def getblocks2(image,block_count_per_side):
# """Returns a list of blocks (3 sized tuples).
#
#
# image: A PIL image to base the blocks on.
# block_count_per_side: This integer determines the number of blocks the function will return.
# If it is 10, for example, 100 blocks will be returned (10 width, 10 height). The blocks will not
@@ -73,7 +73,7 @@ from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2
# Converted to C
# def diff(first, second):
# """Returns the difference between the first block and the second.
#
#
# It returns an absolute sum of the 3 differences (RGB).
# """
# r1, g1, b1 = first
@@ -83,7 +83,7 @@ from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2
# Converted to C
# def avgdiff(first, second, limit=768, min_iterations=1):
# """Returns the average diff between first blocks and seconds.
#
#
# If the result surpasses limit, limit + 1 is returned, except if less than min_iterations
# iterations have been made in the blocks.
# """
@@ -106,7 +106,7 @@ from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2
# This is not used anymore
# def maxdiff(first,second,limit=768):
# """Returns the max diff between first blocks and seconds.
#
#
# If the result surpasses limit, the first max being over limit is returned.
# """
# if len(first) != len(second):
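
The commented-out docstrings above describe what the C helpers in _block do: diff() returns the absolute sum of the three RGB differences between two blocks, and avgdiff() averages those diffs over two block lists, returning limit + 1 as soon as the running average goes over limit (unless fewer than min_iterations comparisons have been made). A pure-Python sketch matching those descriptions, not the shipped C code:

    # Pure-Python sketch of the helpers described above; the real implementations
    # are the C functions imported from ._block at the top of this file.
    class NoBlocksError(Exception):             # mirrors the name imported from ._block
        pass

    class DifferentBlockCountError(Exception):  # mirrors the name imported from ._block
        pass

    def diff(first, second):
        # absolute sum of the three RGB differences between two mean-color blocks
        r1, g1, b1 = first
        r2, g2, b2 = second
        return abs(r1 - r2) + abs(g1 - g2) + abs(b1 - b2)

    def avgdiff(first, second, limit=768, min_iterations=1):
        # average diff between two block lists, aborting with limit + 1 once the
        # running average exceeds `limit` (but only after `min_iterations` blocks)
        if len(first) != len(second):
            raise DifferentBlockCountError()
        if not first:
            raise NoBlocksError()
        total = 0
        for i, (b1, b2) in enumerate(zip(first, second), 1):
            total += diff(b1, b2)
            if i >= min_iterations and total > limit * i:
                return limit + 1
        return total // len(first)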

View File

@@ -1,9 +1,9 @@
# Created By: Virgil Dupras
# Created On: 2006/09/14
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
import os
@@ -15,11 +15,11 @@ from ._cache import string_to_colors
def colors_to_string(colors):
"""Transform the 3 sized tuples 'colors' into a hex string.
[(0,100,255)] --> 0064ff
[(1,2,3),(4,5,6)] --> 010203040506
"""
return ''.join(['%02x%02x%02x' % (r,g,b) for r,g,b in colors])
return ''.join(['%02x%02x%02x' % (r, g, b) for r, g, b in colors])
# This function is an important bottleneck of dupeGuru PE. It has been converted to C.
# def string_to_colors(s):
@@ -38,18 +38,18 @@ class Cache:
self.dbname = db
self.con = None
self._create_con()
def __contains__(self, key):
sql = "select count(*) from pictures where path = ?"
result = self.con.execute(sql, [key]).fetchall()
return result[0][0] > 0
def __delitem__(self, key):
if key not in self:
raise KeyError(key)
sql = "delete from pictures where path = ?"
self.con.execute(sql, [key])
# Optimized
def __getitem__(self, key):
if isinstance(key, int):
@@ -62,17 +62,17 @@ class Cache:
return result
else:
raise KeyError(key)
def __iter__(self):
sql = "select path from pictures"
result = self.con.execute(sql)
return (row[0] for row in result)
def __len__(self):
sql = "select count(*) from pictures"
result = self.con.execute(sql).fetchall()
return result[0][0]
def __setitem__(self, path_str, blocks):
blocks = colors_to_string(blocks)
if op.exists(path_str):
@@ -89,15 +89,15 @@ class Cache:
logging.warning('Picture cache could not set value for key %r', path_str)
except sqlite.DatabaseError as e:
logging.warning('DatabaseError while setting value for key %r: %s', path_str, str(e))
def _create_con(self, second_try=False):
def create_tables():
logging.debug("Creating picture cache tables.")
self.con.execute("drop table if exists pictures");
self.con.execute("drop index if exists idx_path");
self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)");
self.con.execute("drop table if exists pictures")
self.con.execute("drop index if exists idx_path")
self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)")
self.con.execute("create index idx_path on pictures (path)")
self.con = sqlite.connect(self.dbname, isolation_level=None)
try:
self.con.execute("select path, mtime, blocks from pictures where 1=2")
@@ -110,23 +110,23 @@ class Cache:
self.con.close()
os.remove(self.dbname)
self._create_con(second_try=True)
def clear(self):
self.close()
if self.dbname != ':memory:':
os.remove(self.dbname)
self._create_con()
def close(self):
if self.con is not None:
self.con.close()
self.con = None
def filter(self, func):
to_delete = [key for key in self if not func(key)]
for key in to_delete:
del self[key]
def get_id(self, path):
sql = "select rowid from pictures where path = ?"
result = self.con.execute(sql, [path]).fetchone()
@@ -134,15 +134,15 @@ class Cache:
return result[0]
else:
raise ValueError(path)
def get_multiple(self, rowids):
sql = "select rowid, blocks from pictures where rowid in (%s)" % ','.join(map(str, rowids))
cur = self.con.execute(sql)
return ((rowid, string_to_colors(blocks)) for rowid, blocks in cur)
def purge_outdated(self):
"""Go through the cache and purge outdated records.
A record is outdated if the picture doesn't exist or if its mtime is greater than the one in
the db.
"""
@@ -159,4 +159,4 @@ class Cache:
if todelete:
sql = "delete from pictures where rowid in (%s)" % ','.join(map(str, todelete))
self.con.execute(sql)
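
colors_to_string() above packs each (r, g, b) tuple into six hex digits, and the comment notes that its inverse, string_to_colors(), was a bottleneck and now lives in C (imported from ._cache). A pure-Python sketch of that inverse, based only on the packing shown in the colors_to_string docstring:

    # Sketch of the inverse of colors_to_string(); the shipped version is the C
    # implementation imported from ._cache. Assumes the 6-hex-digits-per-color
    # packing shown above ([(0, 100, 255)] <-> '0064ff').
    def string_to_colors(s):
        colors = []
        for i in range(0, len(s), 6):
            r = int(s[i:i + 2], 16)
            g = int(s[i + 2:i + 4], 16)
            b = int(s[i + 4:i + 6], 16)
            colors.append((r, g, b))
        return colors

    # round trip: string_to_colors('010203040506') --> [(1, 2, 3), (4, 5, 6)]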

View File

@@ -1,9 +1,9 @@
# Created By: Virgil Dupras
# Created On: 2011-04-20
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
# Heavily based on http://topo.math.u-psud.fr/~bousch/exifdump.py by Thierry Bousch (Public Domain)
@@ -181,7 +181,7 @@ class Fraction:
def __repr__(self):
return '%d/%d' % (self.num, self.den)
class TIFF_file:
def __init__(self, data):
@@ -201,14 +201,14 @@ class TIFF_file:
logging.debug(self.endian)
logging.debug("Slice for offset %d length %d: %r and value: %d", offset, length, slice, val)
return val
def first_IFD(self):
return self.s2n(4, 4)
def next_IFD(self, ifd):
entries = self.s2n(ifd, 2)
return self.s2n(ifd + 2 + 12 * entries, 4)
def list_IFDs(self):
i = self.first_IFD()
a = []
@@ -216,7 +216,7 @@ class TIFF_file:
a.append(i)
i = self.next_IFD(i)
return a
def dump_IFD(self, ifd):
entries = self.s2n(ifd, 2)
logging.debug("Entries for IFD %d: %d", ifd, entries)
@@ -230,7 +230,7 @@ class TIFF_file:
type = self.s2n(entry+2, 2)
if not 1 <= type <= 10:
continue # not handled
typelen = [ 1, 1, 2, 4, 8, 1, 1, 2, 4, 8 ] [type-1]
typelen = [1, 1, 2, 4, 8, 1, 1, 2, 4, 8][type-1]
count = self.s2n(entry+4, 4)
if count > MAX_COUNT:
logging.debug("Probably corrupt. Aborting.")
@@ -247,7 +247,7 @@ class TIFF_file:
for j in range(count):
if type in {5, 10}:
# The type is either 5 or 10
value_j = Fraction(self.s2n(offset, 4, signed),
value_j = Fraction(self.s2n(offset, 4, signed),
self.s2n(offset+4, 4, signed))
else:
# Not a fraction
@@ -255,7 +255,7 @@ class TIFF_file:
values.append(value_j)
offset = offset + typelen
# Now "values" is either a string or an array
a.append((tag,type,values))
a.append((tag, type, values))
return a
def read_exif_header(fp):
@@ -283,13 +283,13 @@ def get_fields(fp):
logging.debug("Exif header length: %d bytes", length)
data = fp.read(length-8)
data_format = data[0]
logging.debug("%s format", {INTEL_ENDIAN:'Intel', MOTOROLA_ENDIAN:'Motorola'}[data_format])
logging.debug("%s format", {INTEL_ENDIAN: 'Intel', MOTOROLA_ENDIAN: 'Motorola'}[data_format])
T = TIFF_file(data)
# There may be more than one IFD per file, but we only read the first one because others are
# most likely thumbnails.
main_IFD_offset = T.first_IFD()
result = {}
def add_tag_to_result(tag, values):
try:
stag = EXIF_TAGS[tag]
@@ -298,7 +298,7 @@ def get_fields(fp):
if stag in result:
return # don't overwrite data
result[stag] = values
logging.debug("IFD at offset %d", main_IFD_offset)
IFD = T.dump_IFD(main_IFD_offset)
exif_off = gps_off = 0
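
get_fields() above reads the EXIF header, picks the byte order from data_format (Intel or Motorola), parses the first IFD with TIFF_file and maps numeric tags to names through EXIF_TAGS. A hedged usage sketch; the import path and the 'Orientation'/'DateTimeOriginal' key names follow standard EXIF tag naming and are assumptions, not lines visible in these hunks:

    from core_pe import exif   # package path assumed for illustration

    with open('some_photo.jpg', 'rb') as fp:
        fields = exif.get_fields(fp)

    # numeric tag values come back as lists, ASCII tags as strings
    orientation = fields.get('Orientation', [0])[0]   # 1..8 EXIF rotation flag
    timestamp = fields.get('DateTimeOriginal', '')    # e.g. '2006:09:14 12:00:00'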

View File

@@ -93,8 +93,10 @@ def get_chunks(pictures):
chunk_count = max(min_chunk_count, chunk_count)
chunk_size = (len(pictures) // chunk_count) + 1
chunk_size = max(MIN_CHUNK_SIZE, chunk_size)
logging.info("Creating %d chunks with a chunk size of %d for %d pictures", chunk_count,
chunk_size, len(pictures))
logging.info(
"Creating %d chunks with a chunk size of %d for %d pictures", chunk_count,
chunk_size, len(pictures)
)
chunks = [pictures[i:i+chunk_size] for i in range(0, len(pictures), chunk_size)]
return chunks
@@ -142,7 +144,7 @@ def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nul
def collect_results(collect_all=False):
# collect results and wait until the queue is small enough to accommodate new results.
nonlocal async_results, matches, comparison_count
nonlocal async_results, matches, comparison_count, comparisons_to_do
limit = 0 if collect_all else RESULTS_QUEUE_LIMIT
while len(async_results) > limit:
ready, working = extract(lambda r: r.ready(), async_results)
@@ -150,7 +152,8 @@ def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nul
matches += result.get()
async_results.remove(result)
comparison_count += 1
progress_msg = tr("Performed %d/%d chunk matches") % (comparison_count, len(comparisons_to_do))
# About the NOQA below: I think there's a bug in pyflakes. To investigate...
progress_msg = tr("Performed %d/%d chunk matches") % (comparison_count, len(comparisons_to_do)) # NOQA
j.set_progress(comparison_count, progress_msg)
j = j.start_subjob([3, 7])
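
collect_results() in getmatches() keeps the list of outstanding multiprocessing results bounded: it waits until enough of them are ready, folds each finished chunk's matches into the running list, and reports progress. A stripped-down sketch of that bounded async-results pattern (names and the sleep are illustrative; the real function also tracks comparisons_to_do and a translated progress message):

    import time
    from multiprocessing import Pool

    RESULTS_QUEUE_LIMIT = 8   # same idea as the module constant; value illustrative

    def process_chunk_pairs(chunk_pairs, compare_chunks):
        # compare_chunks(chunk_a, chunk_b) -> list of matches, executed in workers
        pool = Pool()
        async_results = []
        matches = []
        done = 0

        def collect_results(collect_all=False):
            nonlocal done
            limit = 0 if collect_all else RESULTS_QUEUE_LIMIT
            while len(async_results) > limit:
                ready = [r for r in async_results if r.ready()]
                for r in ready:
                    matches.extend(r.get())
                    async_results.remove(r)
                    done += 1
                if not ready:
                    time.sleep(0.01)   # nothing finished yet; don't spin

        for chunk_a, chunk_b in chunk_pairs:
            async_results.append(pool.apply_async(compare_chunks, (chunk_a, chunk_b)))
            collect_results()                  # keep the outstanding queue bounded
        collect_results(collect_all=True)      # drain whatever is left
        pool.close()
        pool.join()
        return matches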

View File

@@ -1,9 +1,9 @@
# Created By: Virgil Dupras
# Created On: 2011-04-20
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
from collections import defaultdict
@@ -27,4 +27,5 @@ def getmatches(files, match_scaled, j):
if (not match_scaled) and (p1.dimensions != p2.dimensions):
continue
matches.append(Match(p1, p2, 100))
return matches
return matches

View File

@@ -1,9 +1,9 @@
# Created By: Virgil Dupras
# Created On: 2011-05-29
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
import logging
@@ -23,20 +23,20 @@ def get_delta_dimensions(value, ref_value):
class Photo(fs.File):
INITIAL_INFO = fs.File.INITIAL_INFO.copy()
INITIAL_INFO.update({
'dimensions': (0,0),
'dimensions': (0, 0),
'exif_timestamp': '',
})
__slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())
# These extensions are supported on all platforms
HANDLED_EXTS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'tif'}
def _plat_get_dimensions(self):
raise NotImplementedError()
def _plat_get_blocks(self, block_count_per_side, orientation):
raise NotImplementedError()
def _get_orientation(self):
if not hasattr(self, '_cached_orientation'):
try:
@@ -48,7 +48,7 @@ class Photo(fs.File):
except Exception: # Couldn't read EXIF data, no transforms
self._cached_orientation = 0
return self._cached_orientation
def _get_exif_timestamp(self):
try:
with self.path.open('rb') as fp:
@@ -57,11 +57,11 @@ class Photo(fs.File):
except Exception:
logging.info("Couldn't read EXIF of picture: %s", self.path)
return ''
@classmethod
def can_handle(cls, path):
return fs.File.can_handle(path) and get_file_ext(path.name) in cls.HANDLED_EXTS
def get_display_info(self, group, delta):
size = self.size
mtime = self.mtime
@@ -90,7 +90,7 @@ class Photo(fs.File):
'percentage': format_perc(percentage),
'dupe_count': format_dupe_count(dupe_count),
}
def _read_info(self, field):
fs.File._read_info(self, field)
if field == 'dimensions':
@@ -99,7 +99,7 @@ class Photo(fs.File):
self.dimensions = (self.dimensions[1], self.dimensions[0])
elif field == 'exif_timestamp':
self.exif_timestamp = self._get_exif_timestamp()
def get_blocks(self, block_count_per_side):
return self._plat_get_blocks(block_count_per_side, self._get_orientation())
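
_read_info() above swaps the stored width and height when the cached EXIF orientation says the picture is rotated a quarter turn; the exact orientation values it tests aren't visible in this hunk, so the sketch below uses the standard EXIF interpretation (values 5 to 8 are the transposed/rotated ones):

    # Standard EXIF orientation -> displayed dimensions; the precise condition
    # Photo._read_info() uses isn't shown in this hunk, so this is an assumption.
    def displayed_dimensions(width, height, orientation):
        if orientation in (5, 6, 7, 8):    # mirrored/rotated by 90 or 270 degrees
            return (height, width)
        return (width, height)             # 0-4: keep dimensions as stored

    # a 4000x3000 frame shot in portrait (orientation 6) displays as 3000x4000
    assert displayed_dimensions(4000, 3000, 6) == (3000, 4000)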

View File

@@ -1,27 +1,31 @@
# Created On: 2011/09/16
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
from hscommon.trans import trget
from core.prioritize import (KindCategory, FolderCategory, FilenameCategory, NumericalCategory,
SizeCategory, MtimeCategory)
from core.prioritize import (
KindCategory, FolderCategory, FilenameCategory, NumericalCategory,
SizeCategory, MtimeCategory
)
coltr = trget('columns')
class DimensionsCategory(NumericalCategory):
NAME = coltr("Dimensions")
def extract_value(self, dupe):
return dupe.dimensions
def invert_numerical_value(self, value):
width, height = value
return (-width, -height)
def all_categories():
return [KindCategory, FolderCategory, FilenameCategory, SizeCategory, DimensionsCategory,
MtimeCategory]
return [
KindCategory, FolderCategory, FilenameCategory, SizeCategory, DimensionsCategory,
MtimeCategory
]
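
DimensionsCategory overrides invert_numerical_value() because its value is a (width, height) tuple rather than a single number: negating both members turns an ascending sort into a largest-dimensions-first sort, which a plain unary minus can't do on a tuple. A small illustration (how core.prioritize consumes the inverted value is assumed, not shown in this hunk):

    # Component-wise negation as a descending sort key for dimension tuples.
    def invert_numerical_value(value):
        width, height = value
        return (-width, -height)

    dimensions = [(800, 600), (1920, 1080), (1024, 768)]
    print(sorted(dimensions, key=invert_numerical_value))
    # [(1920, 1080), (1024, 768), (800, 600)] -- largest dimensions first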