Mirror of https://github.com/arsenetar/dupeguru.git, synced 2026-01-22 14:41:39 +00:00
Format files with black
- Format all files with black
- Update tox.ini flake8 arguments to be compatible
- Add black to requirements-extra.txt
- Reduce ignored flake8 rules and fix a few violations
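For context, a minimal sketch (hypothetical code, not taken verbatim from the commit) of the conventions black enforces throughout the diff below: string literals normalized to double quotes, two spaces before inline comments, and over-long calls wrapped with one argument per line plus a trailing comma. Names such as CacheRow and wrap_path mirror identifiers that appear in the diff; the describe() helper is invented for illustration.

# Illustrative only; shows black-style output, not code from this commit.
from collections import namedtuple

CacheRow = namedtuple("CacheRow", "id path blocks mtime")  # double quotes after black


def wrap_path(path):
    # black rewrites 'path:{}' to "path:{}"
    return "path:{}".format(path)


def describe(row):
    # calls past the line-length limit get one argument per line and a trailing comma
    return "{} has {} block(s), mtime {}".format(
        row.path,
        len(row.blocks),
        row.mtime,
    )


print(describe(CacheRow(1, "/tmp/a.jpg", ["0064ff"], 0)))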
@@ -1 +1,12 @@
from . import block, cache, exif, iphoto_plist, matchblock, matchexif, photo, prioritize, result_table, scanner # noqa
from . import ( # noqa
block,
cache,
exif,
iphoto_plist,
matchblock,
matchexif,
photo,
prioritize,
result_table,
scanner,
)

@@ -6,7 +6,7 @@
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2 # NOQA
from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2 # NOQA

# Converted to C
# def getblock(image):

@@ -4,7 +4,8 @@
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

from ._cache import string_to_colors # noqa
from ._cache import string_to_colors # noqa

def colors_to_string(colors):
"""Transform the 3 sized tuples 'colors' into a hex string.
@@ -12,7 +13,8 @@ def colors_to_string(colors):
[(0,100,255)] --> 0064ff
[(1,2,3),(4,5,6)] --> 010203040506
"""
return ''.join('%02x%02x%02x' % (r, g, b) for r, g, b in colors)
return "".join("%02x%02x%02x" % (r, g, b) for r, g, b in colors)

# This function is an important bottleneck of dupeGuru PE. It has been converted to C.
# def string_to_colors(s):
@@ -23,4 +25,3 @@ def colors_to_string(colors):
# number = int(s[i:i+6], 16)
# result.append((number >> 16, (number >> 8) & 0xff, number & 0xff))
# return result

@@ -12,29 +12,36 @@ from collections import namedtuple

from .cache import string_to_colors, colors_to_string

def wrap_path(path):
return 'path:{}'.format(path)
return "path:{}".format(path)

def unwrap_path(key):
return key[5:]

def wrap_id(path):
return 'id:{}'.format(path)
return "id:{}".format(path)

def unwrap_id(key):
return int(key[3:])

CacheRow = namedtuple('CacheRow', 'id path blocks mtime')

CacheRow = namedtuple("CacheRow", "id path blocks mtime")

class ShelveCache:
"""A class to cache picture blocks in a shelve backend.
"""

def __init__(self, db=None, readonly=False):
self.istmp = db is None
if self.istmp:
self.dtmp = tempfile.mkdtemp()
self.ftmp = db = op.join(self.dtmp, 'tmpdb')
flag = 'r' if readonly else 'c'
self.ftmp = db = op.join(self.dtmp, "tmpdb")
flag = "r" if readonly else "c"
self.shelve = shelve.open(db, flag)
self.maxid = self._compute_maxid()

@@ -54,10 +61,10 @@ class ShelveCache:
return string_to_colors(self.shelve[skey].blocks)

def __iter__(self):
return (unwrap_path(k) for k in self.shelve if k.startswith('path:'))
return (unwrap_path(k) for k in self.shelve if k.startswith("path:"))

def __len__(self):
return sum(1 for k in self.shelve if k.startswith('path:'))
return sum(1 for k in self.shelve if k.startswith("path:"))

def __setitem__(self, path_str, blocks):
blocks = colors_to_string(blocks)
@@ -74,7 +81,9 @@ class ShelveCache:
self.shelve[wrap_id(rowid)] = wrap_path(path_str)

def _compute_maxid(self):
return max((unwrap_id(k) for k in self.shelve if k.startswith('id:')), default=1)
return max(
(unwrap_id(k) for k in self.shelve if k.startswith("id:")), default=1
)

def _get_new_id(self):
self.maxid += 1
@@ -133,4 +142,3 @@ class ShelveCache:
# #402 and #439. I don't think it hurts to silently ignore the error, so that's
# what we do
pass

@@ -11,10 +11,12 @@ import sqlite3 as sqlite

from .cache import string_to_colors, colors_to_string

class SqliteCache:
"""A class to cache picture blocks in a sqlite backend.
"""
def __init__(self, db=':memory:', readonly=False):

def __init__(self, db=":memory:", readonly=False):
# readonly is not used in the sqlite version of the cache
self.dbname = db
self.con = None
@@ -67,34 +69,40 @@ class SqliteCache:
try:
self.con.execute(sql, [blocks, mtime, path_str])
except sqlite.OperationalError:
logging.warning('Picture cache could not set value for key %r', path_str)
logging.warning("Picture cache could not set value for key %r", path_str)
except sqlite.DatabaseError as e:
logging.warning('DatabaseError while setting value for key %r: %s', path_str, str(e))
logging.warning(
"DatabaseError while setting value for key %r: %s", path_str, str(e)
)

def _create_con(self, second_try=False):
def create_tables():
logging.debug("Creating picture cache tables.")
self.con.execute("drop table if exists pictures")
self.con.execute("drop index if exists idx_path")
self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)")
self.con.execute(
"create table pictures(path TEXT, mtime INTEGER, blocks TEXT)"
)
self.con.execute("create index idx_path on pictures (path)")

self.con = sqlite.connect(self.dbname, isolation_level=None)
try:
self.con.execute("select path, mtime, blocks from pictures where 1=2")
except sqlite.OperationalError: # new db
except sqlite.OperationalError: # new db
create_tables()
except sqlite.DatabaseError as e: # corrupted db
except sqlite.DatabaseError as e: # corrupted db
if second_try:
raise # Something really strange is happening
logging.warning('Could not create picture cache because of an error: %s', str(e))
raise # Something really strange is happening
logging.warning(
"Could not create picture cache because of an error: %s", str(e)
)
self.con.close()
os.remove(self.dbname)
self._create_con(second_try=True)

def clear(self):
self.close()
if self.dbname != ':memory:':
if self.dbname != ":memory:":
os.remove(self.dbname)
self._create_con()

@@ -117,7 +125,9 @@ class SqliteCache:
raise ValueError(path)

def get_multiple(self, rowids):
sql = "select rowid, blocks from pictures where rowid in (%s)" % ','.join(map(str, rowids))
sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(
map(str, rowids)
)
cur = self.con.execute(sql)
return ((rowid, string_to_colors(blocks)) for rowid, blocks in cur)

@@ -138,6 +148,7 @@ class SqliteCache:
continue
todelete.append(rowid)
if todelete:
sql = "delete from pictures where rowid in (%s)" % ','.join(map(str, todelete))
sql = "delete from pictures where rowid in (%s)" % ",".join(
map(str, todelete)
)
self.con.execute(sql)

@@ -83,17 +83,17 @@ EXIF_TAGS = {
0xA003: "PixelYDimension",
0xA004: "RelatedSoundFile",
0xA005: "InteroperabilityIFDPointer",
0xA20B: "FlashEnergy", # 0x920B in TIFF/EP
0xA20C: "SpatialFrequencyResponse", # 0x920C - -
0xA20E: "FocalPlaneXResolution", # 0x920E - -
0xA20F: "FocalPlaneYResolution", # 0x920F - -
0xA210: "FocalPlaneResolutionUnit", # 0x9210 - -
0xA214: "SubjectLocation", # 0x9214 - -
0xA215: "ExposureIndex", # 0x9215 - -
0xA217: "SensingMethod", # 0x9217 - -
0xA20B: "FlashEnergy", # 0x920B in TIFF/EP
0xA20C: "SpatialFrequencyResponse", # 0x920C - -
0xA20E: "FocalPlaneXResolution", # 0x920E - -
0xA20F: "FocalPlaneYResolution", # 0x920F - -
0xA210: "FocalPlaneResolutionUnit", # 0x9210 - -
0xA214: "SubjectLocation", # 0x9214 - -
0xA215: "ExposureIndex", # 0x9215 - -
0xA217: "SensingMethod", # 0x9217 - -
0xA300: "FileSource",
0xA301: "SceneType",
0xA302: "CFAPattern", # 0x828E in TIFF/EP
0xA302: "CFAPattern", # 0x828E in TIFF/EP
0xA401: "CustomRendered",
0xA402: "ExposureMode",
0xA403: "WhiteBalance",
@@ -148,17 +148,18 @@ GPS_TA0GS = {
0x1B: "GPSProcessingMethod",
0x1C: "GPSAreaInformation",
0x1D: "GPSDateStamp",
0x1E: "GPSDifferential"
0x1E: "GPSDifferential",
}

INTEL_ENDIAN = ord('I')
MOTOROLA_ENDIAN = ord('M')
INTEL_ENDIAN = ord("I")
MOTOROLA_ENDIAN = ord("M")

# About MAX_COUNT: It's possible to have corrupted exif tags where the entry count is way too high
# and thus makes us loop, not endlessly, but for heck of a long time for nothing. Therefore, we put
# an arbitrary limit on the entry count we'll allow ourselves to read and any IFD reporting more
# entries than that will be considered corrupt.
MAX_COUNT = 0xffff
MAX_COUNT = 0xFFFF

def s2n_motorola(bytes):
x = 0
@@ -166,6 +167,7 @@ def s2n_motorola(bytes):
x = (x << 8) | c
return x

def s2n_intel(bytes):
x = 0
y = 0
@@ -174,13 +176,14 @@ def s2n_intel(bytes):
y = y + 8
return x

class Fraction:
def __init__(self, num, den):
self.num = num
self.den = den

def __repr__(self):
return '%d/%d' % (self.num, self.den)
return "%d/%d" % (self.num, self.den)

class TIFF_file:
@@ -190,16 +193,22 @@ class TIFF_file:
self.s2nfunc = s2n_intel if self.endian == INTEL_ENDIAN else s2n_motorola

def s2n(self, offset, length, signed=0, debug=False):
slice = self.data[offset:offset+length]
slice = self.data[offset : offset + length]
val = self.s2nfunc(slice)
# Sign extension ?
if signed:
msb = 1 << (8*length - 1)
msb = 1 << (8 * length - 1)
if val & msb:
val = val - (msb << 1)
if debug:
logging.debug(self.endian)
logging.debug("Slice for offset %d length %d: %r and value: %d", offset, length, slice, val)
logging.debug(
"Slice for offset %d length %d: %r and value: %d",
offset,
length,
slice,
val,
)
return val

def first_IFD(self):
@@ -225,30 +234,31 @@ class TIFF_file:
return []
a = []
for i in range(entries):
entry = ifd + 2 + 12*i
entry = ifd + 2 + 12 * i
tag = self.s2n(entry, 2)
type = self.s2n(entry+2, 2)
type = self.s2n(entry + 2, 2)
if not 1 <= type <= 10:
continue # not handled
typelen = [1, 1, 2, 4, 8, 1, 1, 2, 4, 8][type-1]
count = self.s2n(entry+4, 4)
continue # not handled
typelen = [1, 1, 2, 4, 8, 1, 1, 2, 4, 8][type - 1]
count = self.s2n(entry + 4, 4)
if count > MAX_COUNT:
logging.debug("Probably corrupt. Aborting.")
return []
offset = entry+8
if count*typelen > 4:
offset = entry + 8
if count * typelen > 4:
offset = self.s2n(offset, 4)
if type == 2:
# Special case: nul-terminated ASCII string
values = str(self.data[offset:offset+count-1], encoding='latin-1')
values = str(self.data[offset : offset + count - 1], encoding="latin-1")
else:
values = []
signed = (type == 6 or type >= 8)
signed = type == 6 or type >= 8
for j in range(count):
if type in {5, 10}:
# The type is either 5 or 10
value_j = Fraction(self.s2n(offset, 4, signed),
self.s2n(offset+4, 4, signed))
value_j = Fraction(
self.s2n(offset, 4, signed), self.s2n(offset + 4, 4, signed)
)
else:
# Not a fraction
value_j = self.s2n(offset, typelen, signed)
@@ -258,32 +268,37 @@ class TIFF_file:
a.append((tag, type, values))
return a

def read_exif_header(fp):
# If `fp`'s first bytes are not exif, it tries to find it in the next 4kb
def isexif(data):
return data[0:4] == b'\377\330\377\341' and data[6:10] == b'Exif'
return data[0:4] == b"\377\330\377\341" and data[6:10] == b"Exif"

data = fp.read(12)
if isexif(data):
return data
# ok, not exif, try to find it
large_data = fp.read(4096)
try:
index = large_data.index(b'Exif')
data = large_data[index-6:index+6]
index = large_data.index(b"Exif")
data = large_data[index - 6 : index + 6]
# large_data omits the first 12 bytes, and the index is at the middle of the header, so we
# must seek index + 18
fp.seek(index+18)
fp.seek(index + 18)
return data
except ValueError:
raise ValueError("Not an Exif file")

def get_fields(fp):
data = read_exif_header(fp)
length = data[4] * 256 + data[5]
logging.debug("Exif header length: %d bytes", length)
data = fp.read(length-8)
data = fp.read(length - 8)
data_format = data[0]
logging.debug("%s format", {INTEL_ENDIAN: 'Intel', MOTOROLA_ENDIAN: 'Motorola'}[data_format])
logging.debug(
"%s format", {INTEL_ENDIAN: "Intel", MOTOROLA_ENDIAN: "Motorola"}[data_format]
)
T = TIFF_file(data)
# There may be more than one IFD per file, but we only read the first one because others are
# most likely thumbnails.
@@ -294,9 +309,9 @@ def get_fields(fp):
try:
stag = EXIF_TAGS[tag]
except KeyError:
stag = '0x%04X' % tag
stag = "0x%04X" % tag
if stag in result:
return # don't overwrite data
return # don't overwrite data
result[stag] = values

logging.debug("IFD at offset %d", main_IFD_offset)

@@ -1,24 +1,26 @@
# Created By: Virgil Dupras
# Created On: 2014-03-15
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

import plistlib

class IPhotoPlistParser(plistlib._PlistParser):
"""A parser for iPhoto plists.

iPhoto plists tend to be malformed, so we have to subclass the built-in parser to be a bit more
lenient.
"""

def __init__(self):
plistlib._PlistParser.__init__(self, use_builtin_types=True, dict_type=dict)
# For debugging purposes, we remember the last bit of data to be analyzed so that we can
# log it in case of an exception
self.lastdata = ''
self.lastdata = ""

def get_data(self):
self.lastdata = plistlib._PlistParser.get_data(self)

@@ -48,14 +48,18 @@ except Exception:
logging.warning("Had problems to determine cpu count on launch.")
RESULTS_QUEUE_LIMIT = 8

def get_cache(cache_path, readonly=False):
if cache_path.endswith('shelve'):
if cache_path.endswith("shelve"):
from .cache_shelve import ShelveCache

return ShelveCache(cache_path, readonly=readonly)
else:
from .cache_sqlite import SqliteCache

return SqliteCache(cache_path, readonly=readonly)

def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
# The MemoryError handlers in there use logging without first caring about whether or not
# there is enough memory left to carry on the operation because it is assumed that the
@@ -63,7 +67,7 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
# time that MemoryError is raised.
cache = get_cache(cache_path)
cache.purge_outdated()
prepared = [] # only pictures for which there was no error getting blocks
prepared = [] # only pictures for which there was no error getting blocks
try:
for picture in j.iter_with_progress(pictures, tr("Analyzed %d/%d pictures")):
if not picture.path:
@@ -77,7 +81,7 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
picture.unicode_path = str(picture.path)
logging.debug("Analyzing picture at %s", picture.unicode_path)
if with_dimensions:
picture.dimensions # pre-read dimensions
picture.dimensions # pre-read dimensions
try:
if picture.unicode_path not in cache:
blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
@@ -86,32 +90,45 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
except (IOError, ValueError) as e:
logging.warning(str(e))
except MemoryError:
logging.warning("Ran out of memory while reading %s of size %d", picture.unicode_path, picture.size)
if picture.size < 10 * 1024 * 1024: # We're really running out of memory
logging.warning(
"Ran out of memory while reading %s of size %d",
picture.unicode_path,
picture.size,
)
if (
picture.size < 10 * 1024 * 1024
): # We're really running out of memory
raise
except MemoryError:
logging.warning('Ran out of memory while preparing pictures')
logging.warning("Ran out of memory while preparing pictures")
cache.close()
return prepared

def get_chunks(pictures):
min_chunk_count = multiprocessing.cpu_count() * 2 # have enough chunks to feed all subprocesses
min_chunk_count = (
multiprocessing.cpu_count() * 2
) # have enough chunks to feed all subprocesses
chunk_count = len(pictures) // DEFAULT_CHUNK_SIZE
chunk_count = max(min_chunk_count, chunk_count)
chunk_size = (len(pictures) // chunk_count) + 1
chunk_size = max(MIN_CHUNK_SIZE, chunk_size)
logging.info(
"Creating %d chunks with a chunk size of %d for %d pictures", chunk_count,
chunk_size, len(pictures)
"Creating %d chunks with a chunk size of %d for %d pictures",
chunk_count,
chunk_size,
len(pictures),
)
chunks = [pictures[i:i+chunk_size] for i in range(0, len(pictures), chunk_size)]
chunks = [pictures[i : i + chunk_size] for i in range(0, len(pictures), chunk_size)]
return chunks

def get_match(first, second, percentage):
if percentage < 0:
percentage = 0
return Match(first, second, percentage)

def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
# The list of ids in ref_ids have to be compared to the list of ids in other_ids. other_ids
# can be None. In this case, ref_ids has to be compared with itself
@@ -142,6 +159,7 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
cache.close()
return results

def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob):
def get_picinfo(p):
if match_scaled:
@@ -160,11 +178,16 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
async_results.remove(result)
comparison_count += 1
# About the NOQA below: I think there's a bug in pyflakes. To investigate...
progress_msg = tr("Performed %d/%d chunk matches") % (comparison_count, len(comparisons_to_do)) # NOQA
progress_msg = tr("Performed %d/%d chunk matches") % (
comparison_count,
len(comparisons_to_do),
) # NOQA
j.set_progress(comparison_count, progress_msg)

j = j.start_subjob([3, 7])
pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j)
pictures = prepare_pictures(
pictures, cache_path, with_dimensions=not match_scaled, j=j
)
j = j.start_subjob([9, 1], tr("Preparing for matching"))
cache = get_cache(cache_path)
id2picture = {}
@@ -175,7 +198,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
except ValueError:
pass
cache.close()
pictures = [p for p in pictures if hasattr(p, 'cache_id')]
pictures = [p for p in pictures if hasattr(p, "cache_id")]
pool = multiprocessing.Pool()
async_results = []
matches = []
@@ -203,9 +226,17 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
# some wiggle room, log about the incident, and stop matching right here. We then process
# the matches we have. The rest of the process doesn't allocate much and we should be
# alright.
del comparisons_to_do, chunks, pictures # some wiggle room for the next statements
logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches))
del matches[-len(matches)//3:] # some wiggle room to ensure we don't run out of memory again.
del (
comparisons_to_do,
chunks,
pictures,
) # some wiggle room for the next statements
logging.warning(
"Ran out of memory when scanning! We had %d matches.", len(matches)
)
del matches[
-len(matches) // 3 :
] # some wiggle room to ensure we don't run out of memory again.
pool.close()
result = []
myiter = j.iter_with_progress(
@@ -220,10 +251,10 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
if percentage == 100 and ref.md5 != other.md5:
percentage = 99
if percentage >= threshold:
ref.dimensions # pre-read dimensions for display in results
ref.dimensions # pre-read dimensions for display in results
other.dimensions
result.append(get_match(ref, other, percentage))
return result

multiprocessing.freeze_support()

multiprocessing.freeze_support()

@@ -13,14 +13,15 @@ from hscommon.trans import tr

from core.engine import Match

def getmatches(files, match_scaled, j):
timestamp2pic = defaultdict(set)
for picture in j.iter_with_progress(files, tr("Read EXIF of %d/%d pictures")):
timestamp = picture.exif_timestamp
if timestamp:
timestamp2pic[timestamp].add(picture)
if '0000:00:00 00:00:00' in timestamp2pic: # very likely false matches
del timestamp2pic['0000:00:00 00:00:00']
if "0000:00:00 00:00:00" in timestamp2pic: # very likely false matches
del timestamp2pic["0000:00:00 00:00:00"]
matches = []
for pictures in timestamp2pic.values():
for p1, p2 in combinations(pictures, 2):
@@ -28,4 +29,3 @@ def getmatches(files, match_scaled, j):
continue
matches.append(Match(p1, p2, 100))
return matches

@@ -14,23 +14,22 @@ from . import exif
# This global value is set by the platform-specific subclasser of the Photo base class
PLAT_SPECIFIC_PHOTO_CLASS = None

def format_dimensions(dimensions):
return '%d x %d' % (dimensions[0], dimensions[1])
return "%d x %d" % (dimensions[0], dimensions[1])

def get_delta_dimensions(value, ref_value):
return (value[0]-ref_value[0], value[1]-ref_value[1])
return (value[0] - ref_value[0], value[1] - ref_value[1])

class Photo(fs.File):
INITIAL_INFO = fs.File.INITIAL_INFO.copy()
INITIAL_INFO.update({
'dimensions': (0, 0),
'exif_timestamp': '',
})
INITIAL_INFO.update({"dimensions": (0, 0), "exif_timestamp": ""})
__slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())

# These extensions are supported on all platforms
HANDLED_EXTS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'tif'}
HANDLED_EXTS = {"png", "jpg", "jpeg", "gif", "bmp", "tiff", "tif"}

def _plat_get_dimensions(self):
raise NotImplementedError()
@@ -39,25 +38,25 @@ class Photo(fs.File):
raise NotImplementedError()

def _get_orientation(self):
if not hasattr(self, '_cached_orientation'):
if not hasattr(self, "_cached_orientation"):
try:
with self.path.open('rb') as fp:
with self.path.open("rb") as fp:
exifdata = exif.get_fields(fp)
# the value is a list (probably one-sized) of ints
orientations = exifdata['Orientation']
orientations = exifdata["Orientation"]
self._cached_orientation = orientations[0]
except Exception: # Couldn't read EXIF data, no transforms
except Exception: # Couldn't read EXIF data, no transforms
self._cached_orientation = 0
return self._cached_orientation

def _get_exif_timestamp(self):
try:
with self.path.open('rb') as fp:
with self.path.open("rb") as fp:
exifdata = exif.get_fields(fp)
return exifdata['DateTimeOriginal']
return exifdata["DateTimeOriginal"]
except Exception:
logging.info("Couldn't read EXIF of picture: %s", self.path)
return ''
return ""

@classmethod
def can_handle(cls, path):
@@ -79,28 +78,27 @@ class Photo(fs.File):
else:
percentage = group.percentage
dupe_count = len(group.dupes)
dupe_folder_path = getattr(self, 'display_folder_path', self.folder_path)
dupe_folder_path = getattr(self, "display_folder_path", self.folder_path)
return {
'name': self.name,
'folder_path': str(dupe_folder_path),
'size': format_size(size, 0, 1, False),
'extension': self.extension,
'dimensions': format_dimensions(dimensions),
'exif_timestamp': self.exif_timestamp,
'mtime': format_timestamp(mtime, delta and m),
'percentage': format_perc(percentage),
'dupe_count': format_dupe_count(dupe_count),
"name": self.name,
"folder_path": str(dupe_folder_path),
"size": format_size(size, 0, 1, False),
"extension": self.extension,
"dimensions": format_dimensions(dimensions),
"exif_timestamp": self.exif_timestamp,
"mtime": format_timestamp(mtime, delta and m),
"percentage": format_perc(percentage),
"dupe_count": format_dupe_count(dupe_count),
}

def _read_info(self, field):
fs.File._read_info(self, field)
if field == 'dimensions':
if field == "dimensions":
self.dimensions = self._plat_get_dimensions()
if self._get_orientation() in {5, 6, 7, 8}:
self.dimensions = (self.dimensions[1], self.dimensions[0])
elif field == 'exif_timestamp':
elif field == "exif_timestamp":
self.exif_timestamp = self._get_exif_timestamp()

def get_blocks(self, block_count_per_side):
return self._plat_get_blocks(block_count_per_side, self._get_orientation())

@@ -8,11 +8,16 @@
from hscommon.trans import trget

from core.prioritize import (
KindCategory, FolderCategory, FilenameCategory, NumericalCategory,
SizeCategory, MtimeCategory
KindCategory,
FolderCategory,
FilenameCategory,
NumericalCategory,
SizeCategory,
MtimeCategory,
)

coltr = trget('columns')
coltr = trget("columns")

class DimensionsCategory(NumericalCategory):
NAME = coltr("Dimensions")
@@ -24,8 +29,13 @@ class DimensionsCategory(NumericalCategory):
width, height = value
return (-width, -height)

def all_categories():
return [
KindCategory, FolderCategory, FilenameCategory, SizeCategory, DimensionsCategory,
MtimeCategory
KindCategory,
FolderCategory,
FilenameCategory,
SizeCategory,
DimensionsCategory,
MtimeCategory,
]

@@ -1,8 +1,8 @@
# Created On: 2011-11-27
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

from hscommon.gui.column import Column
@@ -10,19 +10,20 @@ from hscommon.trans import trget

from core.gui.result_table import ResultTable as ResultTableBase

coltr = trget('columns')
coltr = trget("columns")

class ResultTable(ResultTableBase):
COLUMNS = [
Column('marked', ''),
Column('name', coltr("Filename")),
Column('folder_path', coltr("Folder"), optional=True),
Column('size', coltr("Size (KB)"), optional=True),
Column('extension', coltr("Kind"), visible=False, optional=True),
Column('dimensions', coltr("Dimensions"), optional=True),
Column('exif_timestamp', coltr("EXIF Timestamp"), visible=False, optional=True),
Column('mtime', coltr("Modification"), visible=False, optional=True),
Column('percentage', coltr("Match %"), optional=True),
Column('dupe_count', coltr("Dupe Count"), visible=False, optional=True),
Column("marked", ""),
Column("name", coltr("Filename")),
Column("folder_path", coltr("Folder"), optional=True),
Column("size", coltr("Size (KB)"), optional=True),
Column("extension", coltr("Kind"), visible=False, optional=True),
Column("dimensions", coltr("Dimensions"), optional=True),
Column("exif_timestamp", coltr("EXIF Timestamp"), visible=False, optional=True),
Column("mtime", coltr("Modification"), visible=False, optional=True),
Column("percentage", coltr("Match %"), optional=True),
Column("dupe_count", coltr("Dupe Count"), visible=False, optional=True),
]
DELTA_COLUMNS = {'size', 'dimensions', 'mtime'}
DELTA_COLUMNS = {"size", "dimensions", "mtime"}

@@ -10,6 +10,7 @@ from core.scanner import Scanner, ScanType, ScanOption

from . import matchblock, matchexif

class ScannerPE(Scanner):
cache_path = None
match_scaled = False
@@ -28,10 +29,9 @@ class ScannerPE(Scanner):
cache_path=self.cache_path,
threshold=self.min_match_percentage,
match_scaled=self.match_scaled,
j=j
j=j,
)
elif self.scan_type == ScanType.ExifTimestamp:
return matchexif.getmatches(files, self.match_scaled, j)
else:
raise Exception("Invalid scan type")