Merge core_{se,me,pe} into core.{se,me,pe}

1
core/pe/__init__.py
Normal file
@@ -0,0 +1 @@
from . import block, cache, exif, iphoto_plist, matchblock, matchexif, photo, prioritize, result_table, scanner # noqa

122
core/pe/block.py
Normal file
@@ -0,0 +1,122 @@
# Created By: Virgil Dupras
# Created On: 2006/09/01
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2 # NOQA
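
# Illustrative sketch of the C functions re-exported above (PIL and the file
# names are assumptions of this example, not part of the module):
#
#     from PIL import Image
#     blocks_a = getblocks2(Image.open('a.jpg'), 15)  # 15x15 = 225 mean-color blocks
#     blocks_b = getblocks2(Image.open('b.jpg'), 15)
#     d = avgdiff(blocks_a, blocks_b, 100, 3)         # limit=100, min_iterations=3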

# Converted to C
# def getblock(image):
#     """Returns a 3-sized tuple containing the mean color of 'image'.
#
#     image: a PIL image or crop.
#     """
#     if image.size[0]:
#         pixel_count = image.size[0] * image.size[1]
#         red = green = blue = 0
#         for r, g, b in image.getdata():
#             red += r
#             green += g
#             blue += b
#         return (red // pixel_count, green // pixel_count, blue // pixel_count)
#     else:
#         return (0, 0, 0)

# This is not used anymore
# def getblocks(image, blocksize):
#     """Returns a list of blocks (3-sized tuples).
#
#     image: A PIL image to base the blocks on.
#     blocksize: The size of the blocks to be created. This is a single integer, defining
#     both width and height (blocks are square).
#     """
#     if min(image.size) < blocksize:
#         return ()
#     result = []
#     for i in xrange(image.size[1] // blocksize):
#         for j in xrange(image.size[0] // blocksize):
#             box = (blocksize * j, blocksize * i, blocksize * (j + 1), blocksize * (i + 1))
#             crop = image.crop(box)
#             result.append(getblock(crop))
#     return result

# Converted to C
# def getblocks2(image, block_count_per_side):
#     """Returns a list of blocks (3-sized tuples).
#
#     image: A PIL image to base the blocks on.
#     block_count_per_side: This integer determines the number of blocks the function will return.
#     If it is 10, for example, 100 blocks will be returned (10 width, 10 height). The blocks will
#     not necessarily cover square areas. The area covered by each block will be proportional to
#     the image itself.
#     """
#     if not image.size[0]:
#         return []
#     width, height = image.size
#     block_width = max(width // block_count_per_side, 1)
#     block_height = max(height // block_count_per_side, 1)
#     result = []
#     for ih in range(block_count_per_side):
#         top = min(ih * block_height, height - block_height)
#         bottom = top + block_height
#         for iw in range(block_count_per_side):
#             left = min(iw * block_width, width - block_width)
#             right = left + block_width
#             box = (left, top, right, bottom)
#             crop = image.crop(box)
#             result.append(getblock(crop))
#     return result

# Converted to C
# def diff(first, second):
#     """Returns the difference between the first block and the second.
#
#     It returns an absolute sum of the 3 differences (RGB).
#     """
#     r1, g1, b1 = first
#     r2, g2, b2 = second
#     return abs(r1 - r2) + abs(g1 - g2) + abs(b1 - b2)

# Converted to C
# def avgdiff(first, second, limit=768, min_iterations=1):
#     """Returns the average diff between first blocks and seconds.
#
#     If the result surpasses limit, limit + 1 is returned, except if less than min_iterations
#     iterations have been made in the blocks.
#     """
#     if len(first) != len(second):
#         raise DifferentBlockCountError
#     if not first:
#         raise NoBlocksError
#     count = len(first)
#     sum = 0
#     zipped = izip(xrange(1, count + 1), first, second)
#     for i, first, second in zipped:
#         sum += diff(first, second)
#         if sum > limit * i and i >= min_iterations:
#             return limit + 1
#     result = sum // count
#     if (not result) and sum:
#         result = 1
#     return result

# This is not used anymore
# def maxdiff(first, second, limit=768):
#     """Returns the max diff between first blocks and seconds.
#
#     If the result surpasses limit, the first max being over limit is returned.
#     """
#     if len(first) != len(second):
#         raise DifferentBlockCountError
#     if not first:
#         raise NoBlocksError
#     result = 0
#     zipped = zip(first, second)
#     for first, second in zipped:
#         result = max(result, diff(first, second))
#         if result > limit:
#             return result
#     return result

162
core/pe/cache.py
Normal file
@@ -0,0 +1,162 @@
# Created By: Virgil Dupras
# Created On: 2006/09/14
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

import os
import os.path as op
import logging
import sqlite3 as sqlite

from ._cache import string_to_colors

def colors_to_string(colors):
    """Transform the 3-sized tuples 'colors' into a hex string.

    [(0,100,255)] --> 0064ff
    [(1,2,3),(4,5,6)] --> 010203040506
    """
    return ''.join(['%02x%02x%02x' % (r, g, b) for r, g, b in colors])

# This function is an important bottleneck of dupeGuru PE. It has been converted to C.
# def string_to_colors(s):
#     """Transform the string 's' into a list of 3-sized tuples.
#     """
#     result = []
#     for i in xrange(0, len(s), 6):
#         number = int(s[i:i+6], 16)
#         result.append((number >> 16, (number >> 8) & 0xff, number & 0xff))
#     return result
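
# Quick round-trip sketch (illustrative): colors_to_string and the C
# string_to_colors are inverses of each other.
#
#     >>> colors_to_string([(0, 100, 255)])
#     '0064ff'
#     >>> string_to_colors('0064ff')
#     [(0, 100, 255)]
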
class Cache:
    """A class to cache picture blocks.
    """
    def __init__(self, db=':memory:'):
        self.dbname = db
        self.con = None
        self._create_con()

    def __contains__(self, key):
        sql = "select count(*) from pictures where path = ?"
        result = self.con.execute(sql, [key]).fetchall()
        return result[0][0] > 0

    def __delitem__(self, key):
        if key not in self:
            raise KeyError(key)
        sql = "delete from pictures where path = ?"
        self.con.execute(sql, [key])

    # Optimized
    def __getitem__(self, key):
        if isinstance(key, int):
            sql = "select blocks from pictures where rowid = ?"
        else:
            sql = "select blocks from pictures where path = ?"
        result = self.con.execute(sql, [key]).fetchone()
        if result:
            result = string_to_colors(result[0])
            return result
        else:
            raise KeyError(key)

    def __iter__(self):
        sql = "select path from pictures"
        result = self.con.execute(sql)
        return (row[0] for row in result)

    def __len__(self):
        sql = "select count(*) from pictures"
        result = self.con.execute(sql).fetchall()
        return result[0][0]

    def __setitem__(self, path_str, blocks):
        blocks = colors_to_string(blocks)
        if op.exists(path_str):
            mtime = int(os.stat(path_str).st_mtime)
        else:
            mtime = 0
        if path_str in self:
            sql = "update pictures set blocks = ?, mtime = ? where path = ?"
        else:
            sql = "insert into pictures(blocks,mtime,path) values(?,?,?)"
        try:
            self.con.execute(sql, [blocks, mtime, path_str])
        except sqlite.OperationalError:
            logging.warning('Picture cache could not set value for key %r', path_str)
        except sqlite.DatabaseError as e:
            logging.warning('DatabaseError while setting value for key %r: %s', path_str, str(e))

    def _create_con(self, second_try=False):
        def create_tables():
            logging.debug("Creating picture cache tables.")
            self.con.execute("drop table if exists pictures")
            self.con.execute("drop index if exists idx_path")
            self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)")
            self.con.execute("create index idx_path on pictures (path)")

        self.con = sqlite.connect(self.dbname, isolation_level=None)
        try:
            self.con.execute("select path, mtime, blocks from pictures where 1=2")
        except sqlite.OperationalError: # new db
            create_tables()
        except sqlite.DatabaseError as e: # corrupted db
            if second_try:
                raise # Something really strange is happening
            logging.warning('Could not create picture cache because of an error: %s', str(e))
            self.con.close()
            os.remove(self.dbname)
            self._create_con(second_try=True)

    def clear(self):
        self.close()
        if self.dbname != ':memory:':
            os.remove(self.dbname)
        self._create_con()

    def close(self):
        if self.con is not None:
            self.con.close()
        self.con = None

    def filter(self, func):
        to_delete = [key for key in self if not func(key)]
        for key in to_delete:
            del self[key]

    def get_id(self, path):
        sql = "select rowid from pictures where path = ?"
        result = self.con.execute(sql, [path]).fetchone()
        if result:
            return result[0]
        else:
            raise ValueError(path)

    def get_multiple(self, rowids):
        sql = "select rowid, blocks from pictures where rowid in (%s)" % ','.join(map(str, rowids))
        cur = self.con.execute(sql)
        return ((rowid, string_to_colors(blocks)) for rowid, blocks in cur)

    def purge_outdated(self):
        """Go through the cache and purge outdated records.

        A record is outdated if the picture doesn't exist or if its mtime is greater than the one in
        the db.
        """
        todelete = []
        sql = "select rowid, path, mtime from pictures"
        cur = self.con.execute(sql)
        for rowid, path_str, mtime in cur:
            if mtime and op.exists(path_str):
                picture_mtime = os.stat(path_str).st_mtime
                if int(picture_mtime) <= mtime:
                    # not outdated
                    continue
            todelete.append(rowid)
        if todelete:
            sql = "delete from pictures where rowid in (%s)" % ','.join(map(str, todelete))
            self.con.execute(sql)

335
core/pe/exif.py
Normal file
@@ -0,0 +1,335 @@
# Created By: Virgil Dupras
# Created On: 2011-04-20
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

# Heavily based on http://topo.math.u-psud.fr/~bousch/exifdump.py by Thierry Bousch (Public Domain)

import logging

EXIF_TAGS = {
    0x0100: "ImageWidth",
    0x0101: "ImageLength",
    0x0102: "BitsPerSample",
    0x0103: "Compression",
    0x0106: "PhotometricInterpretation",
    0x010A: "FillOrder",
    0x010D: "DocumentName",
    0x010E: "ImageDescription",
    0x010F: "Make",
    0x0110: "Model",
    0x0111: "StripOffsets",
    0x0112: "Orientation",
    0x0115: "SamplesPerPixel",
    0x0116: "RowsPerStrip",
    0x0117: "StripByteCounts",
    0x011A: "XResolution",
    0x011B: "YResolution",
    0x011C: "PlanarConfiguration",
    0x0128: "ResolutionUnit",
    0x012D: "TransferFunction",
    0x0131: "Software",
    0x0132: "DateTime",
    0x013B: "Artist",
    0x013E: "WhitePoint",
    0x013F: "PrimaryChromaticities",
    0x0156: "TransferRange",
    0x0200: "JPEGProc",
    0x0201: "JPEGInterchangeFormat",
    0x0202: "JPEGInterchangeFormatLength",
    0x0211: "YCbCrCoefficients",
    0x0212: "YCbCrSubSampling",
    0x0213: "YCbCrPositioning",
    0x0214: "ReferenceBlackWhite",
    0x828F: "BatteryLevel",
    0x8298: "Copyright",
    0x829A: "ExposureTime",
    0x829D: "FNumber",
    0x83BB: "IPTC/NAA",
    0x8769: "ExifIFDPointer",
    0x8773: "InterColorProfile",
    0x8822: "ExposureProgram",
    0x8824: "SpectralSensitivity",
    0x8825: "GPSInfoIFDPointer",
    0x8827: "ISOSpeedRatings",
    0x8828: "OECF",
    0x9000: "ExifVersion",
    0x9003: "DateTimeOriginal",
    0x9004: "DateTimeDigitized",
    0x9101: "ComponentsConfiguration",
    0x9102: "CompressedBitsPerPixel",
    0x9201: "ShutterSpeedValue",
    0x9202: "ApertureValue",
    0x9203: "BrightnessValue",
    0x9204: "ExposureBiasValue",
    0x9205: "MaxApertureValue",
    0x9206: "SubjectDistance",
    0x9207: "MeteringMode",
    0x9208: "LightSource",
    0x9209: "Flash",
    0x920A: "FocalLength",
    0x9214: "SubjectArea",
    0x927C: "MakerNote",
    0x9286: "UserComment",
    0x9290: "SubSecTime",
    0x9291: "SubSecTimeOriginal",
    0x9292: "SubSecTimeDigitized",
    0xA000: "FlashPixVersion",
    0xA001: "ColorSpace",
    0xA002: "PixelXDimension",
    0xA003: "PixelYDimension",
    0xA004: "RelatedSoundFile",
    0xA005: "InteroperabilityIFDPointer",
    0xA20B: "FlashEnergy", # 0x920B in TIFF/EP
    0xA20C: "SpatialFrequencyResponse", # 0x920C - -
    0xA20E: "FocalPlaneXResolution", # 0x920E - -
    0xA20F: "FocalPlaneYResolution", # 0x920F - -
    0xA210: "FocalPlaneResolutionUnit", # 0x9210 - -
    0xA214: "SubjectLocation", # 0x9214 - -
    0xA215: "ExposureIndex", # 0x9215 - -
    0xA217: "SensingMethod", # 0x9217 - -
    0xA300: "FileSource",
    0xA301: "SceneType",
    0xA302: "CFAPattern", # 0x828E in TIFF/EP
    0xA401: "CustomRendered",
    0xA402: "ExposureMode",
    0xA403: "WhiteBalance",
    0xA404: "DigitalZoomRatio",
    0xA405: "FocalLengthIn35mmFilm",
    0xA406: "SceneCaptureType",
    0xA407: "GainControl",
    0xA408: "Contrast",
    0xA409: "Saturation",
    0xA40A: "Sharpness",
    0xA40B: "DeviceSettingDescription",
    0xA40C: "SubjectDistanceRange",
    0xA420: "ImageUniqueID",
}

INTR_TAGS = {
    0x0001: "InteroperabilityIndex",
    0x0002: "InteroperabilityVersion",
    0x1000: "RelatedImageFileFormat",
    0x1001: "RelatedImageWidth",
    0x1002: "RelatedImageLength",
}

GPS_TAGS = {
    0x00: "GPSVersionID",
    0x01: "GPSLatitudeRef",
    0x02: "GPSLatitude",
    0x03: "GPSLongitudeRef",
    0x04: "GPSLongitude",
    0x05: "GPSAltitudeRef",
    0x06: "GPSAltitude",
    0x07: "GPSTimeStamp",
    0x08: "GPSSatellites",
    0x09: "GPSStatus",
    0x0A: "GPSMeasureMode",
    0x0B: "GPSDOP",
    0x0C: "GPSSpeedRef",
    0x0D: "GPSSpeed",
    0x0E: "GPSTrackRef",
    0x0F: "GPSTrack",
    0x10: "GPSImgDirectionRef",
    0x11: "GPSImgDirection",
    0x12: "GPSMapDatum",
    0x13: "GPSDestLatitudeRef",
    0x14: "GPSDestLatitude",
    0x15: "GPSDestLongitudeRef",
    0x16: "GPSDestLongitude",
    0x17: "GPSDestBearingRef",
    0x18: "GPSDestBearing",
    0x19: "GPSDestDistanceRef",
    0x1A: "GPSDestDistance",
    0x1B: "GPSProcessingMethod",
    0x1C: "GPSAreaInformation",
    0x1D: "GPSDateStamp",
    0x1E: "GPSDifferential",
}

INTEL_ENDIAN = ord('I')
MOTOROLA_ENDIAN = ord('M')

# About MAX_COUNT: It's possible to have corrupted exif tags where the entry count is way too high
# and thus makes us loop, not endlessly, but for a heck of a long time for nothing. Therefore, we
# put an arbitrary limit on the entry count we'll allow ourselves to read and any IFD reporting
# more entries than that will be considered corrupt.
MAX_COUNT = 0xffff

def s2n_motorola(bytes):
    x = 0
    for c in bytes:
        x = (x << 8) | c
    return x

def s2n_intel(bytes):
    x = 0
    y = 0
    for c in bytes:
        x = x | (c << y)
        y = y + 8
    return x
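
# For example (illustrative): both functions read b'\x01\x02' as a 2-byte
# integer, but s2n_intel(b'\x01\x02') == 0x0201 (little-endian) while
# s2n_motorola(b'\x01\x02') == 0x0102 (big-endian).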

class Fraction:
    def __init__(self, num, den):
        self.num = num
        self.den = den

    def __repr__(self):
        return '%d/%d' % (self.num, self.den)


class TIFF_file:
    def __init__(self, data):
        self.data = data
        self.endian = data[0]
        self.s2nfunc = s2n_intel if self.endian == INTEL_ENDIAN else s2n_motorola

    def s2n(self, offset, length, signed=0, debug=False):
        slice = self.data[offset:offset+length]
        val = self.s2nfunc(slice)
        # Sign extension?
        if signed:
            msb = 1 << (8*length - 1)
            if val & msb:
                val = val - (msb << 1)
        if debug:
            logging.debug(self.endian)
            logging.debug("Slice for offset %d length %d: %r and value: %d", offset, length, slice, val)
        return val

    def first_IFD(self):
        return self.s2n(4, 4)

    def next_IFD(self, ifd):
        entries = self.s2n(ifd, 2)
        return self.s2n(ifd + 2 + 12 * entries, 4)

    def list_IFDs(self):
        i = self.first_IFD()
        a = []
        while i:
            a.append(i)
            i = self.next_IFD(i)
        return a

    def dump_IFD(self, ifd):
        entries = self.s2n(ifd, 2)
        logging.debug("Entries for IFD %d: %d", ifd, entries)
        if entries > MAX_COUNT:
            logging.debug("Probably corrupt. Aborting.")
            return []
        a = []
        for i in range(entries):
            entry = ifd + 2 + 12*i
            tag = self.s2n(entry, 2)
            type = self.s2n(entry+2, 2)
            if not 1 <= type <= 10:
                continue # not handled
            typelen = [1, 1, 2, 4, 8, 1, 1, 2, 4, 8][type-1]
            count = self.s2n(entry+4, 4)
            if count > MAX_COUNT:
                logging.debug("Probably corrupt. Aborting.")
                return []
            offset = entry+8
            if count*typelen > 4:
                offset = self.s2n(offset, 4)
            if type == 2:
                # Special case: nul-terminated ASCII string
                values = str(self.data[offset:offset+count-1], encoding='latin-1')
            else:
                values = []
                signed = (type == 6 or type >= 8)
                for j in range(count):
                    if type in {5, 10}:
                        # The type is either 5 or 10
                        value_j = Fraction(self.s2n(offset, 4, signed),
                                           self.s2n(offset+4, 4, signed))
                    else:
                        # Not a fraction
                        value_j = self.s2n(offset, typelen, signed)
                    values.append(value_j)
                    offset = offset + typelen
            # Now "values" is either a string or an array
            a.append((tag, type, values))
        return a

def read_exif_header(fp):
    # If `fp`'s first bytes are not exif, it tries to find it in the next 4kb
    def isexif(data):
        return data[0:4] == b'\377\330\377\341' and data[6:10] == b'Exif'
    data = fp.read(12)
    if isexif(data):
        return data
    # ok, not exif, try to find it
    large_data = fp.read(4096)
    try:
        index = large_data.index(b'Exif')
        data = large_data[index-6:index+6]
        # large_data omits the first 12 bytes, and the index is at the middle of the header, so we
        # must seek index + 18
        fp.seek(index+18)
        return data
    except ValueError:
        raise ValueError("Not an Exif file")

def get_fields(fp):
    data = read_exif_header(fp)
    length = data[4] * 256 + data[5]
    logging.debug("Exif header length: %d bytes", length)
    data = fp.read(length-8)
    data_format = data[0]
    logging.debug("%s format", {INTEL_ENDIAN: 'Intel', MOTOROLA_ENDIAN: 'Motorola'}[data_format])
    T = TIFF_file(data)
    # There may be more than one IFD per file, but we only read the first one because others are
    # most likely thumbnails.
    main_IFD_offset = T.first_IFD()
    result = {}

    def add_tag_to_result(tag, values):
        try:
            stag = EXIF_TAGS[tag]
        except KeyError:
            stag = '0x%04X' % tag
        if stag in result:
            return # don't overwrite data
        result[stag] = values

    logging.debug("IFD at offset %d", main_IFD_offset)
    IFD = T.dump_IFD(main_IFD_offset)
    exif_off = gps_off = 0
    for tag, type, values in IFD:
        if tag == 0x8769:
            exif_off = values[0]
            continue
        if tag == 0x8825:
            gps_off = values[0]
            continue
        add_tag_to_result(tag, values)
    if exif_off:
        logging.debug("Exif SubIFD at offset %d:", exif_off)
        IFD = T.dump_IFD(exif_off)
        # Recent digital cameras have a little subdirectory
        # here, pointed to by tag 0xA005. Apparently, it's the
        # "Interoperability IFD", defined in Exif 2.1 and DCF.
        intr_off = 0
        for tag, type, values in IFD:
            if tag == 0xA005:
                intr_off = values[0]
                continue
            add_tag_to_result(tag, values)
        if intr_off:
            logging.debug("Exif Interoperability SubSubIFD at offset %d:", intr_off)
            IFD = T.dump_IFD(intr_off)
            for tag, type, values in IFD:
                add_tag_to_result(tag, values)
    if gps_off:
        logging.debug("GPS SubIFD at offset %d:", gps_off)
        IFD = T.dump_IFD(gps_off)
        for tag, type, values in IFD:
            add_tag_to_result(tag, values)
    return result
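
# Minimal usage sketch (the file name is hypothetical):
#
#     with open('photo.jpg', 'rb') as fp:
#         fields = get_fields(fp)
#     fields.get('DateTimeOriginal')  # e.g. '2011:04:20 10:00:00'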

31
core/pe/iphoto_plist.py
Normal file
@@ -0,0 +1,31 @@
# Created By: Virgil Dupras
# Created On: 2014-03-15
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

import plistlib

class IPhotoPlistParser(plistlib._PlistParser):
    """A parser for iPhoto plists.

    iPhoto plists tend to be malformed, so we have to subclass the built-in parser to be a bit more
    lenient.
    """
    def __init__(self):
        plistlib._PlistParser.__init__(self, use_builtin_types=True, dict_type=dict)
        # For debugging purposes, we remember the last bit of data to be analyzed so that we can
        # log it in case of an exception
        self.lastdata = ''

    def get_data(self):
        self.lastdata = plistlib._PlistParser.get_data(self)
        return self.lastdata

    def end_integer(self):
        try:
            self.add_object(int(self.get_data()))
        except ValueError:
            self.add_object(0)
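
# Illustrative effect of the override (hypothetical plist data): a malformed
# <integer>12abc34</integer> makes int() raise ValueError, so end_integer()
# stores 0 instead of aborting the whole parse.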

222
core/pe/matchblock.py
Normal file
@@ -0,0 +1,222 @@
# Created By: Virgil Dupras
# Created On: 2007/02/25
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

import logging
import multiprocessing
from itertools import combinations

from hscommon.util import extract, iterconsume
from hscommon.trans import tr
from hscommon.jobprogress import job

from core.engine import Match
from .block import avgdiff, DifferentBlockCountError, NoBlocksError
from .cache import Cache

# OPTIMIZATION NOTES:
# The bottleneck of the matching phase is CPU, which is why we use multiprocessing. However, another
# bottleneck that shows up when a lot of pictures are involved is Disk IOs, because blocks
# constantly have to be read from disk by subprocesses. This problem is especially big on CPUs
# with a lot of cores. Therefore, we must minimize Disk IOs. The best way to achieve that is to
# separate the files to scan in "chunks", and it's by chunk that blocks are read in memory and
# compared to each other. Each file in a chunk has to be compared to each other, of course, but also
# to files in other chunks. So chunkifying doesn't save us any actual comparison, but the advantage
# is that instead of reading blocks from disk number_of_files**2 times, we read them
# number_of_files*number_of_chunks times (see the worked example below).
# Determining the right chunk size is tricky, because if it's too big, too many blocks will be in
# memory at the same time and we might end up with memory thrashing, which is awfully slow. So,
# because our *real* bottleneck is CPU, the chunk size must simply be enough so that the CPU isn't
# starved by Disk IOs.
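# For example (illustrative numbers): with 10,000 pictures split into 10 chunks
# of 1,000, blocks get read from disk on the order of 10,000 * 10 = 100,000
# times instead of 10,000**2 = 100,000,000 times, while the number of actual
# block comparisons stays the same.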

MIN_ITERATIONS = 3
BLOCK_COUNT_PER_SIDE = 15
DEFAULT_CHUNK_SIZE = 1000
MIN_CHUNK_SIZE = 100

# Enough so that we're sure that the main thread will not wait after a result.get() call
# cpucount+1 should be enough to be sure that the spawned process will not wait after the results
# collection made by the main process.
try:
    RESULTS_QUEUE_LIMIT = multiprocessing.cpu_count() + 1
except Exception:
    # I had an IOError on app launch once. It seems to be a freak occurrence. In any case, we want
    # the app to launch, so let's just put an arbitrary value.
    logging.warning("Had problems determining cpu count on launch.")
    RESULTS_QUEUE_LIMIT = 8

def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
    # The MemoryError handlers in there use logging without first caring about whether or not
    # there is enough memory left to carry on the operation because it is assumed that the
    # MemoryError happens when trying to read an image file, which is freed from memory by the
    # time that MemoryError is raised.
    cache = Cache(cache_path)
    cache.purge_outdated()
    prepared = [] # only pictures for which there was no error getting blocks
    try:
        for picture in j.iter_with_progress(pictures, tr("Analyzed %d/%d pictures")):
            if not picture.path:
                # XXX Find the root cause of this. I've received reports of crashes where we had
                # "Analyzing picture at " (without a path) in the debug log. It was an iPhoto scan.
                # For now, I'm simply working around the crash by ignoring those, but it would be
                # interesting to know exactly why this happens. I'm suspecting a malformed
                # entry in iPhoto library.
                logging.warning("We have a picture with a null path here")
                continue
            picture.unicode_path = str(picture.path)
            logging.debug("Analyzing picture at %s", picture.unicode_path)
            if with_dimensions:
                picture.dimensions # pre-read dimensions
            try:
                if picture.unicode_path not in cache:
                    blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
                    cache[picture.unicode_path] = blocks
                prepared.append(picture)
            except (IOError, ValueError) as e:
                logging.warning(str(e))
            except MemoryError:
                logging.warning("Ran out of memory while reading %s of size %d", picture.unicode_path, picture.size)
                if picture.size < 10 * 1024 * 1024: # We're really running out of memory
                    raise
    except MemoryError:
        logging.warning('Ran out of memory while preparing pictures')
    cache.close()
    return prepared

def get_chunks(pictures):
    min_chunk_count = multiprocessing.cpu_count() * 2 # have enough chunks to feed all subprocesses
    chunk_count = len(pictures) // DEFAULT_CHUNK_SIZE
    chunk_count = max(min_chunk_count, chunk_count)
    chunk_size = (len(pictures) // chunk_count) + 1
    chunk_size = max(MIN_CHUNK_SIZE, chunk_size)
    logging.info(
        "Creating %d chunks with a chunk size of %d for %d pictures", chunk_count,
        chunk_size, len(pictures)
    )
    chunks = [pictures[i:i+chunk_size] for i in range(0, len(pictures), chunk_size)]
    return chunks

def get_match(first, second, percentage):
    if percentage < 0:
        percentage = 0
    return Match(first, second, percentage)

def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
    # The list of ids in ref_ids has to be compared to the list of ids in other_ids. other_ids
    # can be None. In this case, ref_ids has to be compared with itself.
    # picinfo is a dictionary {pic_id: (dimensions, is_ref)}
    cache = Cache(dbname)
    limit = 100 - threshold
    ref_pairs = list(cache.get_multiple(ref_ids))
    if other_ids is not None:
        other_pairs = list(cache.get_multiple(other_ids))
        comparisons_to_do = [(r, o) for r in ref_pairs for o in other_pairs]
    else:
        comparisons_to_do = list(combinations(ref_pairs, 2))
    results = []
    for (ref_id, ref_blocks), (other_id, other_blocks) in comparisons_to_do:
        ref_dimensions, ref_is_ref = picinfo[ref_id]
        other_dimensions, other_is_ref = picinfo[other_id]
        if ref_is_ref and other_is_ref:
            continue
        if ref_dimensions != other_dimensions:
            continue
        try:
            diff = avgdiff(ref_blocks, other_blocks, limit, MIN_ITERATIONS)
            percentage = 100 - diff
        except (DifferentBlockCountError, NoBlocksError):
            percentage = 0
        if percentage >= threshold:
            results.append((ref_id, other_id, percentage))
    cache.close()
    return results

def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nulljob):
    def get_picinfo(p):
        if match_scaled:
            return (None, p.is_ref)
        else:
            return (p.dimensions, p.is_ref)

    def collect_results(collect_all=False):
        # Collect results and wait until the queue is small enough to accommodate new results.
        nonlocal async_results, matches, comparison_count, comparisons_to_do
        limit = 0 if collect_all else RESULTS_QUEUE_LIMIT
        while len(async_results) > limit:
            ready, working = extract(lambda r: r.ready(), async_results)
            for result in ready:
                matches += result.get()
                async_results.remove(result)
                comparison_count += 1
        # About the NOQA below: I think there's a bug in pyflakes. To investigate...
        progress_msg = tr("Performed %d/%d chunk matches") % (comparison_count, len(comparisons_to_do)) # NOQA
        j.set_progress(comparison_count, progress_msg)

    j = j.start_subjob([3, 7])
    pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j)
    j = j.start_subjob([9, 1], tr("Preparing for matching"))
    cache = Cache(cache_path)
    id2picture = {}
    for picture in pictures:
        try:
            picture.cache_id = cache.get_id(picture.unicode_path)
            id2picture[picture.cache_id] = picture
        except ValueError:
            pass
    cache.close()
    pictures = [p for p in pictures if hasattr(p, 'cache_id')]
    pool = multiprocessing.Pool()
    async_results = []
    matches = []
    chunks = get_chunks(pictures)
    # We add a None element at the end of the chunk list because each chunk has to be compared
    # with itself. Thus, each chunk will show up as a ref_chunk having other_chunk set to None once.
    comparisons_to_do = list(combinations(chunks + [None], 2))
    comparison_count = 0
    j.start_job(len(comparisons_to_do))
    try:
        for ref_chunk, other_chunk in comparisons_to_do:
            picinfo = {p.cache_id: get_picinfo(p) for p in ref_chunk}
            ref_ids = [p.cache_id for p in ref_chunk]
            if other_chunk is not None:
                other_ids = [p.cache_id for p in other_chunk]
                picinfo.update({p.cache_id: get_picinfo(p) for p in other_chunk})
            else:
                other_ids = None
            args = (ref_ids, other_ids, cache_path, threshold, picinfo)
            async_results.append(pool.apply_async(async_compare, args))
            collect_results()
        collect_results(collect_all=True)
    except MemoryError:
        # Rare, but possible, even in 64bit situations (ref #264). What do we do now? We free up
        # some wiggle room, log about the incident, and stop matching right here. We then process
        # the matches we have. The rest of the process doesn't allocate much and we should be
        # alright.
        del comparisons_to_do, chunks, pictures # some wiggle room for the next statements
        logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches))
        del matches[-len(matches)//3:] # some wiggle room to ensure we don't run out of memory again.
    pool.close()
    result = []
    myiter = j.iter_with_progress(
        iterconsume(matches, reverse=False),
        tr("Verified %d/%d matches"),
        every=10,
        count=len(matches),
    )
    for ref_id, other_id, percentage in myiter:
        ref = id2picture[ref_id]
        other = id2picture[other_id]
        if percentage == 100 and ref.md5 != other.md5:
            percentage = 99
        if percentage >= threshold:
            ref.dimensions # pre-read dimensions for display in results
            other.dimensions
            result.append(get_match(ref, other, percentage))
    return result

multiprocessing.freeze_support()

31
core/pe/matchexif.py
Normal file
@@ -0,0 +1,31 @@
# Created By: Virgil Dupras
# Created On: 2011-04-20
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

from collections import defaultdict
from itertools import combinations

from hscommon.trans import tr

from core.engine import Match

def getmatches(files, match_scaled, j):
    timestamp2pic = defaultdict(set)
    for picture in j.iter_with_progress(files, tr("Read EXIF of %d/%d pictures")):
        timestamp = picture.exif_timestamp
        if timestamp:
            timestamp2pic[timestamp].add(picture)
    if '0000:00:00 00:00:00' in timestamp2pic: # very likely false matches
        del timestamp2pic['0000:00:00 00:00:00']
    matches = []
    for pictures in timestamp2pic.values():
        for p1, p2 in combinations(pictures, 2):
            if (not match_scaled) and (p1.dimensions != p2.dimensions):
                continue
            matches.append(Match(p1, p2, 100))
    return matches
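
# In short (illustrative): pictures whose EXIF timestamp is, say,
# '2011:04:20 10:00:00' all land in the same timestamp2pic bucket and are
# paired as 100% matches, unless match_scaled is off and their dimensions differ.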

253
core/pe/modules/block.c
Normal file
@@ -0,0 +1,253 @@
/* Created By: Virgil Dupras
 * Created On: 2010-01-30
 * Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
 *
 * This software is licensed under the "BSD" License as described in the "LICENSE" file,
 * which should be included with this package. The terms are also available at
 * http://www.hardcoded.net/licenses/bsd_license
 */

#include "common.h"

/* avgdiff/maxdiff has been called with empty lists */
static PyObject *NoBlocksError;
/* avgdiff/maxdiff has been called with 2 block lists of different size. */
static PyObject *DifferentBlockCountError;

/* Returns a 3-sized tuple containing the mean color of 'image'.
 * image: a PIL image or crop.
 */
static PyObject* getblock(PyObject *image)
{
    int i, totr, totg, totb;
    Py_ssize_t pixel_count;
    PyObject *ppixels;

    totr = totg = totb = 0;
    ppixels = PyObject_CallMethod(image, "getdata", NULL);
    if (ppixels == NULL) {
        return NULL;
    }

    pixel_count = PySequence_Length(ppixels);
    for (i=0; i<pixel_count; i++) {
        PyObject *ppixel, *pr, *pg, *pb;
        int r, g, b;

        ppixel = PySequence_ITEM(ppixels, i);
        pr = PySequence_ITEM(ppixel, 0);
        pg = PySequence_ITEM(ppixel, 1);
        pb = PySequence_ITEM(ppixel, 2);
        Py_DECREF(ppixel);
        r = PyLong_AsLong(pr);
        g = PyLong_AsLong(pg);
        b = PyLong_AsLong(pb);
        Py_DECREF(pr);
        Py_DECREF(pg);
        Py_DECREF(pb);

        totr += r;
        totg += g;
        totb += b;
    }

    Py_DECREF(ppixels);

    if (pixel_count) {
        totr /= pixel_count;
        totg /= pixel_count;
        totb /= pixel_count;
    }

    return inttuple(3, totr, totg, totb);
}

/* Returns the difference between the first block and the second.
 * It returns an absolute sum of the 3 differences (RGB).
 */
static int diff(PyObject *first, PyObject *second)
{
    int r1, g1, b1, r2, b2, g2;
    PyObject *pr, *pg, *pb;
    pr = PySequence_ITEM(first, 0);
    pg = PySequence_ITEM(first, 1);
    pb = PySequence_ITEM(first, 2);
    r1 = PyLong_AsLong(pr);
    g1 = PyLong_AsLong(pg);
    b1 = PyLong_AsLong(pb);
    Py_DECREF(pr);
    Py_DECREF(pg);
    Py_DECREF(pb);

    pr = PySequence_ITEM(second, 0);
    pg = PySequence_ITEM(second, 1);
    pb = PySequence_ITEM(second, 2);
    r2 = PyLong_AsLong(pr);
    g2 = PyLong_AsLong(pg);
    b2 = PyLong_AsLong(pb);
    Py_DECREF(pr);
    Py_DECREF(pg);
    Py_DECREF(pb);

    return abs(r1 - r2) + abs(g1 - g2) + abs(b1 - b2);
}

PyDoc_STRVAR(block_getblocks2_doc,
"Returns a list of blocks (3-sized tuples).\n\
\n\
image: A PIL image to base the blocks on.\n\
block_count_per_side: This integer determines the number of blocks the function will return.\n\
If it is 10, for example, 100 blocks will be returned (10 width, 10 height). The blocks will not\n\
necessarily cover square areas. The area covered by each block will be proportional to the image\n\
itself.\n");

static PyObject* block_getblocks2(PyObject *self, PyObject *args)
{
    int block_count_per_side, width, height, block_width, block_height, ih;
    PyObject *image;
    PyObject *pimage_size, *pwidth, *pheight;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "Oi", &image, &block_count_per_side)) {
        return NULL;
    }

    pimage_size = PyObject_GetAttrString(image, "size");
    pwidth = PySequence_ITEM(pimage_size, 0);
    pheight = PySequence_ITEM(pimage_size, 1);
    width = PyLong_AsLong(pwidth);
    height = PyLong_AsLong(pheight);
    Py_DECREF(pimage_size);
    Py_DECREF(pwidth);
    Py_DECREF(pheight);

    if (!(width && height)) {
        return PyList_New(0);
    }

    block_width = max(width / block_count_per_side, 1);
    block_height = max(height / block_count_per_side, 1);

    result = PyList_New(block_count_per_side * block_count_per_side);
    if (result == NULL) {
        return NULL;
    }

    for (ih=0; ih<block_count_per_side; ih++) {
        int top, bottom, iw;
        top = min(ih*block_height, height-block_height);
        bottom = top + block_height;
        for (iw=0; iw<block_count_per_side; iw++) {
            int left, right;
            PyObject *pbox;
            PyObject *pmethodname;
            PyObject *pcrop;
            PyObject *pblock;

            left = min(iw*block_width, width-block_width);
            right = left + block_width;
            pbox = inttuple(4, left, top, right, bottom);
            pmethodname = PyUnicode_FromString("crop");
            pcrop = PyObject_CallMethodObjArgs(image, pmethodname, pbox, NULL);
            Py_DECREF(pmethodname);
            Py_DECREF(pbox);
            if (pcrop == NULL) {
                Py_DECREF(result);
                return NULL;
            }
            pblock = getblock(pcrop);
            Py_DECREF(pcrop);
            if (pblock == NULL) {
                Py_DECREF(result);
                return NULL;
            }
            PyList_SET_ITEM(result, ih*block_count_per_side+iw, pblock);
        }
    }

    return result;
}

PyDoc_STRVAR(block_avgdiff_doc,
"Returns the average diff between first blocks and seconds.\n\
\n\
If the result surpasses limit, limit + 1 is returned, except if less than min_iterations\n\
iterations have been made in the blocks.\n");

static PyObject* block_avgdiff(PyObject *self, PyObject *args)
{
    PyObject *first, *second;
    int limit, min_iterations;
    Py_ssize_t count;
    int sum, i, result;

    if (!PyArg_ParseTuple(args, "OOii", &first, &second, &limit, &min_iterations)) {
        return NULL;
    }

    count = PySequence_Length(first);
    if (count != PySequence_Length(second)) {
        PyErr_SetString(DifferentBlockCountError, "");
        return NULL;
    }
    if (!count) {
        PyErr_SetString(NoBlocksError, "");
        return NULL;
    }

    sum = 0;
    for (i=0; i<count; i++) {
        int iteration_count;
        PyObject *item1, *item2;

        iteration_count = i + 1;
        item1 = PySequence_ITEM(first, i);
        item2 = PySequence_ITEM(second, i);
        sum += diff(item1, item2);
        Py_DECREF(item1);
        Py_DECREF(item2);
        if ((sum > limit*iteration_count) && (iteration_count >= min_iterations)) {
            return PyLong_FromLong(limit + 1);
        }
    }

    result = sum / count;
    if (!result && sum) {
        result = 1;
    }
    return PyLong_FromLong(result);
}

static PyMethodDef BlockMethods[] = {
    {"getblocks2", block_getblocks2, METH_VARARGS, block_getblocks2_doc},
    {"avgdiff", block_avgdiff, METH_VARARGS, block_avgdiff_doc},
    {NULL, NULL, 0, NULL} /* Sentinel */
};

static struct PyModuleDef BlockDef = {
    PyModuleDef_HEAD_INIT,
    "_block",
    NULL,
    -1,
    BlockMethods,
    NULL,
    NULL,
    NULL,
    NULL
};

PyObject *
PyInit__block(void)
{
    PyObject *m = PyModule_Create(&BlockDef);
    if (m == NULL) {
        return NULL;
    }

    NoBlocksError = PyErr_NewException("_block.NoBlocksError", NULL, NULL);
    PyModule_AddObject(m, "NoBlocksError", NoBlocksError);
    DifferentBlockCountError = PyErr_NewException("_block.DifferentBlockCountError", NULL, NULL);
    PyModule_AddObject(m, "DifferentBlockCountError", DifferentBlockCountError);

    return m;
}

303
core/pe/modules/block_osx.m
Normal file
@@ -0,0 +1,303 @@
/* Created By: Virgil Dupras
 * Created On: 2010-02-04
 * Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
 *
 * This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
 * which should be included with this package. The terms are also available at
 * http://www.gnu.org/licenses/gpl-3.0.html
 */

#include "common.h"

#import <Foundation/Foundation.h>

#define RADIANS( degrees ) ( degrees * M_PI / 180 )

static CFStringRef
pystring2cfstring(PyObject *pystring)
{
    PyObject *encoded;
    UInt8 *s;
    CFIndex size;
    CFStringRef result;

    if (PyUnicode_Check(pystring)) {
        encoded = PyUnicode_AsUTF8String(pystring);
        if (encoded == NULL) {
            return NULL;
        }
    } else {
        encoded = pystring;
        Py_INCREF(encoded);
    }

    s = (UInt8*)PyBytes_AS_STRING(encoded);
    size = PyBytes_GET_SIZE(encoded);
    result = CFStringCreateWithBytes(NULL, s, size, kCFStringEncodingUTF8, FALSE);
    Py_DECREF(encoded);
    return result;
}

static PyObject* block_osx_get_image_size(PyObject *self, PyObject *args)
{
    PyObject *path;
    CFStringRef image_path;
    CFURLRef image_url;
    CGImageSourceRef source;
    CGImageRef image;
    long width, height;
    PyObject *pwidth, *pheight;
    PyObject *result;

    width = 0;
    height = 0;
    if (!PyArg_ParseTuple(args, "O", &path)) {
        return NULL;
    }

    image_path = pystring2cfstring(path);
    if (image_path == NULL) {
        return PyErr_NoMemory();
    }
    image_url = CFURLCreateWithFileSystemPath(NULL, image_path, kCFURLPOSIXPathStyle, FALSE);
    CFRelease(image_path);

    source = CGImageSourceCreateWithURL(image_url, NULL);
    CFRelease(image_url);
    if (source != NULL) {
        image = CGImageSourceCreateImageAtIndex(source, 0, NULL);
        if (image != NULL) {
            width = CGImageGetWidth(image);
            height = CGImageGetHeight(image);
            CGImageRelease(image);
        }
        CFRelease(source);
    }

    pwidth = PyLong_FromLong(width);
    if (pwidth == NULL) {
        return NULL;
    }
    pheight = PyLong_FromLong(height);
    if (pheight == NULL) {
        return NULL;
    }
    result = PyTuple_Pack(2, pwidth, pheight);
    Py_DECREF(pwidth);
    Py_DECREF(pheight);
    return result;
}

static CGContextRef
MyCreateBitmapContext(int width, int height)
{
    CGContextRef context = NULL;
    CGColorSpaceRef colorSpace;
    void *bitmapData;
    int bitmapByteCount;
    int bitmapBytesPerRow;

    bitmapBytesPerRow = (width * 4);
    bitmapByteCount = (bitmapBytesPerRow * height);

    colorSpace = CGColorSpaceCreateWithName(kCGColorSpaceGenericRGB);

    // calloc() must be used to allocate bitmapData here because the buffer has to be zeroed.
    // If it's not zeroed, when images with transparency are drawn in the context, this buffer
    // will stay with undefined pixels, which means that two pictures with the same pixels will
    // most likely have different blocks (which is not supposed to happen).
    bitmapData = calloc(bitmapByteCount, 1);
    if (bitmapData == NULL) {
        fprintf(stderr, "Memory not allocated!");
        return NULL;
    }

    context = CGBitmapContextCreate(bitmapData, width, height, 8, bitmapBytesPerRow, colorSpace,
        (CGBitmapInfo)kCGImageAlphaNoneSkipLast);
    if (context == NULL) {
        free(bitmapData);
        fprintf(stderr, "Context not created!");
        return NULL;
    }
    CGColorSpaceRelease(colorSpace);
    return context;
}

static PyObject* getblock(unsigned char *imageData, int imageWidth, int imageHeight, int boxX, int boxY, int boxW, int boxH)
{
    int i, j, totalR, totalG, totalB;

    totalR = totalG = totalB = 0;
    for (i=boxY; i<boxY+boxH; i++) {
        for (j=boxX; j<boxX+boxW; j++) {
            int offset = (i * imageWidth * 4) + (j * 4);
            totalR += *(imageData + offset);
            totalG += *(imageData + offset + 1);
            totalB += *(imageData + offset + 2);
        }
    }
    int pixelCount = boxH * boxW;
    totalR /= pixelCount;
    totalG /= pixelCount;
    totalB /= pixelCount;

    return inttuple(3, totalR, totalG, totalB);
}

static PyObject* block_osx_getblocks(PyObject *self, PyObject *args)
{
    PyObject *path, *result;
    CFStringRef image_path;
    CFURLRef image_url;
    CGImageSourceRef source;
    CGImageRef image;
    size_t width, height, image_width, image_height;
    int block_count, block_width, block_height, orientation, i;

    if (!PyArg_ParseTuple(args, "Oii", &path, &block_count, &orientation)) {
        return NULL;
    }

    if (PySequence_Length(path) == 0) {
        PyErr_SetString(PyExc_ValueError, "empty path");
        return NULL;
    }

    if ((orientation > 8) || (orientation < 0)) {
        orientation = 0; // simplifies checks later since we can only have values in 0-8
    }

    image_path = pystring2cfstring(path);
    if (image_path == NULL) {
        return PyErr_NoMemory();
    }
    image_url = CFURLCreateWithFileSystemPath(NULL, image_path, kCFURLPOSIXPathStyle, FALSE);
    CFRelease(image_path);

    source = CGImageSourceCreateWithURL(image_url, NULL);
    CFRelease(image_url);
    if (source == NULL) {
        return PyErr_NoMemory();
    }

    image = CGImageSourceCreateImageAtIndex(source, 0, NULL);
    if (image == NULL) {
        CFRelease(source);
        return PyErr_NoMemory();
    }

    width = image_width = CGImageGetWidth(image);
    height = image_height = CGImageGetHeight(image);
    if (orientation >= 5) {
        // orientations 5-8 rotate the photo sideways, so we have to swap width and height
        width = image_height;
        height = image_width;
    }

    CGContextRef context = MyCreateBitmapContext(width, height);

    if (orientation == 2) {
        // Flip X
        CGContextTranslateCTM(context, width, 0);
        CGContextScaleCTM(context, -1, 1);
    }
    else if (orientation == 3) {
        // Rot 180
        CGContextTranslateCTM(context, width, height);
        CGContextRotateCTM(context, RADIANS(180));
    }
    else if (orientation == 4) {
        // Flip Y
        CGContextTranslateCTM(context, 0, height);
        CGContextScaleCTM(context, 1, -1);
    }
    else if (orientation == 5) {
        // Flip X + Rot CW 90
        CGContextTranslateCTM(context, width, 0);
        CGContextScaleCTM(context, -1, 1);
        CGContextTranslateCTM(context, 0, height);
        CGContextRotateCTM(context, RADIANS(-90));
    }
    else if (orientation == 6) {
        // Rot CW 90
        CGContextTranslateCTM(context, 0, height);
        CGContextRotateCTM(context, RADIANS(-90));
    }
    else if (orientation == 7) {
        // Rot CCW 90 + Flip X
        CGContextTranslateCTM(context, width, 0);
        CGContextScaleCTM(context, -1, 1);
        CGContextTranslateCTM(context, width, 0);
        CGContextRotateCTM(context, RADIANS(90));
    }
    else if (orientation == 8) {
        // Rot CCW 90
        CGContextTranslateCTM(context, width, 0);
        CGContextRotateCTM(context, RADIANS(90));
    }
    CGRect myBoundingBox = CGRectMake(0, 0, image_width, image_height);
    CGContextDrawImage(context, myBoundingBox, image);
    unsigned char *bitmapData = CGBitmapContextGetData(context);
    CGContextRelease(context);

    CGImageRelease(image);
    CFRelease(source);
    if (bitmapData == NULL) {
        return PyErr_NoMemory();
    }

    block_width = max(width/block_count, 1);
    block_height = max(height/block_count, 1);

    result = PyList_New(block_count * block_count);
    if (result == NULL) {
        return NULL;
    }

    for (i=0; i<block_count; i++) {
        int j, top;
        top = min(i*block_height, height-block_height);
        for (j=0; j<block_count; j++) {
            int left;
            left = min(j*block_width, width-block_width);
            PyObject *block = getblock(bitmapData, width, height, left, top, block_width, block_height);
            if (block == NULL) {
                Py_DECREF(result);
                return NULL;
            }
            PyList_SET_ITEM(result, i*block_count+j, block);
        }
    }

    free(bitmapData);
    return result;
}

static PyMethodDef BlockOsxMethods[] = {
    {"get_image_size", block_osx_get_image_size, METH_VARARGS, ""},
    {"getblocks", block_osx_getblocks, METH_VARARGS, ""},
    {NULL, NULL, 0, NULL} /* Sentinel */
};

static struct PyModuleDef BlockOsxDef = {
    PyModuleDef_HEAD_INIT,
    "_block_osx",
    NULL,
    -1,
    BlockOsxMethods,
    NULL,
    NULL,
    NULL,
    NULL
};

PyObject *
PyInit__block_osx(void)
{
    PyObject *m = PyModule_Create(&BlockOsxDef);
    if (m == NULL) {
        return NULL;
    }
    return m;
}

95
core/pe/modules/cache.c
Normal file
@@ -0,0 +1,95 @@
/* Created By: Virgil Dupras
 * Created On: 2010-01-30
 * Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
 *
 * This software is licensed under the "BSD" License as described in the "LICENSE" file,
 * which should be included with this package. The terms are also available at
 * http://www.hardcoded.net/licenses/bsd_license
 */

#include "common.h"

/* I know that there's strtol out there, but it requires a pointer to
 * a char, which would in turn require me to buffer my chars around,
 * making the whole process slower.
 */
static long
xchar_to_long(char c)
{
    if ((c >= 48) && (c <= 57)) { /* 0-9 */
        return c - 48;
    }
    else if ((c >= 65) && (c <= 70)) { /* A-F */
        return c - 55;
    }
    else if ((c >= 97) && (c <= 102)) { /* a-f */
        return c - 87;
    }
    return 0;
}
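
/* For example (illustrative): xchar_to_long('0') == 0, xchar_to_long('A') == 10
 * and xchar_to_long('f') == 15; any character outside [0-9A-Fa-f] maps to 0. */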

static PyObject*
cache_string_to_colors(PyObject *self, PyObject *args)
{
    char *s;
    Py_ssize_t char_count, color_count, i;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "s#", &s, &char_count)) {
        return NULL;
    }

    color_count = (char_count / 6);
    result = PyList_New(color_count);
    if (result == NULL) {
        return NULL;
    }

    for (i=0; i<color_count; i++) {
        long r, g, b;
        Py_ssize_t ci;
        PyObject *color_tuple;

        ci = i * 6;
        r = (xchar_to_long(s[ci]) << 4) + xchar_to_long(s[ci+1]);
        g = (xchar_to_long(s[ci+2]) << 4) + xchar_to_long(s[ci+3]);
        b = (xchar_to_long(s[ci+4]) << 4) + xchar_to_long(s[ci+5]);

        color_tuple = inttuple(3, r, g, b);
        if (color_tuple == NULL) {
            Py_DECREF(result);
            return NULL;
        }
        PyList_SET_ITEM(result, i, color_tuple);
    }

    return result;
}

static PyMethodDef CacheMethods[] = {
    {"string_to_colors", cache_string_to_colors, METH_VARARGS,
     "Transform the string 's' into a list of 3-sized tuples."},
    {NULL, NULL, 0, NULL} /* Sentinel */
};

static struct PyModuleDef CacheDef = {
    PyModuleDef_HEAD_INIT,
    "_cache",
    NULL,
    -1,
    CacheMethods,
    NULL,
    NULL,
    NULL,
    NULL
};

PyObject *
PyInit__cache(void)
{
    PyObject *m = PyModule_Create(&CacheDef);
    if (m == NULL) {
        return NULL;
    }
    return m;
}

45
core/pe/modules/common.c
Normal file
@@ -0,0 +1,45 @@
|
||||
/* Created By: Virgil Dupras
 * Created On: 2010-02-04
 * Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
 *
 * This software is licensed under the "BSD" License as described in the "LICENSE" file,
 * which should be included with this package. The terms are also available at
 * http://www.hardcoded.net/licenses/bsd_license
 */

#include "common.h"

#ifndef _MSC_VER
int max(int a, int b)
{
    return b > a ? b : a;
}

int min(int a, int b)
{
    return b < a ? b : a;
}
#endif

/* Build an n-tuple of Python ints from long varargs. */
PyObject* inttuple(int n, ...)
{
    int i;
    PyObject *pnumber;
    PyObject *result;
    va_list numbers;

    va_start(numbers, n);
    result = PyTuple_New(n);
    if (result == NULL) {
        va_end(numbers);
        return NULL;
    }

    for (i=0; i<n; i++) {
        pnumber = PyLong_FromLong(va_arg(numbers, long));
        if (pnumber == NULL) {
            Py_DECREF(result);
            va_end(numbers);
            return NULL;
        }
        /* PyTuple_SET_ITEM steals the reference to pnumber. */
        PyTuple_SET_ITEM(result, i, pnumber);
    }

    va_end(numbers);
    return result;
}
20
core/pe/modules/common.h
Normal file
@@ -0,0 +1,20 @@
/* Created By: Virgil Dupras
 * Created On: 2010-02-04
 * Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
 *
 * This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
 * which should be included with this package. The terms are also available at
 * http://www.gnu.org/licenses/gpl-3.0.html
 */

#define PY_SSIZE_T_CLEAN
#include "Python.h"

/* It seems like MS VC defines min/max already */
#ifndef _MSC_VER
int max(int a, int b);
int min(int a, int b);
#endif

/* Create a tuple from a variable number of integer arguments. */
PyObject* inttuple(int n, ...);
106
core/pe/photo.py
Normal file
@@ -0,0 +1,106 @@
# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

import logging
from hscommon.util import get_file_ext, format_size

from core.util import format_timestamp, format_perc, format_dupe_count
from core import fs
from . import exif

# This global value is set by the platform-specific subclasser of the Photo base class
PLAT_SPECIFIC_PHOTO_CLASS = None

def format_dimensions(dimensions):
    return '%d x %d' % (dimensions[0], dimensions[1])

def get_delta_dimensions(value, ref_value):
    return (value[0]-ref_value[0], value[1]-ref_value[1])


class Photo(fs.File):
    INITIAL_INFO = fs.File.INITIAL_INFO.copy()
    INITIAL_INFO.update({
        'dimensions': (0, 0),
        'exif_timestamp': '',
    })
    __slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())

    # These extensions are supported on all platforms
    HANDLED_EXTS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'tif'}

    def _plat_get_dimensions(self):
        raise NotImplementedError()

    def _plat_get_blocks(self, block_count_per_side, orientation):
        raise NotImplementedError()

    def _get_orientation(self):
        if not hasattr(self, '_cached_orientation'):
            try:
                with self.path.open('rb') as fp:
                    exifdata = exif.get_fields(fp)
                    # the value is a list (probably one-sized) of ints
                    orientations = exifdata['Orientation']
                    self._cached_orientation = orientations[0]
            except Exception: # Couldn't read EXIF data, no transforms
                self._cached_orientation = 0
        return self._cached_orientation

    def _get_exif_timestamp(self):
        try:
            with self.path.open('rb') as fp:
                exifdata = exif.get_fields(fp)
                return exifdata['DateTimeOriginal']
        except Exception:
            logging.info("Couldn't read EXIF of picture: %s", self.path)
            return ''

    @classmethod
    def can_handle(cls, path):
        return fs.File.can_handle(path) and get_file_ext(path.name) in cls.HANDLED_EXTS

    def get_display_info(self, group, delta):
        size = self.size
        mtime = self.mtime
        dimensions = self.dimensions
        m = group.get_match_of(self)
        if m:
            percentage = m.percentage
            dupe_count = 0
            if delta:
                r = group.ref
                size -= r.size
                mtime -= r.mtime
                dimensions = get_delta_dimensions(dimensions, r.dimensions)
        else:
            percentage = group.percentage
            dupe_count = len(group.dupes)
        dupe_folder_path = getattr(self, 'display_folder_path', self.folder_path)
        return {
            'name': self.name,
            'folder_path': str(dupe_folder_path),
            'size': format_size(size, 0, 1, False),
            'extension': self.extension,
            'dimensions': format_dimensions(dimensions),
            'exif_timestamp': self.exif_timestamp,
            'mtime': format_timestamp(mtime, delta and m),
            'percentage': format_perc(percentage),
            'dupe_count': format_dupe_count(dupe_count),
        }

    def _read_info(self, field):
        fs.File._read_info(self, field)
        if field == 'dimensions':
            self.dimensions = self._plat_get_dimensions()
            if self._get_orientation() in {5, 6, 7, 8}:
                self.dimensions = (self.dimensions[1], self.dimensions[0])
        elif field == 'exif_timestamp':
            self.exif_timestamp = self._get_exif_timestamp()

    def get_blocks(self, block_count_per_side):
        return self._plat_get_blocks(block_count_per_side, self._get_orientation())
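A note on the orientation handling in _read_info above (not part of the commit): EXIF orientation values 5 through 8 are the transposed variants, i.e. the image is rotated 90 or 270 degrees (5 and 7 additionally mirrored), so the pixel dimensions reported by the decoder are swapped relative to how the photo is meant to be displayed. A minimal standalone sketch of the same rule, with a function name of our choosing:

def display_dimensions(raw_dimensions, orientation):
    # Orientations 5-8 involve a 90/270 degree rotation, so the displayed
    # width and height are the raw values swapped; 0 means "unknown, no transform".
    if orientation in {5, 6, 7, 8}:
        return (raw_dimensions[1], raw_dimensions[0])
    return raw_dimensions

assert display_dimensions((4000, 3000), 6) == (3000, 4000)  # rotated 90 degrees
assert display_dimensions((4000, 3000), 1) == (4000, 3000)  # normal orientation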
31
core/pe/prioritize.py
Normal file
@@ -0,0 +1,31 @@
# Created On: 2011/09/16
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

from hscommon.trans import trget

from core.prioritize import (
    KindCategory, FolderCategory, FilenameCategory, NumericalCategory,
    SizeCategory, MtimeCategory
)

coltr = trget('columns')

class DimensionsCategory(NumericalCategory):
    NAME = coltr("Dimensions")

    def extract_value(self, dupe):
        return dupe.dimensions

    def invert_numerical_value(self, value):
        width, height = value
        return (-width, -height)

def all_categories():
    return [
        KindCategory, FolderCategory, FilenameCategory, SizeCategory, DimensionsCategory,
        MtimeCategory
    ]
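A note on invert_numerical_value above (our reading, not part of the commit): numerical prioritization categories offer both "lowest" and "highest" orderings, and a "highest first" ordering can reuse a plain ascending sort by sorting on the inverted value; for a tuple like dimensions, negating each component reverses the lexicographic order. A standalone illustration:

dimensions = [(1024, 768), (4000, 3000), (1920, 1080)]

# Ascending sort on negated components puts the largest dimensions first.
highest_first = sorted(dimensions, key=lambda wh: (-wh[0], -wh[1]))
assert highest_first == [(4000, 3000), (1920, 1080), (1024, 768)]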
28
core/pe/result_table.py
Normal file
@@ -0,0 +1,28 @@
# Created On: 2011-11-27
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

from hscommon.gui.column import Column
from hscommon.trans import trget

from core.gui.result_table import ResultTable as ResultTableBase

coltr = trget('columns')

class ResultTable(ResultTableBase):
    COLUMNS = [
        Column('marked', ''),
        Column('name', coltr("Filename")),
        Column('folder_path', coltr("Folder"), optional=True),
        Column('size', coltr("Size (KB)"), optional=True),
        Column('extension', coltr("Kind"), visible=False, optional=True),
        Column('dimensions', coltr("Dimensions"), optional=True),
        Column('exif_timestamp', coltr("EXIF Timestamp"), visible=False, optional=True),
        Column('mtime', coltr("Modification"), visible=False, optional=True),
        Column('percentage', coltr("Match %"), optional=True),
        Column('dupe_count', coltr("Dupe Count"), visible=False, optional=True),
    ]
    DELTA_COLUMNS = {'size', 'dimensions', 'mtime'}
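One reading note on DELTA_COLUMNS above (ours, not part of the commit): these are the columns whose displayed value becomes a difference against the group's reference file when delta view is enabled, which is what the delta branch of Photo.get_display_info computes. A tiny standalone illustration of the dimensions case:

# With delta view on, a 4000 x 3000 dupe of a 1920 x 1080 reference
# shows its dimensions as the difference against the reference:
ref, dupe = (1920, 1080), (4000, 3000)
delta = (dupe[0] - ref[0], dupe[1] - ref[1])
assert delta == (2080, 1920)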
32
core/pe/scanner.py
Normal file
@@ -0,0 +1,32 @@
# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

from hscommon.trans import tr

from core.scanner import Scanner, ScanType, ScanOption

from . import matchblock, matchexif

class ScannerPE(Scanner):
    cache_path = None
    match_scaled = False
    threshold = 75

    @staticmethod
    def get_scan_options():
        return [
            ScanOption(ScanType.FuzzyBlock, tr("Contents")),
            ScanOption(ScanType.ExifTimestamp, tr("EXIF Timestamp")),
        ]

    def _getmatches(self, files, j):
        if self.scan_type == ScanType.FuzzyBlock:
            return matchblock.getmatches(files, self.cache_path, self.threshold, self.match_scaled, j)
        elif self.scan_type == ScanType.ExifTimestamp:
            return matchexif.getmatches(files, self.match_scaled, j)
        else:
            raise Exception("Invalid scan type")
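Not part of the commit, but as a usage sketch: a rough illustration of how a caller might drive ScannerPE. The function and variable names here are ours; in the real app the platform layer builds the Photo list and a job object for progress reporting, and goes through the Scanner base class's public API rather than calling _getmatches directly.

def scan_photos_sketch(photos, job):
    # Illustrative wiring only: configure a fuzzy-block scan and collect matches.
    scanner = ScannerPE()
    scanner.cache_path = 'blocks.db'        # illustrative cache location
    scanner.scan_type = ScanType.FuzzyBlock
    scanner.threshold = 80                  # keep matches scoring at least 80%
    return scanner._getmatches(photos, job)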