mirror of
https://github.com/arsenetar/dupeguru.git
synced 2026-01-22 14:41:39 +00:00
Merge remote-tracking branch 'upstream/master' into colors-bytes
This commit is contained in:
@@ -1,141 +0,0 @@
|
||||
# Copyright 2016 Virgil Dupras
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
import os
|
||||
import os.path as op
|
||||
import shelve
|
||||
import tempfile
|
||||
from collections import namedtuple
|
||||
|
||||
from core.pe.cache import bytes_to_colors, colors_to_bytes
|
||||
|
||||
|
||||
def wrap_path(path):
|
||||
return f"path:{path}"
|
||||
|
||||
|
||||
def unwrap_path(key):
|
||||
return key[5:]
|
||||
|
||||
|
||||
def wrap_id(path):
|
||||
return f"id:{path}"
|
||||
|
||||
|
||||
def unwrap_id(key):
|
||||
return int(key[3:])
|
||||
|
||||
|
||||
CacheRow = namedtuple("CacheRow", "id path blocks mtime")
|
||||
|
||||
|
||||
class ShelveCache:
|
||||
"""A class to cache picture blocks in a shelve backend."""
|
||||
|
||||
def __init__(self, db=None, readonly=False):
|
||||
self.istmp = db is None
|
||||
if self.istmp:
|
||||
self.dtmp = tempfile.mkdtemp()
|
||||
self.ftmp = db = op.join(self.dtmp, "tmpdb")
|
||||
flag = "r" if readonly else "c"
|
||||
self.shelve = shelve.open(db, flag)
|
||||
self.maxid = self._compute_maxid()
|
||||
|
||||
def __contains__(self, key):
|
||||
return wrap_path(key) in self.shelve
|
||||
|
||||
def __delitem__(self, key):
|
||||
row = self.shelve[wrap_path(key)]
|
||||
del self.shelve[wrap_path(key)]
|
||||
del self.shelve[wrap_id(row.id)]
|
||||
|
||||
def __getitem__(self, key):
|
||||
if isinstance(key, int):
|
||||
skey = self.shelve[wrap_id(key)]
|
||||
else:
|
||||
skey = wrap_path(key)
|
||||
return bytes_to_colors(self.shelve[skey].blocks)
|
||||
|
||||
def __iter__(self):
|
||||
return (unwrap_path(k) for k in self.shelve if k.startswith("path:"))
|
||||
|
||||
def __len__(self):
|
||||
return sum(1 for k in self.shelve if k.startswith("path:"))
|
||||
|
||||
def __setitem__(self, path_str, blocks):
|
||||
blocks = colors_to_bytes(blocks)
|
||||
if op.exists(path_str):
|
||||
mtime = int(os.stat(path_str).st_mtime)
|
||||
else:
|
||||
mtime = 0
|
||||
if path_str in self:
|
||||
rowid = self.shelve[wrap_path(path_str)].id
|
||||
else:
|
||||
rowid = self._get_new_id()
|
||||
row = CacheRow(rowid, path_str, blocks, mtime)
|
||||
self.shelve[wrap_path(path_str)] = row
|
||||
self.shelve[wrap_id(rowid)] = wrap_path(path_str)
|
||||
|
||||
def _compute_maxid(self):
|
||||
return max((unwrap_id(k) for k in self.shelve if k.startswith("id:")), default=1)
|
||||
|
||||
def _get_new_id(self):
|
||||
self.maxid += 1
|
||||
return self.maxid
|
||||
|
||||
def clear(self):
|
||||
self.shelve.clear()
|
||||
|
||||
def close(self):
|
||||
if self.shelve is not None:
|
||||
self.shelve.close()
|
||||
if self.istmp:
|
||||
os.remove(self.ftmp)
|
||||
os.rmdir(self.dtmp)
|
||||
self.shelve = None
|
||||
|
||||
def filter(self, func):
|
||||
to_delete = [key for key in self if not func(key)]
|
||||
for key in to_delete:
|
||||
del self[key]
|
||||
|
||||
def get_id(self, path):
|
||||
if path in self:
|
||||
return self.shelve[wrap_path(path)].id
|
||||
else:
|
||||
raise ValueError(path)
|
||||
|
||||
def get_multiple(self, rowids):
|
||||
for rowid in rowids:
|
||||
try:
|
||||
skey = self.shelve[wrap_id(rowid)]
|
||||
except KeyError:
|
||||
continue
|
||||
yield (rowid, bytes_to_colors(self.shelve[skey].blocks))
|
||||
|
||||
def purge_outdated(self):
|
||||
"""Go through the cache and purge outdated records.
|
||||
|
||||
A record is outdated if the picture doesn't exist or if its mtime is greater than the one in
|
||||
the db.
|
||||
"""
|
||||
todelete = []
|
||||
for path in self:
|
||||
row = self.shelve[wrap_path(path)]
|
||||
if row.mtime and op.exists(path):
|
||||
picture_mtime = os.stat(path).st_mtime
|
||||
if int(picture_mtime) <= row.mtime:
|
||||
# not outdated
|
||||
continue
|
||||
todelete.append(path)
|
||||
for path in todelete:
|
||||
try:
|
||||
del self[path]
|
||||
except KeyError:
|
||||
# I have no idea why a KeyError sometimes happen, but it does, as we can see in
|
||||
# #402 and #439. I don't think it hurts to silently ignore the error, so that's
|
||||
# what we do
|
||||
pass
|
||||
@@ -16,6 +16,7 @@ from hscommon.jobprogress import job
|
||||
|
||||
from core.engine import Match
|
||||
from core.pe.block import avgdiff, DifferentBlockCountError, NoBlocksError
|
||||
from core.pe.cache_sqlite import SqliteCache
|
||||
|
||||
# OPTIMIZATION NOTES:
|
||||
# The bottleneck of the matching phase is CPU, which is why we use multiprocessing. However, another
|
||||
@@ -50,14 +51,7 @@ except Exception:
|
||||
|
||||
|
||||
def get_cache(cache_path, readonly=False):
|
||||
if cache_path.endswith("shelve"):
|
||||
from core.pe.cache_shelve import ShelveCache
|
||||
|
||||
return ShelveCache(cache_path, readonly=readonly)
|
||||
else:
|
||||
from core.pe.cache_sqlite import SqliteCache
|
||||
|
||||
return SqliteCache(cache_path, readonly=readonly)
|
||||
return SqliteCache(cache_path, readonly=readonly)
|
||||
|
||||
|
||||
def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
|
||||
|
||||
@@ -245,4 +245,4 @@ PyObject *PyInit__block(void) {
|
||||
PyModule_AddObject(m, "DifferentBlockCountError", DifferentBlockCountError);
|
||||
|
||||
return m;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
* Created On: 2010-02-04
|
||||
* Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
*
|
||||
* This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
* which should be included with this package. The terms are also available at
|
||||
* This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
* which should be included with this package. The terms are also available at
|
||||
* http://www.gnu.org/licenses/gpl-3.0.html
|
||||
**/
|
||||
|
||||
@@ -22,7 +22,7 @@ pystring2cfstring(PyObject *pystring)
|
||||
UInt8 *s;
|
||||
CFIndex size;
|
||||
CFStringRef result;
|
||||
|
||||
|
||||
if (PyUnicode_Check(pystring)) {
|
||||
encoded = PyUnicode_AsUTF8String(pystring);
|
||||
if (encoded == NULL) {
|
||||
@@ -32,7 +32,7 @@ pystring2cfstring(PyObject *pystring)
|
||||
encoded = pystring;
|
||||
Py_INCREF(encoded);
|
||||
}
|
||||
|
||||
|
||||
s = (UInt8*)PyBytes_AS_STRING(encoded);
|
||||
size = PyBytes_GET_SIZE(encoded);
|
||||
result = CFStringCreateWithBytes(NULL, s, size, kCFStringEncodingUTF8, FALSE);
|
||||
@@ -50,20 +50,20 @@ static PyObject* block_osx_get_image_size(PyObject *self, PyObject *args)
|
||||
long width, height;
|
||||
PyObject *pwidth, *pheight;
|
||||
PyObject *result;
|
||||
|
||||
|
||||
width = 0;
|
||||
height = 0;
|
||||
if (!PyArg_ParseTuple(args, "O", &path)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
image_path = pystring2cfstring(path);
|
||||
if (image_path == NULL) {
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
image_url = CFURLCreateWithFileSystemPath(NULL, image_path, kCFURLPOSIXPathStyle, FALSE);
|
||||
CFRelease(image_path);
|
||||
|
||||
|
||||
source = CGImageSourceCreateWithURL(image_url, NULL);
|
||||
CFRelease(image_url);
|
||||
if (source != NULL) {
|
||||
@@ -75,7 +75,7 @@ static PyObject* block_osx_get_image_size(PyObject *self, PyObject *args)
|
||||
}
|
||||
CFRelease(source);
|
||||
}
|
||||
|
||||
|
||||
pwidth = PyLong_FromLong(width);
|
||||
if (pwidth == NULL) {
|
||||
return NULL;
|
||||
@@ -91,19 +91,19 @@ static PyObject* block_osx_get_image_size(PyObject *self, PyObject *args)
|
||||
}
|
||||
|
||||
static CGContextRef
|
||||
MyCreateBitmapContext(int width, int height)
|
||||
MyCreateBitmapContext(int width, int height)
|
||||
{
|
||||
CGContextRef context = NULL;
|
||||
CGColorSpaceRef colorSpace;
|
||||
void *bitmapData;
|
||||
int bitmapByteCount;
|
||||
int bitmapBytesPerRow;
|
||||
|
||||
|
||||
bitmapBytesPerRow = (width * 4);
|
||||
bitmapByteCount = (bitmapBytesPerRow * height);
|
||||
|
||||
|
||||
colorSpace = CGColorSpaceCreateWithName(kCGColorSpaceGenericRGB);
|
||||
|
||||
|
||||
// calloc() must be used to allocate bitmapData here because the buffer has to be zeroed.
|
||||
// If it's not zeroes, when images with transparency are drawn in the context, this buffer
|
||||
// will stay with undefined pixels, which means that two pictures with the same pixels will
|
||||
@@ -113,7 +113,7 @@ MyCreateBitmapContext(int width, int height)
|
||||
fprintf(stderr, "Memory not allocated!");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
context = CGBitmapContextCreate(bitmapData, width, height, 8, bitmapBytesPerRow, colorSpace,
|
||||
(CGBitmapInfo)kCGImageAlphaNoneSkipLast);
|
||||
if (context== NULL) {
|
||||
@@ -128,7 +128,7 @@ MyCreateBitmapContext(int width, int height)
|
||||
static PyObject* getblock(unsigned char *imageData, int imageWidth, int imageHeight, int boxX, int boxY, int boxW, int boxH)
|
||||
{
|
||||
int i,j, totalR, totalG, totalB;
|
||||
|
||||
|
||||
totalR = totalG = totalB = 0;
|
||||
for(i=boxY; i<boxY+boxH; i++) {
|
||||
for(j=boxX; j<boxX+boxW; j++) {
|
||||
@@ -142,7 +142,7 @@ static PyObject* getblock(unsigned char *imageData, int imageWidth, int imageHei
|
||||
totalR /= pixelCount;
|
||||
totalG /= pixelCount;
|
||||
totalB /= pixelCount;
|
||||
|
||||
|
||||
return inttuple(3, totalR, totalG, totalB);
|
||||
}
|
||||
|
||||
@@ -155,27 +155,27 @@ static PyObject* block_osx_getblocks(PyObject *self, PyObject *args)
|
||||
CGImageRef image;
|
||||
size_t width, height, image_width, image_height;
|
||||
int block_count, block_width, block_height, orientation, i;
|
||||
|
||||
|
||||
if (!PyArg_ParseTuple(args, "Oii", &path, &block_count, &orientation)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
if (PySequence_Length(path) == 0) {
|
||||
PyErr_SetString(PyExc_ValueError, "empty path");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
if ((orientation > 8) || (orientation < 0)) {
|
||||
orientation = 0; // simplifies checks later since we can only have values in 0-8
|
||||
}
|
||||
|
||||
|
||||
image_path = pystring2cfstring(path);
|
||||
if (image_path == NULL) {
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
image_url = CFURLCreateWithFileSystemPath(NULL, image_path, kCFURLPOSIXPathStyle, FALSE);
|
||||
CFRelease(image_path);
|
||||
|
||||
|
||||
source = CGImageSourceCreateWithURL(image_url, NULL);
|
||||
CFRelease(image_url);
|
||||
if (source == NULL) {
|
||||
@@ -187,8 +187,8 @@ static PyObject* block_osx_getblocks(PyObject *self, PyObject *args)
|
||||
CFRelease(source);
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
width = image_width = CGImageGetWidth(image);
|
||||
height = image_height = CGImageGetHeight(image);
|
||||
if (orientation >= 5) {
|
||||
@@ -196,9 +196,9 @@ static PyObject* block_osx_getblocks(PyObject *self, PyObject *args)
|
||||
width = image_height;
|
||||
height = image_width;
|
||||
}
|
||||
|
||||
|
||||
CGContextRef context = MyCreateBitmapContext(width, height);
|
||||
|
||||
|
||||
if (orientation == 2) {
|
||||
// Flip X
|
||||
CGContextTranslateCTM(context, width, 0);
|
||||
@@ -207,7 +207,7 @@ static PyObject* block_osx_getblocks(PyObject *self, PyObject *args)
|
||||
else if (orientation == 3) {
|
||||
// Rot 180
|
||||
CGContextTranslateCTM(context, width, height);
|
||||
CGContextRotateCTM(context, RADIANS(180));
|
||||
CGContextRotateCTM(context, RADIANS(180));
|
||||
}
|
||||
else if (orientation == 4) {
|
||||
// Flip Y
|
||||
@@ -242,21 +242,21 @@ static PyObject* block_osx_getblocks(PyObject *self, PyObject *args)
|
||||
CGContextDrawImage(context, myBoundingBox, image);
|
||||
unsigned char *bitmapData = CGBitmapContextGetData(context);
|
||||
CGContextRelease(context);
|
||||
|
||||
|
||||
CGImageRelease(image);
|
||||
CFRelease(source);
|
||||
if (bitmapData == NULL) {
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
|
||||
|
||||
block_width = max(width/block_count, 1);
|
||||
block_height = max(height/block_count, 1);
|
||||
|
||||
|
||||
result = PyList_New(block_count * block_count);
|
||||
if (result == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
for(i=0; i<block_count; i++) {
|
||||
int j, top;
|
||||
top = min(i*block_height, height-block_height);
|
||||
@@ -271,8 +271,8 @@ static PyObject* block_osx_getblocks(PyObject *self, PyObject *args)
|
||||
PyList_SET_ITEM(result, i*block_count+j, block);
|
||||
}
|
||||
}
|
||||
|
||||
free(bitmapData);
|
||||
|
||||
free(bitmapData);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -302,4 +302,4 @@ PyInit__block_osx(void)
|
||||
return NULL;
|
||||
}
|
||||
return m;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,73 +2,68 @@
|
||||
* Created On: 2010-01-30
|
||||
* Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
||||
*
|
||||
* This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
* which should be included with this package. The terms are also available at
|
||||
* http://www.hardcoded.net/licenses/bsd_license
|
||||
* This software is licensed under the "BSD" License as described in the
|
||||
* "LICENSE" file, which should be included with this package. The terms are
|
||||
* also available at http://www.hardcoded.net/licenses/bsd_license
|
||||
*/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
static PyObject*
|
||||
cache_bytes_to_colors(PyObject *self, PyObject *args)
|
||||
{
|
||||
char *y;
|
||||
Py_ssize_t char_count, i, color_count;
|
||||
PyObject *result;
|
||||
unsigned long r, g, b;
|
||||
Py_ssize_t ci;
|
||||
PyObject *color_tuple;
|
||||
static PyObject *cache_bytes_to_colors(PyObject *self, PyObject *args) {
|
||||
char *y;
|
||||
Py_ssize_t char_count, i, color_count;
|
||||
PyObject *result;
|
||||
unsigned long r, g, b;
|
||||
Py_ssize_t ci;
|
||||
PyObject *color_tuple;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "y#", &y, &char_count)) {
|
||||
return NULL;
|
||||
if (!PyArg_ParseTuple(args, "y#", &y, &char_count)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
color_count = char_count / 3;
|
||||
result = PyList_New(color_count);
|
||||
if (result == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < color_count; i++) {
|
||||
ci = i * 3;
|
||||
r = (unsigned char)y[ci];
|
||||
g = (unsigned char)y[ci + 1];
|
||||
b = (unsigned char)y[ci + 2];
|
||||
|
||||
color_tuple = inttuple(3, r, g, b);
|
||||
if (color_tuple == NULL) {
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
color_count = char_count / 3;
|
||||
result = PyList_New(color_count);
|
||||
if (result == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i=0; i<color_count; i++) {
|
||||
ci = i * 3;
|
||||
r = (unsigned char) y[ci];
|
||||
g = (unsigned char) y[ci+1];
|
||||
b = (unsigned char) y[ci+2];
|
||||
|
||||
color_tuple = inttuple(3, r, g, b);
|
||||
if (color_tuple == NULL) {
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
}
|
||||
PyList_SET_ITEM(result, i, color_tuple);
|
||||
}
|
||||
|
||||
return result;
|
||||
PyList_SET_ITEM(result, i, color_tuple);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static PyMethodDef CacheMethods[] = {
|
||||
{"bytes_to_colors", cache_bytes_to_colors, METH_VARARGS, "Transform the bytes 's' into a list of 3 sized tuples."},
|
||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||
{"bytes_to_colors", cache_bytes_to_colors, METH_VARARGS,
|
||||
"Transform the bytes 's' into a list of 3 sized tuples."},
|
||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
static struct PyModuleDef CacheDef = {
|
||||
PyModuleDef_HEAD_INIT,
|
||||
"_cache",
|
||||
NULL,
|
||||
-1,
|
||||
CacheMethods,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL
|
||||
};
|
||||
static struct PyModuleDef CacheDef = {PyModuleDef_HEAD_INIT,
|
||||
"_cache",
|
||||
NULL,
|
||||
-1,
|
||||
CacheMethods,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL};
|
||||
|
||||
PyObject *
|
||||
PyInit__cache(void)
|
||||
{
|
||||
PyObject *m = PyModule_Create(&CacheDef);
|
||||
if (m == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
return m;
|
||||
}
|
||||
PyObject *PyInit__cache(void) {
|
||||
PyObject *m = PyModule_Create(&CacheDef);
|
||||
if (m == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
* Created On: 2010-02-04
|
||||
* Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
||||
*
|
||||
* This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
* which should be included with this package. The terms are also available at
|
||||
* This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||
* which should be included with this package. The terms are also available at
|
||||
* http://www.hardcoded.net/licenses/bsd_license
|
||||
*/
|
||||
|
||||
@@ -27,10 +27,10 @@ PyObject* inttuple(int n, ...)
|
||||
PyObject *pnumber;
|
||||
PyObject *result;
|
||||
va_list numbers;
|
||||
|
||||
|
||||
va_start(numbers, n);
|
||||
result = PyTuple_New(n);
|
||||
|
||||
|
||||
for (i=0; i<n; i++) {
|
||||
pnumber = PyLong_FromUnsignedLong(va_arg(numbers, long));
|
||||
if (pnumber == NULL) {
|
||||
@@ -39,7 +39,7 @@ PyObject* inttuple(int n, ...)
|
||||
}
|
||||
PyTuple_SET_ITEM(result, i, pnumber);
|
||||
}
|
||||
|
||||
|
||||
va_end(numbers);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
* Created On: 2010-02-04
|
||||
* Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
*
|
||||
* This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
* which should be included with this package. The terms are also available at
|
||||
* This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
* which should be included with this package. The terms are also available at
|
||||
* http://www.gnu.org/licenses/gpl-3.0.html
|
||||
*/
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ class Photo(fs.File):
|
||||
__slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())
|
||||
|
||||
# These extensions are supported on all platforms
|
||||
HANDLED_EXTS = {"png", "jpg", "jpeg", "gif", "bmp", "tiff", "tif"}
|
||||
HANDLED_EXTS = {"png", "jpg", "jpeg", "gif", "bmp", "tiff", "tif", "webp"}
|
||||
|
||||
def _plat_get_dimensions(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
Reference in New Issue
Block a user