mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-09-11 17:58:17 +00:00
Merge pull request #1049 from Dobatymo/colors-bytes
serialize/deserialize colors to/from bytes instead of strings
This commit is contained in:
commit
e41c91623c
@ -4,24 +4,13 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
from core.pe._cache import string_to_colors # noqa
|
from core.pe._cache import bytes_to_colors # noqa
|
||||||
|
|
||||||
|
|
||||||
def colors_to_string(colors):
|
def colors_to_bytes(colors):
|
||||||
"""Transform the 3 sized tuples 'colors' into a hex string.
|
"""Transform the 3 sized tuples 'colors' into a bytes string.
|
||||||
|
|
||||||
[(0,100,255)] --> 0064ff
|
[(0,100,255)] --> b'\x00d\xff'
|
||||||
[(1,2,3),(4,5,6)] --> 010203040506
|
[(1,2,3),(4,5,6)] --> b'\x01\x02\x03\x04\x05\x06'
|
||||||
"""
|
"""
|
||||||
return "".join("{:02x}{:02x}{:02x}".format(r, g, b) for r, g, b in colors)
|
return b"".join(map(bytes, colors))
|
||||||
|
|
||||||
|
|
||||||
# This function is an important bottleneck of dupeGuru PE. It has been converted to C.
|
|
||||||
# def string_to_colors(s):
|
|
||||||
# """Transform the string 's' in a list of 3 sized tuples.
|
|
||||||
# """
|
|
||||||
# result = []
|
|
||||||
# for i in xrange(0, len(s), 6):
|
|
||||||
# number = int(s[i:i+6], 16)
|
|
||||||
# result.append((number >> 16, (number >> 8) & 0xff, number & 0xff))
|
|
||||||
# return result
|
|
||||||
|
@ -2,5 +2,5 @@ from typing import Union, Tuple, List
|
|||||||
|
|
||||||
_block = Tuple[int, int, int]
|
_block = Tuple[int, int, int]
|
||||||
|
|
||||||
def colors_to_string(colors: List[_block]) -> str: ... # noqa: E302
|
def colors_to_bytes(colors: List[_block]) -> bytes: ... # noqa: E302
|
||||||
def string_to_colors(s: str) -> Union[List[_block], None]: ...
|
def bytes_to_colors(s: bytes) -> Union[List[_block], None]: ...
|
||||||
|
@ -9,12 +9,20 @@ import os.path as op
|
|||||||
import logging
|
import logging
|
||||||
import sqlite3 as sqlite
|
import sqlite3 as sqlite
|
||||||
|
|
||||||
from core.pe.cache import string_to_colors, colors_to_string
|
from core.pe.cache import bytes_to_colors, colors_to_bytes
|
||||||
|
|
||||||
|
|
||||||
class SqliteCache:
|
class SqliteCache:
|
||||||
"""A class to cache picture blocks in a sqlite backend."""
|
"""A class to cache picture blocks in a sqlite backend."""
|
||||||
|
|
||||||
|
schema_version = 1
|
||||||
|
schema_version_description = "Changed from string to bytes for blocks."
|
||||||
|
|
||||||
|
create_table_query = "CREATE TABLE IF NOT EXISTS pictures(path TEXT, mtime_ns INTEGER, blocks BLOB)"
|
||||||
|
create_index_query = "CREATE INDEX IF NOT EXISTS idx_path on pictures (path)"
|
||||||
|
drop_table_query = "DROP TABLE IF EXISTS pictures"
|
||||||
|
drop_index_query = "DROP INDEX IF EXISTS idx_path"
|
||||||
|
|
||||||
def __init__(self, db=":memory:", readonly=False):
|
def __init__(self, db=":memory:", readonly=False):
|
||||||
# readonly is not used in the sqlite version of the cache
|
# readonly is not used in the sqlite version of the cache
|
||||||
self.dbname = db
|
self.dbname = db
|
||||||
@ -40,7 +48,7 @@ class SqliteCache:
|
|||||||
sql = "select blocks from pictures where path = ?"
|
sql = "select blocks from pictures where path = ?"
|
||||||
result = self.con.execute(sql, [key]).fetchone()
|
result = self.con.execute(sql, [key]).fetchone()
|
||||||
if result:
|
if result:
|
||||||
result = string_to_colors(result[0])
|
result = bytes_to_colors(result[0])
|
||||||
return result
|
return result
|
||||||
else:
|
else:
|
||||||
raise KeyError(key)
|
raise KeyError(key)
|
||||||
@ -56,15 +64,15 @@ class SqliteCache:
|
|||||||
return result[0][0]
|
return result[0][0]
|
||||||
|
|
||||||
def __setitem__(self, path_str, blocks):
|
def __setitem__(self, path_str, blocks):
|
||||||
blocks = colors_to_string(blocks)
|
blocks = colors_to_bytes(blocks)
|
||||||
if op.exists(path_str):
|
if op.exists(path_str):
|
||||||
mtime = int(os.stat(path_str).st_mtime)
|
mtime = int(os.stat(path_str).st_mtime)
|
||||||
else:
|
else:
|
||||||
mtime = 0
|
mtime = 0
|
||||||
if path_str in self:
|
if path_str in self:
|
||||||
sql = "update pictures set blocks = ?, mtime = ? where path = ?"
|
sql = "update pictures set blocks = ?, mtime_ns = ? where path = ?"
|
||||||
else:
|
else:
|
||||||
sql = "insert into pictures(blocks,mtime,path) values(?,?,?)"
|
sql = "insert into pictures(blocks,mtime_ns,path) values(?,?,?)"
|
||||||
try:
|
try:
|
||||||
self.con.execute(sql, [blocks, mtime, path_str])
|
self.con.execute(sql, [blocks, mtime, path_str])
|
||||||
except sqlite.OperationalError:
|
except sqlite.OperationalError:
|
||||||
@ -73,18 +81,9 @@ class SqliteCache:
|
|||||||
logging.warning("DatabaseError while setting value for key %r: %s", path_str, str(e))
|
logging.warning("DatabaseError while setting value for key %r: %s", path_str, str(e))
|
||||||
|
|
||||||
def _create_con(self, second_try=False):
|
def _create_con(self, second_try=False):
|
||||||
def create_tables():
|
|
||||||
logging.debug("Creating picture cache tables.")
|
|
||||||
self.con.execute("drop table if exists pictures")
|
|
||||||
self.con.execute("drop index if exists idx_path")
|
|
||||||
self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)")
|
|
||||||
self.con.execute("create index idx_path on pictures (path)")
|
|
||||||
|
|
||||||
self.con = sqlite.connect(self.dbname, isolation_level=None)
|
|
||||||
try:
|
try:
|
||||||
self.con.execute("select path, mtime, blocks from pictures where 1=2")
|
self.con = sqlite.connect(self.dbname, isolation_level=None)
|
||||||
except sqlite.OperationalError: # new db
|
self._check_upgrade()
|
||||||
create_tables()
|
|
||||||
except sqlite.DatabaseError as e: # corrupted db
|
except sqlite.DatabaseError as e: # corrupted db
|
||||||
if second_try:
|
if second_try:
|
||||||
raise # Something really strange is happening
|
raise # Something really strange is happening
|
||||||
@ -93,6 +92,25 @@ class SqliteCache:
|
|||||||
os.remove(self.dbname)
|
os.remove(self.dbname)
|
||||||
self._create_con(second_try=True)
|
self._create_con(second_try=True)
|
||||||
|
|
||||||
|
def _check_upgrade(self) -> None:
    """Bring the cache database up to ``self.schema_version``.

    Looks up the newest version recorded in the ``schema_version`` table,
    creating that table when it is missing. When the recorded version differs
    from ``self.schema_version`` (including "no version recorded at all"),
    the ``pictures`` table is dropped and the current version and description
    are recorded. The ``pictures`` table and its path index are then created
    if they do not exist.

    Raises:
        sqlite3.DatabaseError: propagated from any failing statement.
    """
    with self.con as conn:
        has_schema = conn.execute(
            "SELECT NAME FROM sqlite_master WHERE type='table' AND name='schema_version'"
        ).fetchall()
        version = None
        if has_schema:
            # The table can exist without any row (e.g. a run that stopped
            # between CREATE TABLE and INSERT). fetchone() then returns None,
            # which must be treated as "no version recorded", not subscripted.
            row = conn.execute("SELECT version FROM schema_version ORDER BY version DESC").fetchone()
            if row is not None:
                version = row[0]
        else:
            conn.execute("CREATE TABLE schema_version (version int PRIMARY KEY, description TEXT)")
        if version != self.schema_version:
            # Cached blocks written under another schema are unusable: discard them.
            conn.execute(self.drop_table_query)
            conn.execute(
                "INSERT OR REPLACE INTO schema_version VALUES (:version, :description)",
                {"version": self.schema_version, "description": self.schema_version_description},
            )
        # Run unconditionally so a missing table/index is always restored.
        # NOTE(review): original indentation is not visible in this view;
        # confirm these two statements sit outside the version check.
        conn.execute(self.create_table_query)
        conn.execute(self.create_index_query)
|
||||||
|
|
||||||
def clear(self):
|
def clear(self):
|
||||||
self.close()
|
self.close()
|
||||||
if self.dbname != ":memory:":
|
if self.dbname != ":memory:":
|
||||||
@ -120,7 +138,7 @@ class SqliteCache:
|
|||||||
def get_multiple(self, rowids):
|
def get_multiple(self, rowids):
|
||||||
sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(map(str, rowids))
|
sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(map(str, rowids))
|
||||||
cur = self.con.execute(sql)
|
cur = self.con.execute(sql)
|
||||||
return ((rowid, string_to_colors(blocks)) for rowid, blocks in cur)
|
return ((rowid, bytes_to_colors(blocks)) for rowid, blocks in cur)
|
||||||
|
|
||||||
def purge_outdated(self):
|
def purge_outdated(self):
|
||||||
"""Go through the cache and purge outdated records.
|
"""Go through the cache and purge outdated records.
|
||||||
@ -129,12 +147,12 @@ class SqliteCache:
|
|||||||
the db.
|
the db.
|
||||||
"""
|
"""
|
||||||
todelete = []
|
todelete = []
|
||||||
sql = "select rowid, path, mtime from pictures"
|
sql = "select rowid, path, mtime_ns from pictures"
|
||||||
cur = self.con.execute(sql)
|
cur = self.con.execute(sql)
|
||||||
for rowid, path_str, mtime in cur:
|
for rowid, path_str, mtime_ns in cur:
|
||||||
if mtime and op.exists(path_str):
|
if mtime_ns and op.exists(path_str):
|
||||||
picture_mtime = os.stat(path_str).st_mtime
|
picture_mtime = os.stat(path_str).st_mtime
|
||||||
if int(picture_mtime) <= mtime:
|
if int(picture_mtime) <= mtime_ns:
|
||||||
# not outdated
|
# not outdated
|
||||||
continue
|
continue
|
||||||
todelete.append(rowid)
|
todelete.append(rowid)
|
||||||
|
@ -28,7 +28,7 @@ from core.pe.cache_sqlite import SqliteCache
|
|||||||
# to files in other chunks. So chunkifying doesn't save us any actual comparison, but the advantage
|
# to files in other chunks. So chunkifying doesn't save us any actual comparison, but the advantage
|
||||||
# is that instead of reading blocks from disk number_of_files**2 times, we read it
|
# is that instead of reading blocks from disk number_of_files**2 times, we read it
|
||||||
# number_of_files*number_of_chunks times.
|
# number_of_files*number_of_chunks times.
|
||||||
# Determining the right chunk size is tricky, bceause if it's too big, too many blocks will be in
|
# Determining the right chunk size is tricky, because if it's too big, too many blocks will be in
|
||||||
# memory at the same time and we might end up with memory trashing, which is awfully slow. So,
|
# memory at the same time and we might end up with memory trashing, which is awfully slow. So,
|
||||||
# because our *real* bottleneck is CPU, the chunk size must simply be enough so that the CPU isn't
|
# because our *real* bottleneck is CPU, the chunk size must simply be enough so that the CPU isn't
|
||||||
# starved by Disk IOs.
|
# starved by Disk IOs.
|
||||||
|
@ -2,58 +2,36 @@
|
|||||||
* Created On: 2010-01-30
|
* Created On: 2010-01-30
|
||||||
* Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
* Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
||||||
*
|
*
|
||||||
* This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
* This software is licensed under the "BSD" License as described in the
|
||||||
* which should be included with this package. The terms are also available at
|
* "LICENSE" file, which should be included with this package. The terms are
|
||||||
* http://www.hardcoded.net/licenses/bsd_license
|
* also available at http://www.hardcoded.net/licenses/bsd_license
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
/* I know that there strtol out there, but it requires a pointer to
|
static PyObject *cache_bytes_to_colors(PyObject *self, PyObject *args) {
|
||||||
* a char, which would in turn require me to buffer my chars around,
|
char *y;
|
||||||
* making the whole process slower.
|
Py_ssize_t char_count, i, color_count;
|
||||||
*/
|
|
||||||
static long
|
|
||||||
xchar_to_long(char c)
|
|
||||||
{
|
|
||||||
if ((c >= 48) && (c <= 57)) { /* 0-9 */
|
|
||||||
return c - 48;
|
|
||||||
}
|
|
||||||
else if ((c >= 65) && (c <= 70)) { /* A-F */
|
|
||||||
return c - 55;
|
|
||||||
}
|
|
||||||
else if ((c >= 97) && (c <= 102)) { /* a-f */
|
|
||||||
return c - 87;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static PyObject*
|
|
||||||
cache_string_to_colors(PyObject *self, PyObject *args)
|
|
||||||
{
|
|
||||||
char *s;
|
|
||||||
Py_ssize_t char_count, color_count, i;
|
|
||||||
PyObject *result;
|
PyObject *result;
|
||||||
|
unsigned long r, g, b;
|
||||||
|
Py_ssize_t ci;
|
||||||
|
PyObject *color_tuple;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "s#", &s, &char_count)) {
|
if (!PyArg_ParseTuple(args, "y#", &y, &char_count)) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
color_count = (char_count / 6);
|
color_count = char_count / 3;
|
||||||
result = PyList_New(color_count);
|
result = PyList_New(color_count);
|
||||||
if (result == NULL) {
|
if (result == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i=0; i<color_count; i++) {
|
for (i = 0; i < color_count; i++) {
|
||||||
long r, g, b;
|
ci = i * 3;
|
||||||
Py_ssize_t ci;
|
r = (unsigned char)y[ci];
|
||||||
PyObject *color_tuple;
|
g = (unsigned char)y[ci + 1];
|
||||||
|
b = (unsigned char)y[ci + 2];
|
||||||
ci = i * 6;
|
|
||||||
r = (xchar_to_long(s[ci]) << 4) + xchar_to_long(s[ci+1]);
|
|
||||||
g = (xchar_to_long(s[ci+2]) << 4) + xchar_to_long(s[ci+3]);
|
|
||||||
b = (xchar_to_long(s[ci+4]) << 4) + xchar_to_long(s[ci+5]);
|
|
||||||
|
|
||||||
color_tuple = inttuple(3, r, g, b);
|
color_tuple = inttuple(3, r, g, b);
|
||||||
if (color_tuple == NULL) {
|
if (color_tuple == NULL) {
|
||||||
@ -67,13 +45,12 @@ cache_string_to_colors(PyObject *self, PyObject *args)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static PyMethodDef CacheMethods[] = {
|
static PyMethodDef CacheMethods[] = {
|
||||||
{"string_to_colors", cache_string_to_colors, METH_VARARGS,
|
{"bytes_to_colors", cache_bytes_to_colors, METH_VARARGS,
|
||||||
"Transform the string 's' in a list of 3 sized tuples."},
|
"Transform the bytes 's' into a list of 3 sized tuples."},
|
||||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct PyModuleDef CacheDef = {
|
static struct PyModuleDef CacheDef = {PyModuleDef_HEAD_INIT,
|
||||||
PyModuleDef_HEAD_INIT,
|
|
||||||
"_cache",
|
"_cache",
|
||||||
NULL,
|
NULL,
|
||||||
-1,
|
-1,
|
||||||
@ -81,12 +58,9 @@ static struct PyModuleDef CacheDef = {
|
|||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
NULL
|
NULL};
|
||||||
};
|
|
||||||
|
|
||||||
PyObject *
|
PyObject *PyInit__cache(void) {
|
||||||
PyInit__cache(void)
|
|
||||||
{
|
|
||||||
PyObject *m = PyModule_Create(&CacheDef);
|
PyObject *m = PyModule_Create(&CacheDef);
|
||||||
if (m == NULL) {
|
if (m == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -32,7 +32,7 @@ PyObject* inttuple(int n, ...)
|
|||||||
result = PyTuple_New(n);
|
result = PyTuple_New(n);
|
||||||
|
|
||||||
for (i=0; i<n; i++) {
|
for (i=0; i<n; i++) {
|
||||||
pnumber = PyLong_FromLong(va_arg(numbers, long));
|
pnumber = PyLong_FromUnsignedLong(va_arg(numbers, long));
|
||||||
if (pnumber == NULL) {
|
if (pnumber == NULL) {
|
||||||
Py_DECREF(result);
|
Py_DECREF(result);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -10,7 +10,7 @@ from pytest import raises, skip
|
|||||||
from hscommon.testutil import eq_
|
from hscommon.testutil import eq_
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from core.pe.cache import colors_to_string, string_to_colors
|
from core.pe.cache import colors_to_bytes, bytes_to_colors
|
||||||
from core.pe.cache_sqlite import SqliteCache
|
from core.pe.cache_sqlite import SqliteCache
|
||||||
except ImportError:
|
except ImportError:
|
||||||
skip("Can't import the cache module, probably hasn't been compiled.")
|
skip("Can't import the cache module, probably hasn't been compiled.")
|
||||||
@ -18,32 +18,33 @@ except ImportError:
|
|||||||
|
|
||||||
class TestCaseColorsToString:
|
class TestCaseColorsToString:
|
||||||
def test_no_color(self):
|
def test_no_color(self):
|
||||||
eq_("", colors_to_string([]))
|
eq_(b"", colors_to_bytes([]))
|
||||||
|
|
||||||
def test_single_color(self):
|
def test_single_color(self):
|
||||||
eq_("000000", colors_to_string([(0, 0, 0)]))
|
eq_(b"\x00\x00\x00", colors_to_bytes([(0, 0, 0)]))
|
||||||
eq_("010101", colors_to_string([(1, 1, 1)]))
|
eq_(b"\x01\x01\x01", colors_to_bytes([(1, 1, 1)]))
|
||||||
eq_("0a141e", colors_to_string([(10, 20, 30)]))
|
eq_(b"\x0a\x14\x1e", colors_to_bytes([(10, 20, 30)]))
|
||||||
|
|
||||||
def test_two_colors(self):
|
def test_two_colors(self):
|
||||||
eq_("000102030405", colors_to_string([(0, 1, 2), (3, 4, 5)]))
|
eq_(b"\x00\x01\x02\x03\x04\x05", colors_to_bytes([(0, 1, 2), (3, 4, 5)]))
|
||||||
|
|
||||||
|
|
||||||
class TestCaseStringToColors:
|
class TestCaseStringToColors:
|
||||||
def test_empty(self):
|
def test_empty(self):
|
||||||
eq_([], string_to_colors(""))
|
eq_([], bytes_to_colors(b""))
|
||||||
|
|
||||||
def test_single_color(self):
|
def test_single_color(self):
|
||||||
eq_([(0, 0, 0)], string_to_colors("000000"))
|
eq_([(0, 0, 0)], bytes_to_colors(b"\x00\x00\x00"))
|
||||||
eq_([(2, 3, 4)], string_to_colors("020304"))
|
eq_([(2, 3, 4)], bytes_to_colors(b"\x02\x03\x04"))
|
||||||
eq_([(10, 20, 30)], string_to_colors("0a141e"))
|
eq_([(10, 20, 30)], bytes_to_colors(b"\x0a\x14\x1e"))
|
||||||
|
|
||||||
def test_two_colors(self):
|
def test_two_colors(self):
|
||||||
eq_([(10, 20, 30), (40, 50, 60)], string_to_colors("0a141e28323c"))
|
eq_([(10, 20, 30), (40, 50, 60)], bytes_to_colors(b"\x0a\x14\x1e\x28\x32\x3c"))
|
||||||
|
|
||||||
def test_incomplete_color(self):
|
def test_incomplete_color(self):
|
||||||
# don't return anything if it's not a complete color
|
# don't return anything if it's not a complete color
|
||||||
eq_([], string_to_colors("102"))
|
eq_([], bytes_to_colors(b"\x01"))
|
||||||
|
eq_([(1, 2, 3)], bytes_to_colors(b"\x01\x02\x03\x04"))
|
||||||
|
|
||||||
|
|
||||||
class BaseTestCaseCache:
|
class BaseTestCaseCache:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user