From 20320f539f7ddd2d68c1a6bbe71aed6c231377b8 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Fri, 10 Aug 2012 15:58:37 -0400 Subject: [PATCH] [#199 state:fixed] Added a mtime column to PE's cache DB so that we can purge outdated caches. --- core_pe/cache.py | 54 +++++++++++++++++++++++++++---------- core_pe/matchblock.py | 1 + core_pe/tests/cache_test.py | 2 +- 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/core_pe/cache.py b/core_pe/cache.py index 0bbe4b15..01421f01 100644 --- a/core_pe/cache.py +++ b/core_pe/cache.py @@ -7,6 +7,7 @@ # http://www.hardcoded.net/licenses/bsd_license import os +import os.path as op import logging import sqlite3 as sqlite @@ -30,7 +31,7 @@ def colors_to_string(colors): # result.append((number >> 16, (number >> 8) & 0xff, number & 0xff)) # return result -class Cache(object): +class Cache: """A class to cache picture blocks. """ def __init__(self, db=':memory:'): @@ -72,29 +73,34 @@ class Cache(object): result = self.con.execute(sql).fetchall() return result[0][0] - def __setitem__(self, key, value): - value = colors_to_string(value) - if key in self: - sql = "update pictures set blocks = ? where path = ?" + def __setitem__(self, path_str, blocks): + blocks = colors_to_string(blocks) + if op.exists(path_str): + mtime = int(os.stat(path_str).st_mtime) else: - sql = "insert into pictures(blocks,path) values(?,?)" + mtime = 0 + if path_str in self: + sql = "update pictures set blocks = ?, mtime = ? where path = ?" + else: + sql = "insert into pictures(blocks,mtime,path) values(?,?,?)" try: - self.con.execute(sql, [value, key]) + self.con.execute(sql, [blocks, mtime, path_str]) except sqlite.OperationalError: - logging.warning('Picture cache could not set value for key %r', key) + logging.warning('Picture cache could not set value for key %r', path_str) except sqlite.DatabaseError as e: - logging.warning('DatabaseError while setting value for key %r: %s', key, str(e)) + logging.warning('DatabaseError while setting value for key %r: %s', path_str, str(e)) def _create_con(self, second_try=False): def create_tables(): - sql = "create table pictures(path TEXT, blocks TEXT)" - self.con.execute(sql); - sql = "create index idx_path on pictures (path)" - self.con.execute(sql) + logging.debug("Creating picture cache tables.") + self.con.execute("drop table if exists pictures"); + self.con.execute("drop index if exists idx_path"); + self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)"); + self.con.execute("create index idx_path on pictures (path)") self.con = sqlite.connect(self.dbname, isolation_level=None) try: - self.con.execute("select * from pictures where 1=2") + self.con.execute("select path, mtime, blocks from pictures where 1=2") except sqlite.OperationalError: # new db create_tables() except sqlite.DatabaseError as e: # corrupted db @@ -134,3 +140,23 @@ class Cache(object): cur = self.con.execute(sql) return ((rowid, string_to_colors(blocks)) for rowid, blocks in cur) + def purge_outdated(self): + """Go through the cache and purge outdated records. + + A record is outdated if the picture doesn't exist or if its mtime is greater than the one in + the db. + """ + todelete = [] + sql = "select rowid, path, mtime from pictures" + cur = self.con.execute(sql) + for rowid, path_str, mtime in cur: + if mtime and op.exists(path_str): + picture_mtime = os.stat(path_str).st_mtime + if int(picture_mtime) <= mtime: + # not outdated + continue + todelete.append(rowid) + if todelete: + sql = "delete from pictures where rowid in (%s)" % ','.join(map(str, todelete)) + self.con.execute(sql) + diff --git a/core_pe/matchblock.py b/core_pe/matchblock.py index 583efba4..4ab88762 100644 --- a/core_pe/matchblock.py +++ b/core_pe/matchblock.py @@ -55,6 +55,7 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob): # MemoryError happens when trying to read an image file, which is freed from memory by the # time that MemoryError is raised. cache = Cache(cache_path) + cache.purge_outdated() prepared = [] # only pictures for which there was no error getting blocks try: for picture in j.iter_with_progress(pictures, tr("Analyzed %d/%d pictures")): diff --git a/core_pe/tests/cache_test.py b/core_pe/tests/cache_test.py index 49b51712..3b45fd95 100644 --- a/core_pe/tests/cache_test.py +++ b/core_pe/tests/cache_test.py @@ -141,5 +141,5 @@ class TestCaseCacheSQLEscape: try: del c["foo'bar"] except KeyError: - self.fail() + assert False