Mirror of https://github.com/arsenetar/dupeguru.git (synced 2025-05-08 17:59:50 +00:00)

Compare commits


No commits in common. "eb57d269fcc1392fac9d49eb10d597a9c66fcc82" and "b80489fd66fe6b67b8f3f4cf3e19bf443b0caf2b" have entirely different histories.

7 changed files with 52 additions and 172 deletions

.gitignore

@@ -7,7 +7,6 @@ __pycache__
 .lock-waf*
 .tox
 /tags
-*.eggs
 build
 dist

core/app.py

@@ -138,8 +138,6 @@ class DupeGuru(Broadcaster):
         self.app_mode = AppMode.STANDARD
         self.discarded_file_count = 0
         self.exclude_list = ExcludeList()
-        hash_cache_file = op.join(self.appdata, "hash_cache.db")
-        fs.filesdb.connect(hash_cache_file)
         self.directories = directories.Directories(self.exclude_list)
         self.results = results.Results(self)
         self.ignore_list = IgnoreList()
@@ -295,7 +293,6 @@
     def _job_completed(self, jobid):
         if jobid == JobType.SCAN:
             self._results_changed()
-            fs.filesdb.commit()
             if not self.results.groups:
                 self.view.show_message(tr("No duplicates found."))
             else:
@@ -423,9 +420,6 @@
         except FileNotFoundError:
             pass # we don't care
 
-    def clear_hash_cache(self):
-        fs.filesdb.clear()
-
     def copy_or_move(self, dupe, copy: bool, destination: str, dest_type: DestType):
         source_path = dupe.path
         location_path = first(p for p in self.directories if dupe.path in p)
@@ -757,9 +751,6 @@
         self.exclude_list.save_to_xml(p)
         self.notify("save_session")
 
-    def close(self):
-        fs.filesdb.close()
-
     def save_as(self, filename):
         """Save results in ``filename``.

core/fs.py

@@ -14,11 +14,7 @@
 import hashlib
 from math import floor
 import logging
-import sqlite3
-from threading import Lock
-from typing import Any
 
-from hscommon.path import Path
 from hscommon.util import nonone, get_file_ext
 
 __all__ = [
@@ -82,82 +78,6 @@ class OperationError(FSError):
     cls_message = "Operation on '{name}' failed."
 
 
-class FilesDB:
-
-    create_table_query = "CREATE TABLE IF NOT EXISTS files (path TEXT PRIMARY KEY, size INTEGER, mtime_ns INTEGER, entry_dt DATETIME, md5 BLOB, md5partial BLOB)"
-    drop_table_query = "DROP TABLE files;"
-    select_query = "SELECT {key} FROM files WHERE path=:path AND size=:size and mtime_ns=:mtime_ns"
-    insert_query = """
-        INSERT INTO files (path, size, mtime_ns, entry_dt, {key}) VALUES (:path, :size, :mtime_ns, datetime('now'), :value)
-        ON CONFLICT(path) DO UPDATE SET size=:size, mtime_ns=:mtime_ns, entry_dt=datetime('now'), {key}=:value;
-    """
-
-    def __init__(self):
-        self.conn = None
-        self.cur = None
-        self.lock = None
-
-    def connect(self, path):
-        # type: (str, ) -> None
-
-        self.conn = sqlite3.connect(path, check_same_thread=False)
-        self.cur = self.conn.cursor()
-        self.cur.execute(self.create_table_query)
-        self.lock = Lock()
-
-    def clear(self):
-        # type: () -> None
-
-        with self.lock:
-            self.cur.execute(self.drop_table_query)
-            self.cur.execute(self.create_table_query)
-
-    def get(self, path, key):
-        # type: (Path, str) -> bytes
-
-        stat = path.stat()
-        size = stat.st_size
-        mtime_ns = stat.st_mtime_ns
-        with self.lock:
-            self.cur.execute(self.select_query.format(key=key), {"path": str(path), "size": size, "mtime_ns": mtime_ns})
-            result = self.cur.fetchone()
-
-        if result:
-            return result[0]
-
-        return None
-
-    def put(self, path, key, value):
-        # type: (Path, str, Any) -> None
-
-        stat = path.stat()
-        size = stat.st_size
-        mtime_ns = stat.st_mtime_ns
-        with self.lock:
-            self.cur.execute(
-                self.insert_query.format(key=key),
-                {"path": str(path), "size": size, "mtime_ns": mtime_ns, "value": value},
-            )
-
-    def commit(self):
-        # type: () -> None
-
-        with self.lock:
-            self.conn.commit()
-
-    def close(self):
-        # type: () -> None
-
-        with self.lock:
-            self.cur.close()
-            self.conn.close()
-
-
-filesdb = FilesDB() # Singleton
-
-
 class File:
     """Represents a file and holds metadata to be used for scanning."""
@@ -187,32 +107,10 @@ class File:
             result = self.INITIAL_INFO[attrname]
         return result
 
-    def _calc_md5(self):
-        # type: () -> bytes
-
-        with self.path.open("rb") as fp:
-            md5 = hashlib.md5()
-            # The goal here is to not run out of memory on really big files. However, the chunk
-            # size has to be large enough so that the python loop isn't too costly in terms of
-            # CPU.
-            CHUNK_SIZE = 1024 * 1024 # 1 mb
-            filedata = fp.read(CHUNK_SIZE)
-            while filedata:
-                md5.update(filedata)
-                filedata = fp.read(CHUNK_SIZE)
-            return md5.digest()
-
-    def _calc_md5partial(self):
-        # type: () -> bytes
-
-        # This offset is where we should start reading the file to get a partial md5
-        # For audio file, it should be where audio data starts
-        offset, size = (0x4000, 0x4000)
-        with self.path.open("rb") as fp:
-            fp.seek(offset)
-            partialdata = fp.read(size)
-            return hashlib.md5(partialdata).digest()
+    # This offset is where we should start reading the file to get a partial md5
+    # For audio file, it should be where audio data starts
+    def _get_md5partial_offset_and_size(self):
+        return (0x4000, 0x4000) # 16Kb
 
     def _read_info(self, field):
         # print(f"_read_info({field}) for {self}")
@@ -222,20 +120,28 @@ class File:
             self.mtime = nonone(stats.st_mtime, 0)
         elif field == "md5partial":
             try:
-                self.md5partial = filesdb.get(self.path, "md5partial")
-                if self.md5partial is None:
-                    self.md5partial = self._calc_md5partial()
-                    filesdb.put(self.path, "md5partial", self.md5partial)
-            except Exception as e:
-                logging.warning("Couldn't get md5partial for %s: %s", self.path, e)
+                with self.path.open("rb") as fp:
+                    offset, size = self._get_md5partial_offset_and_size()
+                    fp.seek(offset)
+                    partialdata = fp.read(size)
+                    md5 = hashlib.md5(partialdata)
+                    self.md5partial = md5.digest()
+            except Exception:
+                pass
         elif field == "md5":
             try:
-                self.md5 = filesdb.get(self.path, "md5")
-                if self.md5 is None:
-                    self.md5 = self._calc_md5()
-                    filesdb.put(self.path, "md5", self.md5)
-            except Exception as e:
-                logging.warning("Couldn't get md5 for %s: %s", self.path, e)
+                with self.path.open("rb") as fp:
+                    md5 = hashlib.md5()
+                    filedata = fp.read(CHUNK_SIZE)
+                    while filedata:
+                        md5.update(filedata)
+                        filedata = fp.read(CHUNK_SIZE)
+                    # FIXME For python 3.8 and later
+                    # while filedata := fp.read(CHUNK_SIZE):
+                    #     md5.update(filedata)
+                    self.md5 = md5.digest()
+            except Exception:
+                pass
         elif field == "md5samples":
             try:
                 with self.path.open("rb") as fp:
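
The FIXME in the md5 branch above points at the Python 3.8+ walrus form of the read loop. Below is a self-contained sketch of that loop written that way; chunked_md5 is a hypothetical helper name used only for this example, not a function in dupeGuru.

# Standalone sketch of the chunked-read loop from the md5 branch above,
# using the Python 3.8+ walrus form the FIXME comment refers to.
import hashlib

CHUNK_SIZE = 1024 * 1024  # 1 MiB; same value as in the removed _calc_md5 above

def chunked_md5(path):
    md5 = hashlib.md5()
    with open(path, "rb") as fp:
        # read fixed-size chunks so very large files never sit in memory at once
        while filedata := fp.read(CHUNK_SIZE):
            md5.update(filedata)
    return md5.digest()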

locale/core.pot

@@ -36,83 +36,83 @@ msgstr ""
 msgid "Sending to Trash"
 msgstr ""
 
-#: core\app.py:289
+#: core\app.py:287
 msgid "A previous action is still hanging in there. You can't start a new one yet. Wait a few seconds, then try again."
 msgstr ""
 
-#: core\app.py:300
+#: core\app.py:297
 msgid "No duplicates found."
 msgstr ""
 
-#: core\app.py:315
+#: core\app.py:312
 msgid "All marked files were copied successfully."
 msgstr ""
 
-#: core\app.py:317
+#: core\app.py:314
 msgid "All marked files were moved successfully."
 msgstr ""
 
-#: core\app.py:319
+#: core\app.py:316
 msgid "All marked files were deleted successfully."
 msgstr ""
 
-#: core\app.py:321
+#: core\app.py:318
 msgid "All marked files were successfully sent to Trash."
 msgstr ""
 
-#: core\app.py:326
+#: core\app.py:323
 msgid "Could not load file: {}"
 msgstr ""
 
-#: core\app.py:382
+#: core\app.py:379
 msgid "'{}' already is in the list."
 msgstr ""
 
-#: core\app.py:384
+#: core\app.py:381
 msgid "'{}' does not exist."
 msgstr ""
 
-#: core\app.py:392
+#: core\app.py:389
 msgid "All selected %d matches are going to be ignored in all subsequent scans. Continue?"
 msgstr ""
 
-#: core\app.py:469
+#: core\app.py:463
 msgid "Select a directory to copy marked files to"
 msgstr ""
 
-#: core\app.py:471
+#: core\app.py:465
 msgid "Select a directory to move marked files to"
 msgstr ""
 
-#: core\app.py:510
+#: core\app.py:504
 msgid "Select a destination for your exported CSV"
 msgstr ""
 
-#: core\app.py:516 core\app.py:771 core\app.py:781
+#: core\app.py:510 core\app.py:762 core\app.py:772
 msgid "Couldn't write to file: {}"
 msgstr ""
 
-#: core\app.py:539
+#: core\app.py:533
 msgid "You have no custom command set up. Set it up in your preferences."
 msgstr ""
 
-#: core\app.py:695 core\app.py:707
+#: core\app.py:689 core\app.py:701
 msgid "You are about to remove %d files from results. Continue?"
 msgstr ""
 
-#: core\app.py:743
+#: core\app.py:737
 msgid "{} duplicate groups were changed by the re-prioritization."
 msgstr ""
 
-#: core\app.py:790
+#: core\app.py:781
 msgid "The selected directories contain no scannable file."
 msgstr ""
 
-#: core\app.py:803
+#: core\app.py:794
 msgid "Collecting files to scan"
 msgstr ""
 
-#: core\app.py:850
+#: core\app.py:841
 msgid "%s (%d discarded)"
 msgstr ""

locale/ui.pot

@@ -927,17 +927,3 @@ msgstr ""
 #: qt\se\preferences_dialog.py:68
 msgid "Ignore files larger than"
 msgstr ""
-
-#: qt\app.py:135 qt\app.py:293
-msgid "Clear Cache"
-msgstr ""
-
-#: qt\app.py:294
-msgid ""
-"Do you really want to clear the cache? This will remove all cached file "
-"hashes and picture analysis."
-msgstr ""
-
-#: qt\app.py:299
-msgid "Cache cleared."
-msgstr ""

qt/app.py

@@ -129,11 +129,11 @@ class DupeGuru(QObject):
                 self.showDirectoriesWindow,
             ),
             (
-                "actionClearCache",
+                "actionClearPictureCache",
                 "Ctrl+Shift+P",
                 "",
-                tr("Clear Cache"),
-                self.clearCacheTriggered,
+                tr("Clear Picture Cache"),
+                self.clearPictureCacheTriggered,
             ),
             (
                 "actionExcludeList",
@@ -258,7 +258,6 @@
         self.willSavePrefs.emit()
         self.prefs.save()
         self.model.save()
-        self.model.close()
         # Workaround for #857, hide() or close().
         if self.details_dialog is not None:
             self.details_dialog.close()
@@ -289,14 +288,13 @@
         self.model.load_from(results)
         self.recentResults.insertItem(results)
 
-    def clearCacheTriggered(self):
-        title = tr("Clear Cache")
-        msg = tr("Do you really want to clear the cache? This will remove all cached file hashes and picture analysis.")
+    def clearPictureCacheTriggered(self):
+        title = tr("Clear Picture Cache")
+        msg = tr("Do you really want to remove all your cached picture analysis?")
         if self.confirm(title, msg, QMessageBox.No):
             self.model.clear_picture_cache()
-            self.model.clear_hash_cache()
             active = QApplication.activeWindow()
-            QMessageBox.information(active, title, tr("Cache cleared."))
+            QMessageBox.information(active, title, tr("Picture cache cleared."))
 
     def ignoreListTriggered(self):
         if self.use_tabs:

qt/directories_dialog.py

@@ -126,7 +126,7 @@ class DirectoriesDialog(QMainWindow):
         self.menuFile.addAction(self.actionLoadResults)
         self.menuFile.addAction(self.menuLoadRecent.menuAction())
         self.menuFile.addSeparator()
-        self.menuFile.addAction(self.app.actionClearCache)
+        self.menuFile.addAction(self.app.actionClearPictureCache)
         self.menuFile.addSeparator()
         self.menuFile.addAction(self.actionLoadDirectories)
         self.menuFile.addAction(self.actionSaveDirectories)