mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-05-08 17:59:50 +00:00
Compare commits
No commits in common. "eb57d269fcc1392fac9d49eb10d597a9c66fcc82" and "b80489fd66fe6b67b8f3f4cf3e19bf443b0caf2b" have entirely different histories.
eb57d269fc
...
b80489fd66
1
.gitignore
vendored
1
.gitignore
vendored
@ -7,7 +7,6 @@ __pycache__
|
|||||||
.lock-waf*
|
.lock-waf*
|
||||||
.tox
|
.tox
|
||||||
/tags
|
/tags
|
||||||
*.eggs
|
|
||||||
|
|
||||||
build
|
build
|
||||||
dist
|
dist
|
||||||
|
@ -138,8 +138,6 @@ class DupeGuru(Broadcaster):
|
|||||||
self.app_mode = AppMode.STANDARD
|
self.app_mode = AppMode.STANDARD
|
||||||
self.discarded_file_count = 0
|
self.discarded_file_count = 0
|
||||||
self.exclude_list = ExcludeList()
|
self.exclude_list = ExcludeList()
|
||||||
hash_cache_file = op.join(self.appdata, "hash_cache.db")
|
|
||||||
fs.filesdb.connect(hash_cache_file)
|
|
||||||
self.directories = directories.Directories(self.exclude_list)
|
self.directories = directories.Directories(self.exclude_list)
|
||||||
self.results = results.Results(self)
|
self.results = results.Results(self)
|
||||||
self.ignore_list = IgnoreList()
|
self.ignore_list = IgnoreList()
|
||||||
@ -295,7 +293,6 @@ class DupeGuru(Broadcaster):
|
|||||||
def _job_completed(self, jobid):
|
def _job_completed(self, jobid):
|
||||||
if jobid == JobType.SCAN:
|
if jobid == JobType.SCAN:
|
||||||
self._results_changed()
|
self._results_changed()
|
||||||
fs.filesdb.commit()
|
|
||||||
if not self.results.groups:
|
if not self.results.groups:
|
||||||
self.view.show_message(tr("No duplicates found."))
|
self.view.show_message(tr("No duplicates found."))
|
||||||
else:
|
else:
|
||||||
@ -423,9 +420,6 @@ class DupeGuru(Broadcaster):
|
|||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
pass # we don't care
|
pass # we don't care
|
||||||
|
|
||||||
def clear_hash_cache(self):
|
|
||||||
fs.filesdb.clear()
|
|
||||||
|
|
||||||
def copy_or_move(self, dupe, copy: bool, destination: str, dest_type: DestType):
|
def copy_or_move(self, dupe, copy: bool, destination: str, dest_type: DestType):
|
||||||
source_path = dupe.path
|
source_path = dupe.path
|
||||||
location_path = first(p for p in self.directories if dupe.path in p)
|
location_path = first(p for p in self.directories if dupe.path in p)
|
||||||
@ -757,9 +751,6 @@ class DupeGuru(Broadcaster):
|
|||||||
self.exclude_list.save_to_xml(p)
|
self.exclude_list.save_to_xml(p)
|
||||||
self.notify("save_session")
|
self.notify("save_session")
|
||||||
|
|
||||||
def close(self):
|
|
||||||
fs.filesdb.close()
|
|
||||||
|
|
||||||
def save_as(self, filename):
|
def save_as(self, filename):
|
||||||
"""Save results in ``filename``.
|
"""Save results in ``filename``.
|
||||||
|
|
||||||
|
142
core/fs.py
142
core/fs.py
@ -14,11 +14,7 @@
|
|||||||
import hashlib
|
import hashlib
|
||||||
from math import floor
|
from math import floor
|
||||||
import logging
|
import logging
|
||||||
import sqlite3
|
|
||||||
from threading import Lock
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from hscommon.path import Path
|
|
||||||
from hscommon.util import nonone, get_file_ext
|
from hscommon.util import nonone, get_file_ext
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
@ -82,82 +78,6 @@ class OperationError(FSError):
|
|||||||
cls_message = "Operation on '{name}' failed."
|
cls_message = "Operation on '{name}' failed."
|
||||||
|
|
||||||
|
|
||||||
class FilesDB:
|
|
||||||
|
|
||||||
create_table_query = "CREATE TABLE IF NOT EXISTS files (path TEXT PRIMARY KEY, size INTEGER, mtime_ns INTEGER, entry_dt DATETIME, md5 BLOB, md5partial BLOB)"
|
|
||||||
drop_table_query = "DROP TABLE files;"
|
|
||||||
select_query = "SELECT {key} FROM files WHERE path=:path AND size=:size and mtime_ns=:mtime_ns"
|
|
||||||
insert_query = """
|
|
||||||
INSERT INTO files (path, size, mtime_ns, entry_dt, {key}) VALUES (:path, :size, :mtime_ns, datetime('now'), :value)
|
|
||||||
ON CONFLICT(path) DO UPDATE SET size=:size, mtime_ns=:mtime_ns, entry_dt=datetime('now'), {key}=:value;
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.conn = None
|
|
||||||
self.cur = None
|
|
||||||
self.lock = None
|
|
||||||
|
|
||||||
def connect(self, path):
|
|
||||||
# type: (str, ) -> None
|
|
||||||
|
|
||||||
self.conn = sqlite3.connect(path, check_same_thread=False)
|
|
||||||
self.cur = self.conn.cursor()
|
|
||||||
self.cur.execute(self.create_table_query)
|
|
||||||
self.lock = Lock()
|
|
||||||
|
|
||||||
def clear(self):
|
|
||||||
# type: () -> None
|
|
||||||
|
|
||||||
with self.lock:
|
|
||||||
self.cur.execute(self.drop_table_query)
|
|
||||||
self.cur.execute(self.create_table_query)
|
|
||||||
|
|
||||||
def get(self, path, key):
|
|
||||||
# type: (Path, str) -> bytes
|
|
||||||
|
|
||||||
stat = path.stat()
|
|
||||||
size = stat.st_size
|
|
||||||
mtime_ns = stat.st_mtime_ns
|
|
||||||
|
|
||||||
with self.lock:
|
|
||||||
self.cur.execute(self.select_query.format(key=key), {"path": str(path), "size": size, "mtime_ns": mtime_ns})
|
|
||||||
result = self.cur.fetchone()
|
|
||||||
|
|
||||||
if result:
|
|
||||||
return result[0]
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
def put(self, path, key, value):
|
|
||||||
# type: (Path, str, Any) -> None
|
|
||||||
|
|
||||||
stat = path.stat()
|
|
||||||
size = stat.st_size
|
|
||||||
mtime_ns = stat.st_mtime_ns
|
|
||||||
|
|
||||||
with self.lock:
|
|
||||||
self.cur.execute(
|
|
||||||
self.insert_query.format(key=key),
|
|
||||||
{"path": str(path), "size": size, "mtime_ns": mtime_ns, "value": value},
|
|
||||||
)
|
|
||||||
|
|
||||||
def commit(self):
|
|
||||||
# type: () -> None
|
|
||||||
|
|
||||||
with self.lock:
|
|
||||||
self.conn.commit()
|
|
||||||
|
|
||||||
def close(self):
|
|
||||||
# type: () -> None
|
|
||||||
|
|
||||||
with self.lock:
|
|
||||||
self.cur.close()
|
|
||||||
self.conn.close()
|
|
||||||
|
|
||||||
|
|
||||||
filesdb = FilesDB() # Singleton
|
|
||||||
|
|
||||||
|
|
||||||
class File:
|
class File:
|
||||||
"""Represents a file and holds metadata to be used for scanning."""
|
"""Represents a file and holds metadata to be used for scanning."""
|
||||||
|
|
||||||
@ -187,32 +107,10 @@ class File:
|
|||||||
result = self.INITIAL_INFO[attrname]
|
result = self.INITIAL_INFO[attrname]
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _calc_md5(self):
|
# This offset is where we should start reading the file to get a partial md5
|
||||||
# type: () -> bytes
|
# For audio file, it should be where audio data starts
|
||||||
|
def _get_md5partial_offset_and_size(self):
|
||||||
with self.path.open("rb") as fp:
|
return (0x4000, 0x4000) # 16Kb
|
||||||
md5 = hashlib.md5()
|
|
||||||
# The goal here is to not run out of memory on really big files. However, the chunk
|
|
||||||
# size has to be large enough so that the python loop isn't too costly in terms of
|
|
||||||
# CPU.
|
|
||||||
CHUNK_SIZE = 1024 * 1024 # 1 mb
|
|
||||||
filedata = fp.read(CHUNK_SIZE)
|
|
||||||
while filedata:
|
|
||||||
md5.update(filedata)
|
|
||||||
filedata = fp.read(CHUNK_SIZE)
|
|
||||||
return md5.digest()
|
|
||||||
|
|
||||||
def _calc_md5partial(self):
|
|
||||||
# type: () -> bytes
|
|
||||||
|
|
||||||
# This offset is where we should start reading the file to get a partial md5
|
|
||||||
# For audio file, it should be where audio data starts
|
|
||||||
offset, size = (0x4000, 0x4000)
|
|
||||||
|
|
||||||
with self.path.open("rb") as fp:
|
|
||||||
fp.seek(offset)
|
|
||||||
partialdata = fp.read(size)
|
|
||||||
return hashlib.md5(partialdata).digest()
|
|
||||||
|
|
||||||
def _read_info(self, field):
|
def _read_info(self, field):
|
||||||
# print(f"_read_info({field}) for {self}")
|
# print(f"_read_info({field}) for {self}")
|
||||||
@ -222,20 +120,28 @@ class File:
|
|||||||
self.mtime = nonone(stats.st_mtime, 0)
|
self.mtime = nonone(stats.st_mtime, 0)
|
||||||
elif field == "md5partial":
|
elif field == "md5partial":
|
||||||
try:
|
try:
|
||||||
self.md5partial = filesdb.get(self.path, "md5partial")
|
with self.path.open("rb") as fp:
|
||||||
if self.md5partial is None:
|
offset, size = self._get_md5partial_offset_and_size()
|
||||||
self.md5partial = self._calc_md5partial()
|
fp.seek(offset)
|
||||||
filesdb.put(self.path, "md5partial", self.md5partial)
|
partialdata = fp.read(size)
|
||||||
except Exception as e:
|
md5 = hashlib.md5(partialdata)
|
||||||
logging.warning("Couldn't get md5partial for %s: %s", self.path, e)
|
self.md5partial = md5.digest()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
elif field == "md5":
|
elif field == "md5":
|
||||||
try:
|
try:
|
||||||
self.md5 = filesdb.get(self.path, "md5")
|
with self.path.open("rb") as fp:
|
||||||
if self.md5 is None:
|
md5 = hashlib.md5()
|
||||||
self.md5 = self._calc_md5()
|
filedata = fp.read(CHUNK_SIZE)
|
||||||
filesdb.put(self.path, "md5", self.md5)
|
while filedata:
|
||||||
except Exception as e:
|
md5.update(filedata)
|
||||||
logging.warning("Couldn't get md5 for %s: %s", self.path, e)
|
filedata = fp.read(CHUNK_SIZE)
|
||||||
|
# FIXME For python 3.8 and later
|
||||||
|
# while filedata := fp.read(CHUNK_SIZE):
|
||||||
|
# md5.update(filedata)
|
||||||
|
self.md5 = md5.digest()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
elif field == "md5samples":
|
elif field == "md5samples":
|
||||||
try:
|
try:
|
||||||
with self.path.open("rb") as fp:
|
with self.path.open("rb") as fp:
|
||||||
|
@ -36,83 +36,83 @@ msgstr ""
|
|||||||
msgid "Sending to Trash"
|
msgid "Sending to Trash"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:289
|
#: core\app.py:287
|
||||||
msgid "A previous action is still hanging in there. You can't start a new one yet. Wait a few seconds, then try again."
|
msgid "A previous action is still hanging in there. You can't start a new one yet. Wait a few seconds, then try again."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:300
|
#: core\app.py:297
|
||||||
msgid "No duplicates found."
|
msgid "No duplicates found."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:315
|
#: core\app.py:312
|
||||||
msgid "All marked files were copied successfully."
|
msgid "All marked files were copied successfully."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:317
|
#: core\app.py:314
|
||||||
msgid "All marked files were moved successfully."
|
msgid "All marked files were moved successfully."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:319
|
#: core\app.py:316
|
||||||
msgid "All marked files were deleted successfully."
|
msgid "All marked files were deleted successfully."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:321
|
#: core\app.py:318
|
||||||
msgid "All marked files were successfully sent to Trash."
|
msgid "All marked files were successfully sent to Trash."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:326
|
#: core\app.py:323
|
||||||
msgid "Could not load file: {}"
|
msgid "Could not load file: {}"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:382
|
#: core\app.py:379
|
||||||
msgid "'{}' already is in the list."
|
msgid "'{}' already is in the list."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:384
|
#: core\app.py:381
|
||||||
msgid "'{}' does not exist."
|
msgid "'{}' does not exist."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:392
|
#: core\app.py:389
|
||||||
msgid "All selected %d matches are going to be ignored in all subsequent scans. Continue?"
|
msgid "All selected %d matches are going to be ignored in all subsequent scans. Continue?"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:469
|
#: core\app.py:463
|
||||||
msgid "Select a directory to copy marked files to"
|
msgid "Select a directory to copy marked files to"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:471
|
#: core\app.py:465
|
||||||
msgid "Select a directory to move marked files to"
|
msgid "Select a directory to move marked files to"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:510
|
#: core\app.py:504
|
||||||
msgid "Select a destination for your exported CSV"
|
msgid "Select a destination for your exported CSV"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:516 core\app.py:771 core\app.py:781
|
#: core\app.py:510 core\app.py:762 core\app.py:772
|
||||||
msgid "Couldn't write to file: {}"
|
msgid "Couldn't write to file: {}"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:539
|
#: core\app.py:533
|
||||||
msgid "You have no custom command set up. Set it up in your preferences."
|
msgid "You have no custom command set up. Set it up in your preferences."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:695 core\app.py:707
|
#: core\app.py:689 core\app.py:701
|
||||||
msgid "You are about to remove %d files from results. Continue?"
|
msgid "You are about to remove %d files from results. Continue?"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:743
|
#: core\app.py:737
|
||||||
msgid "{} duplicate groups were changed by the re-prioritization."
|
msgid "{} duplicate groups were changed by the re-prioritization."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:790
|
#: core\app.py:781
|
||||||
msgid "The selected directories contain no scannable file."
|
msgid "The selected directories contain no scannable file."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:803
|
#: core\app.py:794
|
||||||
msgid "Collecting files to scan"
|
msgid "Collecting files to scan"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: core\app.py:850
|
#: core\app.py:841
|
||||||
msgid "%s (%d discarded)"
|
msgid "%s (%d discarded)"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
|
@ -927,17 +927,3 @@ msgstr ""
|
|||||||
#: qt\se\preferences_dialog.py:68
|
#: qt\se\preferences_dialog.py:68
|
||||||
msgid "Ignore files larger than"
|
msgid "Ignore files larger than"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: qt\app.py:135 qt\app.py:293
|
|
||||||
msgid "Clear Cache"
|
|
||||||
msgstr ""
|
|
||||||
|
|
||||||
#: qt\app.py:294
|
|
||||||
msgid ""
|
|
||||||
"Do you really want to clear the cache? This will remove all cached file "
|
|
||||||
"hashes and picture analysis."
|
|
||||||
msgstr ""
|
|
||||||
|
|
||||||
#: qt\app.py:299
|
|
||||||
msgid "Cache cleared."
|
|
||||||
msgstr ""
|
|
||||||
|
16
qt/app.py
16
qt/app.py
@ -129,11 +129,11 @@ class DupeGuru(QObject):
|
|||||||
self.showDirectoriesWindow,
|
self.showDirectoriesWindow,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"actionClearCache",
|
"actionClearPictureCache",
|
||||||
"Ctrl+Shift+P",
|
"Ctrl+Shift+P",
|
||||||
"",
|
"",
|
||||||
tr("Clear Cache"),
|
tr("Clear Picture Cache"),
|
||||||
self.clearCacheTriggered,
|
self.clearPictureCacheTriggered,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"actionExcludeList",
|
"actionExcludeList",
|
||||||
@ -258,7 +258,6 @@ class DupeGuru(QObject):
|
|||||||
self.willSavePrefs.emit()
|
self.willSavePrefs.emit()
|
||||||
self.prefs.save()
|
self.prefs.save()
|
||||||
self.model.save()
|
self.model.save()
|
||||||
self.model.close()
|
|
||||||
# Workaround for #857, hide() or close().
|
# Workaround for #857, hide() or close().
|
||||||
if self.details_dialog is not None:
|
if self.details_dialog is not None:
|
||||||
self.details_dialog.close()
|
self.details_dialog.close()
|
||||||
@ -289,14 +288,13 @@ class DupeGuru(QObject):
|
|||||||
self.model.load_from(results)
|
self.model.load_from(results)
|
||||||
self.recentResults.insertItem(results)
|
self.recentResults.insertItem(results)
|
||||||
|
|
||||||
def clearCacheTriggered(self):
|
def clearPictureCacheTriggered(self):
|
||||||
title = tr("Clear Cache")
|
title = tr("Clear Picture Cache")
|
||||||
msg = tr("Do you really want to clear the cache? This will remove all cached file hashes and picture analysis.")
|
msg = tr("Do you really want to remove all your cached picture analysis?")
|
||||||
if self.confirm(title, msg, QMessageBox.No):
|
if self.confirm(title, msg, QMessageBox.No):
|
||||||
self.model.clear_picture_cache()
|
self.model.clear_picture_cache()
|
||||||
self.model.clear_hash_cache()
|
|
||||||
active = QApplication.activeWindow()
|
active = QApplication.activeWindow()
|
||||||
QMessageBox.information(active, title, tr("Cache cleared."))
|
QMessageBox.information(active, title, tr("Picture cache cleared."))
|
||||||
|
|
||||||
def ignoreListTriggered(self):
|
def ignoreListTriggered(self):
|
||||||
if self.use_tabs:
|
if self.use_tabs:
|
||||||
|
@ -126,7 +126,7 @@ class DirectoriesDialog(QMainWindow):
|
|||||||
self.menuFile.addAction(self.actionLoadResults)
|
self.menuFile.addAction(self.actionLoadResults)
|
||||||
self.menuFile.addAction(self.menuLoadRecent.menuAction())
|
self.menuFile.addAction(self.menuLoadRecent.menuAction())
|
||||||
self.menuFile.addSeparator()
|
self.menuFile.addSeparator()
|
||||||
self.menuFile.addAction(self.app.actionClearCache)
|
self.menuFile.addAction(self.app.actionClearPictureCache)
|
||||||
self.menuFile.addSeparator()
|
self.menuFile.addSeparator()
|
||||||
self.menuFile.addAction(self.actionLoadDirectories)
|
self.menuFile.addAction(self.actionLoadDirectories)
|
||||||
self.menuFile.addAction(self.actionSaveDirectories)
|
self.menuFile.addAction(self.actionSaveDirectories)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user