mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-22 06:37:17 +00:00

Format all files with black, correcting line length

2021-08-15 04:10:18 -05:00
parent 9446f37fad
commit ffe6b7047c
80 changed files with 517 additions and 970 deletions
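For context, most hunks below follow the same before/after pattern: the old wrapped form first, then the single line black produced once the configured line length allowed it. The exact line-length setting is not visible in this diff, only that it exceeds black's default of 88. A minimal sketch of reproducing this kind of rewrite through black's Python API, with 110 as an assumed value for illustration only:

import black

# One of the wrapped forms that recurs in the "before" lines of this commit.
src = (
    "self.appdata = desktop.special_folder_path(\n"
    "    desktop.SpecialFolder.AppData, appname=self.NAME\n"
    ")\n"
)

# line_length=110 is an assumption; the project's real setting is not shown here.
mode = black.Mode(line_length=110)
print(black.format_str(src, mode=mode))
# -> self.appdata = desktop.special_folder_path(desktop.SpecialFolder.AppData, appname=self.NAME)

The same result comes from the CLI with black --line-length 110 <path>.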

View File

@@ -132,9 +132,7 @@ class DupeGuru(Broadcaster):
logging.debug("Debug mode enabled")
Broadcaster.__init__(self)
self.view = view
self.appdata = desktop.special_folder_path(
desktop.SpecialFolder.AppData, appname=self.NAME
)
self.appdata = desktop.special_folder_path(desktop.SpecialFolder.AppData, appname=self.NAME)
if not op.exists(self.appdata):
os.makedirs(self.appdata)
self.app_mode = AppMode.Standard
@@ -182,17 +180,13 @@ class DupeGuru(Broadcaster):
def _get_picture_cache_path(self):
cache_type = self.options["picture_cache_type"]
cache_name = (
"cached_pictures.shelve" if cache_type == "shelve" else "cached_pictures.db"
)
cache_name = "cached_pictures.shelve" if cache_type == "shelve" else "cached_pictures.db"
return op.join(self.appdata, cache_name)
def _get_dupe_sort_key(self, dupe, get_group, key, delta):
if self.app_mode in (AppMode.Music, AppMode.Picture):
if key == "folder_path":
dupe_folder_path = getattr(
dupe, "display_folder_path", dupe.folder_path
)
dupe_folder_path = getattr(dupe, "display_folder_path", dupe.folder_path)
return str(dupe_folder_path).lower()
if self.app_mode == AppMode.Picture:
if delta and key == "dimensions":
@@ -220,9 +214,7 @@ class DupeGuru(Broadcaster):
def _get_group_sort_key(self, group, key):
if self.app_mode in (AppMode.Music, AppMode.Picture):
if key == "folder_path":
dupe_folder_path = getattr(
group.ref, "display_folder_path", group.ref.folder_path
)
dupe_folder_path = getattr(group.ref, "display_folder_path", group.ref.folder_path)
return str(dupe_folder_path).lower()
if key == "percentage":
return group.percentage
@@ -235,9 +227,7 @@ class DupeGuru(Broadcaster):
def _do_delete(self, j, link_deleted, use_hardlinks, direct_deletion):
def op(dupe):
j.add_progress()
return self._do_delete_dupe(
dupe, link_deleted, use_hardlinks, direct_deletion
)
return self._do_delete_dupe(dupe, link_deleted, use_hardlinks, direct_deletion)
j.start_job(self.results.mark_count)
self.results.perform_on_marked(op, True)
@@ -277,11 +267,7 @@ class DupeGuru(Broadcaster):
return None
def _get_export_data(self):
columns = [
col
for col in self.result_table.columns.ordered_columns
if col.visible and col.name != "marked"
]
columns = [col for col in self.result_table.columns.ordered_columns if col.visible and col.name != "marked"]
colnames = [col.display for col in columns]
rows = []
for group_id, group in enumerate(self.results.groups):
@@ -293,11 +279,7 @@ class DupeGuru(Broadcaster):
return colnames, rows
def _results_changed(self):
self.selected_dupes = [
d
for d in self.selected_dupes
if self.results.get_group_of_duplicate(d) is not None
]
self.selected_dupes = [d for d in self.selected_dupes if self.results.get_group_of_duplicate(d) is not None]
self.notify("results_changed")
def _start_job(self, jobid, func, args=()):
@@ -332,9 +314,7 @@ class DupeGuru(Broadcaster):
msg = {
JobType.Copy: tr("All marked files were copied successfully."),
JobType.Move: tr("All marked files were moved successfully."),
JobType.Delete: tr(
"All marked files were successfully sent to Trash."
),
JobType.Delete: tr("All marked files were successfully sent to Trash."),
}[jobid]
self.view.show_message(msg)
@@ -401,15 +381,12 @@ class DupeGuru(Broadcaster):
self.view.show_message(tr("'{}' does not exist.").format(d))
def add_selected_to_ignore_list(self):
"""Adds :attr:`selected_dupes` to :attr:`ignore_list`.
"""
"""Adds :attr:`selected_dupes` to :attr:`ignore_list`."""
dupes = self.without_ref(self.selected_dupes)
if not dupes:
self.view.show_message(MSG_NO_SELECTED_DUPES)
return
msg = tr(
"All selected %d matches are going to be ignored in all subsequent scans. Continue?"
)
msg = tr("All selected %d matches are going to be ignored in all subsequent scans. Continue?")
if not self.view.ask_yes_no(msg % len(dupes)):
return
for dupe in dupes:
@@ -483,16 +460,17 @@ class DupeGuru(Broadcaster):
self.view.show_message(MSG_NO_MARKED_DUPES)
return
destination = self.view.select_dest_folder(
tr("Select a directory to copy marked files to") if copy
else tr("Select a directory to move marked files to"))
tr("Select a directory to copy marked files to")
if copy
else tr("Select a directory to move marked files to")
)
if destination:
desttype = self.options["copymove_dest_type"]
jobid = JobType.Copy if copy else JobType.Move
self._start_job(jobid, do)
def delete_marked(self):
"""Start an async job to send marked duplicates to the trash.
"""
"""Start an async job to send marked duplicates to the trash."""
if not self.results.mark_count:
self.view.show_message(MSG_NO_MARKED_DUPES)
return
@@ -523,9 +501,7 @@ class DupeGuru(Broadcaster):
The columns and their order in the resulting CSV file is determined in the same way as in
:meth:`export_to_xhtml`.
"""
dest_file = self.view.select_dest_file(
tr("Select a destination for your exported CSV"), "csv"
)
dest_file = self.view.select_dest_file(tr("Select a destination for your exported CSV"), "csv")
if dest_file:
colnames, rows = self._get_export_data()
try:
@@ -542,9 +518,7 @@ class DupeGuru(Broadcaster):
try:
return dupe.get_display_info(group, delta)
except Exception as e:
logging.warning(
"Exception (type: %s) on GetDisplayInfo for %s: %s",
type(e), str(dupe.path), str(e))
logging.warning("Exception (type: %s) on GetDisplayInfo for %s: %s", type(e), str(dupe.path), str(e))
return empty_data()
def invoke_custom_command(self):
@@ -556,9 +530,7 @@ class DupeGuru(Broadcaster):
"""
cmd = self.view.get_default("CustomCommand")
if not cmd:
msg = tr(
"You have no custom command set up. Set it up in your preferences."
)
msg = tr("You have no custom command set up. Set it up in your preferences.")
self.view.show_message(msg)
return
if not self.selected_dupes:
@@ -634,9 +606,7 @@ class DupeGuru(Broadcaster):
if not self.result_table.power_marker:
if changed_groups:
self.selected_dupes = [
d
for d in self.selected_dupes
if self.results.get_group_of_duplicate(d).ref is d
d for d in self.selected_dupes if self.results.get_group_of_duplicate(d).ref is d
]
self.notify("results_changed")
else:
@@ -648,20 +618,17 @@ class DupeGuru(Broadcaster):
self.notify("results_changed_but_keep_selection")
def mark_all(self):
"""Set all dupes in the results as marked.
"""
"""Set all dupes in the results as marked."""
self.results.mark_all()
self.notify("marking_changed")
def mark_none(self):
"""Set all dupes in the results as unmarked.
"""
"""Set all dupes in the results as unmarked."""
self.results.mark_none()
self.notify("marking_changed")
def mark_invert(self):
"""Invert the marked state of all dupes in the results.
"""
"""Invert the marked state of all dupes in the results."""
self.results.mark_invert()
self.notify("marking_changed")
@@ -679,8 +646,7 @@ class DupeGuru(Broadcaster):
self.notify("marking_changed")
def open_selected(self):
"""Open :attr:`selected_dupes` with their associated application.
"""
"""Open :attr:`selected_dupes` with their associated application."""
if len(self.selected_dupes) > 10:
if not self.view.ask_yes_no(MSG_MANY_FILES_TO_OPEN):
return
@@ -688,8 +654,7 @@ class DupeGuru(Broadcaster):
desktop.open_path(dupe.path)
def purge_ignore_list(self):
"""Remove files that don't exist from :attr:`ignore_list`.
"""
"""Remove files that don't exist from :attr:`ignore_list`."""
self.ignore_list.Filter(lambda f, s: op.exists(f) and op.exists(s))
self.ignore_list_dialog.refresh()
@@ -719,8 +684,7 @@ class DupeGuru(Broadcaster):
self.notify("results_changed_but_keep_selection")
def remove_marked(self):
"""Removed marked duplicates from the results (without touching the files themselves).
"""
"""Removed marked duplicates from the results (without touching the files themselves)."""
if not self.results.mark_count:
self.view.show_message(MSG_NO_MARKED_DUPES)
return
@@ -731,8 +695,7 @@ class DupeGuru(Broadcaster):
self._results_changed()
def remove_selected(self):
"""Removed :attr:`selected_dupes` from the results (without touching the files themselves).
"""
"""Removed :attr:`selected_dupes` from the results (without touching the files themselves)."""
dupes = self.without_ref(self.selected_dupes)
if not dupes:
self.view.show_message(MSG_NO_SELECTED_DUPES)
@@ -773,9 +736,7 @@ class DupeGuru(Broadcaster):
if count:
self.results.refresh_required = True
self._results_changed()
msg = tr("{} duplicate groups were changed by the re-prioritization.").format(
count
)
msg = tr("{} duplicate groups were changed by the re-prioritization.").format(count)
self.view.show_message(msg)
def reveal_selected(self):
@@ -819,9 +780,7 @@ class DupeGuru(Broadcaster):
"""
scanner = self.SCANNER_CLASS()
if not self.directories.has_any_file():
self.view.show_message(
tr("The selected directories contain no scannable file.")
)
self.view.show_message(tr("The selected directories contain no scannable file."))
return
# Send relevant options down to the scanner instance
for k, v in self.options.items():
@@ -836,13 +795,9 @@ class DupeGuru(Broadcaster):
def do(j):
j.set_progress(0, tr("Collecting files to scan"))
if scanner.scan_type == ScanType.Folders:
files = list(
self.directories.get_folders(folderclass=se.fs.Folder, j=j)
)
files = list(self.directories.get_folders(folderclass=se.fs.Folder, j=j))
else:
files = list(
self.directories.get_files(fileclasses=self.fileclasses, j=j)
)
files = list(self.directories.get_files(fileclasses=self.fileclasses, j=j))
if self.options["ignore_hardlink_matches"]:
files = self._remove_hardlink_dupes(files)
logging.info("Scanning %d files" % len(files))
@@ -864,13 +819,8 @@ class DupeGuru(Broadcaster):
self.notify("marking_changed")
def without_ref(self, dupes):
"""Returns ``dupes`` with all reference elements removed.
"""
return [
dupe
for dupe in dupes
if self.results.get_group_of_duplicate(dupe).ref is not dupe
]
"""Returns ``dupes`` with all reference elements removed."""
return [dupe for dupe in dupes if self.results.get_group_of_duplicate(dupe).ref is not dupe]
def get_default(self, key, fallback_value=None):
result = nonone(self.view.get_default(key), fallback_value)

View File

@@ -109,8 +109,7 @@ class Directories:
# print(f"len of files: {len(files)} {files}")
for f in files:
if not self._exclude_list.is_excluded(root, f):
found_files.append(fs.get_file(rootPath + f,
fileclasses=fileclasses))
found_files.append(fs.get_file(rootPath + f, fileclasses=fileclasses))
found_files = [f for f in found_files if f is not None]
# In some cases, directories can be considered as files by dupeGuru, which is
# why we have this line below. In fact, there is only one case: Bundle files under

View File

@@ -4,6 +4,7 @@
from .markable import Markable
from xml.etree import ElementTree as ET
# TODO: perhaps use regex module for better Unicode support? https://pypi.org/project/regex/
# also https://pypi.org/project/re2/
# TODO update the Result list with newly added regexes if possible
@@ -15,13 +16,14 @@ from hscommon.util import FileOrPath
from hscommon.plat import ISWINDOWS
import time
default_regexes = [r"^thumbs\.db$", # Obsolete after WindowsXP
r"^desktop\.ini$", # Windows metadata
r"^\.DS_Store$", # MacOS metadata
r"^\.Trash\-.*", # Linux trash directories
r"^\$Recycle\.Bin$", # Windows
r"^\..*", # Hidden files on Unix-like
]
default_regexes = [
r"^thumbs\.db$", # Obsolete after WindowsXP
r"^desktop\.ini$", # Windows metadata
r"^\.DS_Store$", # MacOS metadata
r"^\.Trash\-.*", # Linux trash directories
r"^\$Recycle\.Bin$", # Windows
r"^\..*", # Hidden files on Unix-like
]
# These are too broad
forbidden_regexes = [r".*", r"\/.*", r".*\/.*", r".*\\\\.*", r".*\..*"]
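"Too broad" here means each pattern matches essentially any path, so accepting one would exclude every file from a scan. A quick illustration using Python's re directly (dupeGuru's own matching helpers are not assumed):

import re

# Both forbidden patterns happily match an ordinary, scannable filename.
for pattern in (r".*", r".*\..*"):
    assert re.fullmatch(pattern, "holiday_photo.jpg")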
@@ -34,6 +36,7 @@ def timer(func):
end = time.perf_counter_ns()
print(f"DEBUG: func {func.__name__!r} took {end - start} ns.")
return value
return wrapper_timer
@@ -45,11 +48,13 @@ def memoize(func):
if args not in func.cache:
func.cache[args] = func(*args)
return func.cache[args]
return _memoize
class AlreadyThereException(Exception):
"""Expression already in the list"""
def __init__(self, arg="Expression is already in excluded list."):
super().__init__(arg)
@@ -148,7 +153,7 @@ class ExcludeList(Markable):
try:
return re.compile(expr)
except Exception as e:
raise(e)
raise (e)
# @timer
# @memoize # probably not worth memoizing this one if we memoize the above
@@ -169,10 +174,8 @@ class ExcludeList(Markable):
def build_compiled_caches(self, union=False):
if not union:
self._cached_compiled_files =\
[x for x in self._excluded_compiled if not has_sep(x.pattern)]
self._cached_compiled_paths =\
[x for x in self._excluded_compiled if has_sep(x.pattern)]
self._cached_compiled_files = [x for x in self._excluded_compiled if not has_sep(x.pattern)]
self._cached_compiled_paths = [x for x in self._excluded_compiled if has_sep(x.pattern)]
self._dirty = False
return
@@ -185,20 +188,17 @@ class ExcludeList(Markable):
else:
# HACK returned as a tuple to get a free iterator and keep interface
# the same regardless of whether the client asked for union or not
self._cached_compiled_union_all =\
(re.compile('|'.join(marked_count)),)
self._cached_compiled_union_all = (re.compile("|".join(marked_count)),)
files_marked = [x for x in marked_count if not has_sep(x)]
if not files_marked:
self._cached_compiled_union_files = tuple()
else:
self._cached_compiled_union_files =\
(re.compile('|'.join(files_marked)),)
self._cached_compiled_union_files = (re.compile("|".join(files_marked)),)
paths_marked = [x for x in marked_count if has_sep(x)]
if not paths_marked:
self._cached_compiled_union_paths = tuple()
else:
self._cached_compiled_union_paths =\
(re.compile('|'.join(paths_marked)),)
self._cached_compiled_union_paths = (re.compile("|".join(paths_marked)),)
self._dirty = False
@property
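The HACK note above is about keeping one interface for two shapes of data: in union mode all marked patterns compile into a single Pattern, and wrapping it in a one-element tuple lets callers iterate over it exactly as they do over the per-pattern tuples. A sketch of the idea, with a hypothetical pattern list:

import re

patterns = [r"^thumbs\.db$", r"^desktop\.ini$"]
separate = tuple(re.compile(p) for p in patterns)  # one Pattern per regex
union = (re.compile("|".join(patterns)),)  # a single Pattern, still iterable

def matches_any(name, compiled):
    # The same loop serves both shapes, which is the point of the tuple.
    return any(p.match(name) for p in compiled)

assert matches_any("desktop.ini", separate) == matches_any("desktop.ini", union)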
@@ -218,16 +218,14 @@ class ExcludeList(Markable):
one item (one Pattern in the union case)."""
if self._dirty:
self.build_compiled_caches(self._use_union)
return self._cached_compiled_union_files if self._use_union\
else self._cached_compiled_files
return self._cached_compiled_union_files if self._use_union else self._cached_compiled_files
@property
def compiled_paths(self):
"""Returns patterns with only separators in them, for more precise filtering."""
if self._dirty:
self.build_compiled_caches(self._use_union)
return self._cached_compiled_union_paths if self._use_union\
else self._cached_compiled_paths
return self._cached_compiled_union_paths if self._use_union else self._cached_compiled_paths
# ---Public
def add(self, regex, forced=False):
@@ -295,8 +293,7 @@ class ExcludeList(Markable):
was_marked = self.is_marked(regex)
is_compilable, exception, compiled = self.compile_re(newregex)
# We overwrite the found entry
self._excluded[self._excluded.index(item)] =\
[newregex, is_compilable, exception, compiled]
self._excluded[self._excluded.index(item)] = [newregex, is_compilable, exception, compiled]
self._remove_compiled(regex)
break
if not found:
@@ -343,8 +340,10 @@ class ExcludeList(Markable):
# "forced" avoids compilation exceptions and adds anyway
self.add(regex_string, forced=True)
except AlreadyThereException:
logging.error(f"Regex \"{regex_string}\" \
loaded from XML was already present in the list.")
logging.error(
f'Regex "{regex_string}" \
loaded from XML was already present in the list.'
)
continue
if exclude_item.get("marked") == "y":
marked.add(regex_string)
@@ -369,6 +368,7 @@ loaded from XML was already present in the list.")
class ExcludeDict(ExcludeList):
"""Exclusion list holding a set of regular expressions as keys, the compiled
Pattern, compilation error and compilable boolean as values."""
# Implementation around a dictionary instead of a list, which implies
# to keep the index of each string-key as its sub-element and keep it updated
# whenever insert/remove is done.
@@ -435,12 +435,7 @@ class ExcludeDict(ExcludeList):
# and other indices should be pushed by one
for value in self._excluded.values():
value["index"] += 1
self._excluded[regex] = {
"index": 0,
"compilable": iscompilable,
"error": exception,
"compiled": compiled
}
self._excluded[regex] = {"index": 0, "compilable": iscompilable, "error": exception, "compiled": compiled}
def has_entry(self, regex):
if regex in self._excluded.keys():
@@ -468,10 +463,10 @@ class ExcludeDict(ExcludeList):
previous = self._excluded.pop(regex)
iscompilable, error, compiled = self.compile_re(newregex)
self._excluded[newregex] = {
"index": previous.get('index'),
"index": previous.get("index"),
"compilable": iscompilable,
"error": error,
"compiled": compiled
"compiled": compiled,
}
self._remove_compiled(regex)
if iscompilable:
@@ -511,8 +506,12 @@ def ordered_keys(_dict):
if ISWINDOWS:
def has_sep(regexp):
return '\\' + sep in regexp
return "\\" + sep in regexp
else:
def has_sep(regexp):
return sep in regexp

View File

@@ -131,15 +131,11 @@ def export_to_xhtml(colnames, rows):
indented = "indented"
filename = row[1]
cells = "".join(CELL_TEMPLATE.format(value=value) for value in row[2:])
rendered_rows.append(
ROW_TEMPLATE.format(indented=indented, filename=filename, cells=cells)
)
rendered_rows.append(ROW_TEMPLATE.format(indented=indented, filename=filename, cells=cells))
previous_group_id = row[0]
rendered_rows = "".join(rendered_rows)
# The main template can't use format because the css code uses {}
content = MAIN_TEMPLATE.replace("$colheaders", colheaders).replace(
"$rows", rendered_rows
)
content = MAIN_TEMPLATE.replace("$colheaders", colheaders).replace("$rows", rendered_rows)
folder = mkdtemp()
destpath = op.join(folder, "export.htm")
fp = open(destpath, "wt", encoding="utf-8")
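The comment in this hunk ("The main template can't use format because the css code uses {}") deserves a word: str.format() treats every brace in the template as a replacement field, so a template whose style block contains CSS rules raises before any substitution happens, and chained str.replace() calls on sentinels like $rows avoid the problem. A small illustration with a hypothetical template:

TEMPLATE = "<style>body { margin: 0 }</style><table>$rows</table>"

# TEMPLATE.format(rows="...") raises (KeyError) because "{ margin: 0 }"
# parses as a replacement field; plain replacement sidesteps the braces.
content = TEMPLATE.replace("$rows", "<tr><td>dupe</td></tr>")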

View File

@@ -79,16 +79,9 @@ class OperationError(FSError):
class File:
"""Represents a file and holds metadata to be used for scanning.
"""
"""Represents a file and holds metadata to be used for scanning."""
INITIAL_INFO = {
"size": 0,
"mtime": 0,
"md5": b"",
"md5partial": b"",
"md5samples": b""
}
INITIAL_INFO = {"size": 0, "mtime": 0, "md5": b"", "md5partial": b"", "md5samples": b""}
# Slots for File make us save quite a bit of memory. In a memory test I've made with a lot of
# files, I saved 35% memory usage with "unread" files (no _read_info() call) and gains become
# even greater when we take into account read attributes (70%!). Yeah, it's worth it.
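The retained comment above quantifies why File declares __slots__: slotted instances carry no per-object __dict__, and for scans holding many File objects that dictionary dominated memory (35% saved on unread files, around 70% once attributes were populated, per the comment). A minimal sketch of the mechanism, with hypothetical class names:

import sys

class PlainFile:
    def __init__(self):
        self.size = 0
        self.mtime = 0

class SlottedFile:
    __slots__ = ("size", "mtime")

    def __init__(self):
        self.size = 0
        self.mtime = 0

plain, slotted = PlainFile(), SlottedFile()
print(sys.getsizeof(plain) + sys.getsizeof(plain.__dict__))  # dict overhead counted
print(sys.getsizeof(slotted))  # no __dict__ at all
# Accessing slotted.__dict__ raises AttributeError; that's where the savings live.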
@@ -108,9 +101,7 @@ class File:
try:
self._read_info(attrname)
except Exception as e:
logging.warning(
"An error '%s' was raised while decoding '%s'", e, repr(self.path)
)
logging.warning("An error '%s' was raised while decoding '%s'", e, repr(self.path))
result = object.__getattribute__(self, attrname)
if result is NOT_SET:
result = self.INITIAL_INFO[attrname]
@@ -192,8 +183,7 @@ class File:
# --- Public
@classmethod
def can_handle(cls, path):
"""Returns whether this file wrapper class can handle ``path``.
"""
"""Returns whether this file wrapper class can handle ``path``."""
return not path.islink() and path.isfile()
def rename(self, newname):
@@ -211,8 +201,7 @@ class File:
self.path = destpath
def get_display_info(self, group, delta):
"""Returns a display-ready dict of dupe's data.
"""
"""Returns a display-ready dict of dupe's data."""
raise NotImplementedError()
# --- Properties
@@ -271,9 +260,7 @@ class Folder(File):
@property
def subfolders(self):
if self._subfolders is None:
subfolders = [
p for p in self.path.listdir() if not p.islink() and p.isdir()
]
subfolders = [p for p in self.path.listdir() if not p.islink() and p.isdir()]
self._subfolders = [self.__class__(p) for p in subfolders]
return self._subfolders

View File

@@ -29,8 +29,7 @@ class DeletionOptionsView:
"""
def update_msg(self, msg: str):
"""Update the dialog's prompt with ``str``.
"""
"""Update the dialog's prompt with ``str``."""
def show(self):
"""Show the dialog in a modal fashion.
@@ -39,8 +38,7 @@ class DeletionOptionsView:
"""
def set_hardlink_option_enabled(self, is_enabled: bool):
"""Enable or disable the widget controlling :attr:`DeletionOptions.use_hardlinks`.
"""
"""Enable or disable the widget controlling :attr:`DeletionOptions.use_hardlinks`."""
class DeletionOptions(GUIObject):
@@ -75,8 +73,7 @@ class DeletionOptions(GUIObject):
return self.view.show()
def supports_links(self):
"""Returns whether our platform supports symlinks.
"""
"""Returns whether our platform supports symlinks."""
# When on a platform that doesn't implement it, calling os.symlink() (with the wrong number
# of arguments) raises NotImplementedError, which allows us to gracefully check for the
# feature.
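The comment spells out the feature-detection trick behind supports_links(): call os.symlink() with deliberately wrong arguments and sort platforms by the exception raised. A sketch of that probe, simplified from the description above:

import os

def supports_links():
    try:
        os.symlink()  # intentionally called with no arguments
    except NotImplementedError:
        return False  # platform stub: no symlink support here
    except TypeError:
        return True  # argument checking ran, so the call itself is implemented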

View File

@@ -32,9 +32,7 @@ class DetailsPanel(GUIObject, DupeGuruGUIObject):
# we don't want the two sides of the table to display the stats for the same file
ref = group.ref if group is not None and group.ref is not dupe else None
data2 = self.app.get_display_info(ref, group, False)
columns = self.app.result_table.COLUMNS[
1:
] # first column is the 'marked' column
columns = self.app.result_table.COLUMNS[1:] # first column is the 'marked' column
self._table = [(c.display, data1[c.name], data2[c.name]) for c in columns]
# --- Public

View File

@@ -36,9 +36,7 @@ class DirectoryNode(Node):
self._loaded = True
def update_all_states(self):
self._state = STATE_ORDER.index(
self._tree.app.directories.get_state(self._directory_path)
)
self._state = STATE_ORDER.index(self._tree.app.directories.get_state(self._directory_path))
for node in self:
node.update_all_states()

View File

@@ -50,7 +50,7 @@ class ExcludeListDialogCore:
try:
self.exclude_list.add(regex)
except Exception as e:
raise(e)
raise (e)
self.exclude_list.mark(regex)
self.exclude_list_table.add(regex)

View File

@@ -6,14 +6,12 @@ from .base import DupeGuruGUIObject
from hscommon.gui.table import GUITable, Row
from hscommon.gui.column import Column, Columns
from hscommon.trans import trget
tr = trget("ui")
class ExcludeListTable(GUITable, DupeGuruGUIObject):
COLUMNS = [
Column("marked", ""),
Column("regex", tr("Regular Expressions"))
]
COLUMNS = [Column("marked", ""), Column("regex", tr("Regular Expressions"))]
def __init__(self, exclude_list_dialog, app):
GUITable.__init__(self)

View File

@@ -22,9 +22,7 @@ class IgnoreListDialog:
def clear(self):
if not self.ignore_list:
return
msg = tr(
"Do you really want to remove all %d items from the ignore list?"
) % len(self.ignore_list)
msg = tr("Do you really want to remove all %d items from the ignore list?") % len(self.ignore_list)
if self.app.view.ask_yes_no(msg):
self.ignore_list.Clear()
self.refresh()

View File

@@ -45,9 +45,7 @@ class DupeRow(Row):
return False
ref_info = self._group.ref.get_display_info(group=self._group, delta=False)
for key, value in dupe_info.items():
if (key not in self._delta_columns) and (
ref_info[key].lower() != value.lower()
):
if (key not in self._delta_columns) and (ref_info[key].lower() != value.lower()):
self._delta_columns.add(key)
return column_name in self._delta_columns

View File

@@ -33,8 +33,7 @@ CacheRow = namedtuple("CacheRow", "id path blocks mtime")
class ShelveCache:
"""A class to cache picture blocks in a shelve backend.
"""
"""A class to cache picture blocks in a shelve backend."""
def __init__(self, db=None, readonly=False):
self.istmp = db is None
@@ -81,9 +80,7 @@ class ShelveCache:
self.shelve[wrap_id(rowid)] = wrap_path(path_str)
def _compute_maxid(self):
return max(
(unwrap_id(k) for k in self.shelve if k.startswith("id:")), default=1
)
return max((unwrap_id(k) for k in self.shelve if k.startswith("id:")), default=1)
def _get_new_id(self):
self.maxid += 1

View File

@@ -13,8 +13,7 @@ from .cache import string_to_colors, colors_to_string
class SqliteCache:
"""A class to cache picture blocks in a sqlite backend.
"""
"""A class to cache picture blocks in a sqlite backend."""
def __init__(self, db=":memory:", readonly=False):
# readonly is not used in the sqlite version of the cache
@@ -71,18 +70,14 @@ class SqliteCache:
except sqlite.OperationalError:
logging.warning("Picture cache could not set value for key %r", path_str)
except sqlite.DatabaseError as e:
logging.warning(
"DatabaseError while setting value for key %r: %s", path_str, str(e)
)
logging.warning("DatabaseError while setting value for key %r: %s", path_str, str(e))
def _create_con(self, second_try=False):
def create_tables():
logging.debug("Creating picture cache tables.")
self.con.execute("drop table if exists pictures")
self.con.execute("drop index if exists idx_path")
self.con.execute(
"create table pictures(path TEXT, mtime INTEGER, blocks TEXT)"
)
self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)")
self.con.execute("create index idx_path on pictures (path)")
self.con = sqlite.connect(self.dbname, isolation_level=None)
@@ -93,9 +88,7 @@ class SqliteCache:
except sqlite.DatabaseError as e: # corrupted db
if second_try:
raise # Something really strange is happening
logging.warning(
"Could not create picture cache because of an error: %s", str(e)
)
logging.warning("Could not create picture cache because of an error: %s", str(e))
self.con.close()
os.remove(self.dbname)
self._create_con(second_try=True)
@@ -125,9 +118,7 @@ class SqliteCache:
raise ValueError(path)
def get_multiple(self, rowids):
sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(
map(str, rowids)
)
sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(map(str, rowids))
cur = self.con.execute(sql)
return ((rowid, string_to_colors(blocks)) for rowid, blocks in cur)
@@ -148,7 +139,5 @@ class SqliteCache:
continue
todelete.append(rowid)
if todelete:
sql = "delete from pictures where rowid in (%s)" % ",".join(
map(str, todelete)
)
sql = "delete from pictures where rowid in (%s)" % ",".join(map(str, todelete))
self.con.execute(sql)

View File

@@ -256,9 +256,7 @@ class TIFF_file:
for j in range(count):
if type in {5, 10}:
# The type is either 5 or 10
value_j = Fraction(
self.s2n(offset, 4, signed), self.s2n(offset + 4, 4, signed)
)
value_j = Fraction(self.s2n(offset, 4, signed), self.s2n(offset + 4, 4, signed))
else:
# Not a fraction
value_j = self.s2n(offset, typelen, signed)
@@ -296,9 +294,7 @@ def get_fields(fp):
logging.debug("Exif header length: %d bytes", length)
data = fp.read(length - 8)
data_format = data[0]
logging.debug(
"%s format", {INTEL_ENDIAN: "Intel", MOTOROLA_ENDIAN: "Motorola"}[data_format]
)
logging.debug("%s format", {INTEL_ENDIAN: "Intel", MOTOROLA_ENDIAN: "Motorola"}[data_format])
T = TIFF_file(data)
# There may be more than one IFD per file, but we only read the first one because others are
# most likely thumbnails.

View File

@@ -95,9 +95,7 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
picture.unicode_path,
picture.size,
)
if (
picture.size < 10 * 1024 * 1024
): # We're really running out of memory
if picture.size < 10 * 1024 * 1024: # We're really running out of memory
raise
except MemoryError:
logging.warning("Ran out of memory while preparing pictures")
@@ -106,9 +104,7 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
def get_chunks(pictures):
min_chunk_count = (
multiprocessing.cpu_count() * 2
) # have enough chunks to feed all subprocesses
min_chunk_count = multiprocessing.cpu_count() * 2 # have enough chunks to feed all subprocesses
chunk_count = len(pictures) // DEFAULT_CHUNK_SIZE
chunk_count = max(min_chunk_count, chunk_count)
chunk_size = (len(pictures) // chunk_count) + 1
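The min_chunk_count line keeps its rationale inline: with fewer chunks than twice the CPU count, some worker processes in the multiprocessing pool would go idle. A worked sketch of the sizing arithmetic, with an assumed DEFAULT_CHUNK_SIZE (the real constant lives in the module):

import multiprocessing

DEFAULT_CHUNK_SIZE = 50  # assumed for illustration only
pictures = list(range(1000))  # stand-ins for picture objects

min_chunk_count = multiprocessing.cpu_count() * 2  # enough chunks to feed all subprocesses
chunk_count = max(min_chunk_count, len(pictures) // DEFAULT_CHUNK_SIZE)
chunk_size = (len(pictures) // chunk_count) + 1
chunks = [pictures[i : i + chunk_size] for i in range(0, len(pictures), chunk_size)]
# e.g. on an 8-core machine: chunk_count = max(16, 20) = 20, so chunk_size = 51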
@@ -185,9 +181,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
j.set_progress(comparison_count, progress_msg)
j = j.start_subjob([3, 7])
pictures = prepare_pictures(
pictures, cache_path, with_dimensions=not match_scaled, j=j
)
pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j)
j = j.start_subjob([9, 1], tr("Preparing for matching"))
cache = get_cache(cache_path)
id2picture = {}
@@ -231,12 +225,8 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
chunks,
pictures,
) # some wiggle room for the next statements
logging.warning(
"Ran out of memory when scanning! We had %d matches.", len(matches)
)
del matches[
-len(matches) // 3 :
] # some wiggle room to ensure we don't run out of memory again.
logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches))
del matches[-len(matches) // 3 :] # some wiggle room to ensure we don't run out of memory again.
pool.close()
result = []
myiter = j.iter_with_progress(

View File

@@ -87,11 +87,7 @@ class Scanner:
if self.size_threshold:
files = [f for f in files if f.size >= self.size_threshold]
if self.scan_type in {ScanType.Contents, ScanType.Folders}:
return engine.getmatches_by_contents(
files,
bigsize=self.big_file_size_threshold,
j=j
)
return engine.getmatches_by_contents(files, bigsize=self.big_file_size_threshold, j=j)
else:
j = j.start_subjob([2, 8])
kw = {}
@@ -165,27 +161,13 @@ class Scanner:
toremove.add(p)
else:
last_parent_path = p
matches = [
m
for m in matches
if m.first.path not in toremove or m.second.path not in toremove
]
matches = [m for m in matches if m.first.path not in toremove or m.second.path not in toremove]
if not self.mix_file_kind:
matches = [
m
for m in matches
if get_file_ext(m.first.name) == get_file_ext(m.second.name)
]
matches = [
m for m in matches if m.first.path.exists() and m.second.path.exists()
]
matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
if ignore_list:
matches = [
m
for m in matches
if not ignore_list.AreIgnored(str(m.first.path), str(m.second.path))
]
matches = [m for m in matches if not ignore_list.AreIgnored(str(m.first.path), str(m.second.path))]
logging.info("Grouping matches")
groups = engine.get_groups(matches)
if self.scan_type in {
@@ -194,9 +176,7 @@ class Scanner:
ScanType.FieldsNoOrder,
ScanType.Tag,
}:
matched_files = dedupe(
[m.first for m in matches] + [m.second for m in matches]
)
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
else:
# Ticket #195

View File

@@ -29,9 +29,7 @@ def add_fake_files_to_directories(directories, files):
class TestCaseDupeGuru:
def test_apply_filter_calls_results_apply_filter(self, monkeypatch):
dgapp = TestApp().app
monkeypatch.setattr(
dgapp.results, "apply_filter", log_calls(dgapp.results.apply_filter)
)
monkeypatch.setattr(dgapp.results, "apply_filter", log_calls(dgapp.results.apply_filter))
dgapp.apply_filter("foo")
eq_(2, len(dgapp.results.apply_filter.calls))
call = dgapp.results.apply_filter.calls[0]
@@ -41,15 +39,11 @@ class TestCaseDupeGuru:
def test_apply_filter_escapes_regexp(self, monkeypatch):
dgapp = TestApp().app
monkeypatch.setattr(
dgapp.results, "apply_filter", log_calls(dgapp.results.apply_filter)
)
monkeypatch.setattr(dgapp.results, "apply_filter", log_calls(dgapp.results.apply_filter))
dgapp.apply_filter("()[]\\.|+?^abc")
call = dgapp.results.apply_filter.calls[1]
eq_("\\(\\)\\[\\]\\\\\\.\\|\\+\\?\\^abc", call["filter_str"])
dgapp.apply_filter(
"(*)"
) # In "simple mode", we want the * to behave as a wilcard
dgapp.apply_filter("(*)") # In "simple mode", we want the * to behave as a wilcard
call = dgapp.results.apply_filter.calls[3]
eq_(r"\(.*\)", call["filter_str"])
dgapp.options["escape_filter_regexp"] = False
@@ -70,9 +64,7 @@ class TestCaseDupeGuru:
)
# XXX This monkeypatch is temporary. will be fixed in a better monkeypatcher.
monkeypatch.setattr(app, "smart_copy", hscommon.conflict.smart_copy)
monkeypatch.setattr(
os, "makedirs", lambda path: None
) # We don't want the test to create that fake directory
monkeypatch.setattr(os, "makedirs", lambda path: None) # We don't want the test to create that fake directory
dgapp = TestApp().app
dgapp.directories.add_path(p)
[f] = dgapp.directories.get_files()
@@ -320,9 +312,7 @@ class TestCaseDupeGuruWithResults:
assert groups[0].ref is objects[1]
assert groups[1].ref is objects[4]
def test_makeSelectedReference_by_selecting_two_dupes_in_the_same_group(
self, do_setup
):
def test_makeSelectedReference_by_selecting_two_dupes_in_the_same_group(self, do_setup):
app = self.app
objects = self.objects
groups = self.groups
@@ -404,9 +394,7 @@ class TestCaseDupeGuruWithResults:
# results table.
app = self.app
app.JOB = Job(1, lambda *args, **kw: False) # Cancels the task
add_fake_files_to_directories(
app.directories, self.objects
) # We want the scan to at least start
add_fake_files_to_directories(app.directories, self.objects) # We want the scan to at least start
app.start_scanning() # will be cancelled immediately
eq_(len(app.result_table), 0)

View File

@@ -140,9 +140,7 @@ def GetTestGroups():
matches = engine.getmatches(objects) # we should have 5 matches
groups = engine.get_groups(matches) # We should have 2 groups
for g in groups:
g.prioritize(
lambda x: objects.index(x)
) # We want the dupes to be in the same order as the list is
g.prioritize(lambda x: objects.index(x)) # We want the dupes to be in the same order as the list is
groups.sort(key=len, reverse=True) # We want the group with 3 members to be first.
return (objects, matches, groups)

View File

@@ -14,9 +14,7 @@ except ImportError:
skip("Can't import the block module, probably hasn't been compiled.")
def my_avgdiff(
first, second, limit=768, min_iter=3
): # this is so I don't have to re-write every call
def my_avgdiff(first, second, limit=768, min_iter=3): # this is so I don't have to re-write every call
return avgdiff(first, second, limit, min_iter)

View File

@@ -254,7 +254,12 @@ def test_invalid_path():
def test_set_state_on_invalid_path():
d = Directories()
try:
d.set_state(Path("foobar",), DirectoryState.Normal)
d.set_state(
Path(
"foobar",
),
DirectoryState.Normal,
)
except LookupError:
assert False
@@ -345,15 +350,17 @@ def test_default_path_state_override(tmpdir):
eq_(len(list(d.get_files())), 2)
class TestExcludeList():
class TestExcludeList:
def setup_method(self, method):
self.d = Directories(exclude_list=ExcludeList(union_regex=False))
def get_files_and_expect_num_result(self, num_result):
"""Calls get_files(), get the filenames only, print for debugging.
num_result is how many files are expected as a result."""
print(f"EXCLUDED REGEX: paths {self.d._exclude_list.compiled_paths} \
files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled}")
print(
f"EXCLUDED REGEX: paths {self.d._exclude_list.compiled_paths} \
files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled}"
)
files = list(self.d.get_files())
files = [file.name for file in files]
print(f"FINAL FILES {files}")

View File

@@ -5,6 +5,7 @@
# http://www.gnu.org/licenses/gpl-3.0.html
import io
# import os.path as op
from xml.etree import ElementTree as ET
@@ -104,7 +105,7 @@ class TestCaseListEmpty:
regex1 = r"one"
regex2 = r"two"
self.exclude_list.add(regex1)
assert(regex1 in self.exclude_list)
assert regex1 in self.exclude_list
self.exclude_list.add(regex2)
self.exclude_list.mark(regex1)
self.exclude_list.mark(regex2)
@@ -113,17 +114,17 @@ class TestCaseListEmpty:
compiled_files = [x for x in self.exclude_list.compiled_files]
eq_(len(compiled_files), 2)
self.exclude_list.remove(regex2)
assert(regex2 not in self.exclude_list)
assert regex2 not in self.exclude_list
eq_(len(self.exclude_list), 1)
def test_add_duplicate(self):
self.exclude_list.add(r"one")
eq_(1 , len(self.exclude_list))
eq_(1, len(self.exclude_list))
try:
self.exclude_list.add(r"one")
except Exception:
pass
eq_(1 , len(self.exclude_list))
eq_(1, len(self.exclude_list))
def test_add_not_compilable(self):
# Trying to add a non-valid regex should not work and raise exception
@@ -230,13 +231,14 @@ class TestCaseListEmpty:
if compiled_re.pattern == re:
found = True
if not found:
raise(Exception(f"Default RE {re} not found in compiled list."))
raise (Exception(f"Default RE {re} not found in compiled list."))
continue
eq_(len(default_regexes), len(self.exclude_list.compiled))
class TestCaseListEmptyUnion(TestCaseListEmpty):
"""Same but with union regex"""
def setup_method(self, method):
self.app = DupeGuru()
self.app.exclude_list = ExcludeList(union_regex=True)
@@ -246,7 +248,7 @@ class TestCaseListEmptyUnion(TestCaseListEmpty):
regex1 = r"one"
regex2 = r"two"
self.exclude_list.add(regex1)
assert(regex1 in self.exclude_list)
assert regex1 in self.exclude_list
self.exclude_list.add(regex2)
self.exclude_list.mark(regex1)
self.exclude_list.mark(regex2)
@@ -256,7 +258,7 @@ class TestCaseListEmptyUnion(TestCaseListEmpty):
eq_(len(compiled_files), 1) # Two patterns joined together into one
assert "|" in compiled_files[0].pattern
self.exclude_list.remove(regex2)
assert(regex2 not in self.exclude_list)
assert regex2 not in self.exclude_list
eq_(len(self.exclude_list), 1)
def test_rename_regex_file_to_path(self):
@@ -296,14 +298,15 @@ class TestCaseListEmptyUnion(TestCaseListEmpty):
compiled = [x for x in self.exclude_list.compiled]
assert regex not in compiled
# Need to escape both to get the same strings after compilation
compiled_escaped = set([x.encode('unicode-escape').decode() for x in compiled[0].pattern.split("|")])
default_escaped = set([x.encode('unicode-escape').decode() for x in default_regexes])
compiled_escaped = set([x.encode("unicode-escape").decode() for x in compiled[0].pattern.split("|")])
default_escaped = set([x.encode("unicode-escape").decode() for x in default_regexes])
assert compiled_escaped == default_escaped
eq_(len(default_regexes), len(compiled[0].pattern.split("|")))
class TestCaseDictEmpty(TestCaseListEmpty):
"""Same, but with dictionary implementation"""
def setup_method(self, method):
self.app = DupeGuru()
self.app.exclude_list = ExcludeDict(union_regex=False)
@@ -312,6 +315,7 @@ class TestCaseDictEmpty(TestCaseListEmpty):
class TestCaseDictEmptyUnion(TestCaseDictEmpty):
"""Same, but with union regex"""
def setup_method(self, method):
self.app = DupeGuru()
self.app.exclude_list = ExcludeDict(union_regex=True)
@@ -321,7 +325,7 @@ class TestCaseDictEmptyUnion(TestCaseDictEmpty):
regex1 = r"one"
regex2 = r"two"
self.exclude_list.add(regex1)
assert(regex1 in self.exclude_list)
assert regex1 in self.exclude_list
self.exclude_list.add(regex2)
self.exclude_list.mark(regex1)
self.exclude_list.mark(regex2)
@@ -331,7 +335,7 @@ class TestCaseDictEmptyUnion(TestCaseDictEmpty):
# two patterns joined into one
eq_(len(compiled_files), 1)
self.exclude_list.remove(regex2)
assert(regex2 not in self.exclude_list)
assert regex2 not in self.exclude_list
eq_(len(self.exclude_list), 1)
def test_rename_regex_file_to_path(self):
@@ -371,8 +375,8 @@ class TestCaseDictEmptyUnion(TestCaseDictEmpty):
compiled = [x for x in self.exclude_list.compiled]
assert regex not in compiled
# Need to escape both to get the same strings after compilation
compiled_escaped = set([x.encode('unicode-escape').decode() for x in compiled[0].pattern.split("|")])
default_escaped = set([x.encode('unicode-escape').decode() for x in default_regexes])
compiled_escaped = set([x.encode("unicode-escape").decode() for x in compiled[0].pattern.split("|")])
default_escaped = set([x.encode("unicode-escape").decode() for x in default_regexes])
assert compiled_escaped == default_escaped
eq_(len(default_regexes), len(compiled[0].pattern.split("|")))
@@ -382,8 +386,9 @@ def split_union(pattern_object):
return [x for x in pattern_object.pattern.split("|")]
class TestCaseCompiledList():
class TestCaseCompiledList:
"""Test consistency between union or and separate versions."""
def setup_method(self, method):
self.e_separate = ExcludeList(union_regex=False)
self.e_separate.restore_defaults()
@@ -431,6 +436,7 @@ class TestCaseCompiledList():
class TestCaseCompiledDict(TestCaseCompiledList):
"""Test the dictionary version"""
def setup_method(self, method):
self.e_separate = ExcludeDict(union_regex=False)
self.e_separate.restore_defaults()

View File

@@ -73,9 +73,7 @@ def test_save_to_xml():
eq_(len(root), 2)
eq_(len([c for c in root if c.tag == "file"]), 2)
f1, f2 = root[:]
subchildren = [c for c in f1 if c.tag == "file"] + [
c for c in f2 if c.tag == "file"
]
subchildren = [c for c in f1 if c.tag == "file"] + [c for c in f2 if c.tag == "file"]
eq_(len(subchildren), 3)
@@ -96,9 +94,7 @@ def test_SaveThenLoad():
def test_LoadXML_with_empty_file_tags():
f = io.BytesIO()
f.write(
b'<?xml version="1.0" encoding="utf-8"?><ignore_list><file><file/></file></ignore_list>'
)
f.write(b'<?xml version="1.0" encoding="utf-8"?><ignore_list><file><file/></file></ignore_list>')
f.seek(0)
il = IgnoreList()
il.load_from_xml(f)

View File

@@ -117,9 +117,7 @@ class TestCaseResultsWithSomeGroups:
assert d is g.ref
def test_sort_groups(self):
self.results.make_ref(
self.objects[1]
) # We want to make the 1024 sized object to go ref.
self.results.make_ref(self.objects[1]) # We want to make the 1024 sized object to go ref.
g1, g2 = self.groups
self.results.sort_groups("size")
assert self.results.groups[0] is g2
@@ -129,9 +127,7 @@ class TestCaseResultsWithSomeGroups:
assert self.results.groups[1] is g2
def test_set_groups_when_sorted(self):
self.results.make_ref(
self.objects[1]
) # We want to make the 1024 sized object to go ref.
self.results.make_ref(self.objects[1]) # We want to make the 1024 sized object to go ref.
self.results.sort_groups("size")
objects, matches, groups = GetTestGroups()
g1, g2 = groups
@@ -601,9 +597,7 @@ class TestCaseResultsXML:
matches = engine.getmatches(objects) # we should have 5 matches
groups = engine.get_groups(matches) # We should have 2 groups
for g in groups:
g.prioritize(
lambda x: objects.index(x)
) # We want the dupes to be in the same order as the list is
g.prioritize(lambda x: objects.index(x)) # We want the dupes to be in the same order as the list is
app = DupeGuru()
results = Results(app)
results.groups = groups
@@ -807,9 +801,7 @@ class TestCaseResultsFilter:
# Now the stats should display *2* markable dupes (instead of 1)
expected = "0 / 2 (0.00 B / 2.00 B) duplicates marked. filter: foo"
eq_(expected, self.results.stat_line)
self.results.apply_filter(
None
) # Now let's make sure our unfiltered results aren't fucked up
self.results.apply_filter(None) # Now let's make sure our unfiltered results aren't fucked up
expected = "0 / 3 (0.00 B / 3.00 B) duplicates marked."
eq_(expected, self.results.stat_line)

View File

@@ -150,8 +150,7 @@ def test_big_file_partial_hashes(fake_fileexists):
bigsize = 100 * 1024 * 1024 # 100MB
s.big_file_size_threshold = bigsize
f = [no("bigfoo", bigsize), no("bigbar", bigsize),
no("smallfoo", smallsize), no("smallbar", smallsize)]
f = [no("bigfoo", bigsize), no("bigbar", bigsize), no("smallfoo", smallsize), no("smallbar", smallsize)]
f[0].md5 = f[0].md5partial = f[0].md5samples = "foobar"
f[1].md5 = f[1].md5partial = f[1].md5samples = "foobar"
f[2].md5 = f[2].md5partial = "bleh"
@@ -193,10 +192,8 @@ def test_content_scan_doesnt_put_md5_in_words_at_the_end(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Contents
f = [no("foo"), no("bar")]
f[0].md5 = f[0].md5partial = f[0].md5samples =\
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
f[1].md5 = f[1].md5partial = f[1].md5samples =\
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
f[0].md5 = f[0].md5partial = f[0].md5samples = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
f[1].md5 = f[1].md5partial = f[1].md5samples = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
r = s.get_dupe_groups(f)
# FIXME looks like we are missing something here?
r[0]