Format files with black

- Format all files with black
- Update tox.ini flake8 arguments to be compatible with black
- Add black to requirements-extra.txt
- Reduce ignored flake8 rules and fix a few violations
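
For context: two of flake8's default rules reject code exactly as black emits it, E203 (whitespace before ':') and W503 (line break before a binary operator), and black wraps at 88 columns rather than flake8's default 79. A black-compatible [flake8] section therefore typically looks like the sketch below; this is an illustrative configuration, not the literal contents of this commit's tox.ini:

[flake8]
# E203 and W503 conflict with black's slice spacing and
# line-breaking style, so they are ignored.
extend-ignore = E203, W503
# Match black's default line length instead of flake8's 79.
max-line-length = 88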
Author: Andrew Senetar
Date: 2019-12-31 20:16:27 -06:00
Parent: 359d6498f7
Commit: 7ba8aa3514
Signed by: arsenetar (GPG Key ID: C63300DCE48AB2F1)
141 changed files with 5241 additions and 3648 deletions
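
Nearly everything below is mechanical black output: string quotes normalized to double quotes, long call sites split one argument per line, and a trailing comma added after the last argument so the wrapped form is stable. The first parse_args option in build.py shows the pattern; here it is as a self-contained before/after sketch:

from optparse import OptionParser

parser = OptionParser(usage="usage: %prog [options]")

# Before black (single quotes, manually wrapped):
#     parser.add_option(
#         '--clean', action='store_true', dest='clean',
#         help="Clean build folder before building"
#     )
# After black (double quotes, one argument per line, trailing comma):
parser.add_option(
    "--clean",
    action="store_true",
    dest="clean",
    help="Clean build folder before building",
)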

build.py (143 changed lines)

@@ -13,129 +13,165 @@ from setuptools import setup, Extension
from hscommon import sphinxgen
from hscommon.build import (
add_to_pythonpath, print_and_do, move_all, fix_qt_resource_file,
add_to_pythonpath,
print_and_do,
move_all,
fix_qt_resource_file,
)
from hscommon import loc
def parse_args():
usage = "usage: %prog [options]"
parser = OptionParser(usage=usage)
parser.add_option(
'--clean', action='store_true', dest='clean',
help="Clean build folder before building"
"--clean",
action="store_true",
dest="clean",
help="Clean build folder before building",
)
parser.add_option(
'--doc', action='store_true', dest='doc',
help="Build only the help file"
"--doc", action="store_true", dest="doc", help="Build only the help file"
)
parser.add_option(
'--loc', action='store_true', dest='loc',
help="Build only localization"
"--loc", action="store_true", dest="loc", help="Build only localization"
)
parser.add_option(
'--updatepot', action='store_true', dest='updatepot',
help="Generate .pot files from source code."
"--updatepot",
action="store_true",
dest="updatepot",
help="Generate .pot files from source code.",
)
parser.add_option(
'--mergepot', action='store_true', dest='mergepot',
help="Update all .po files based on .pot files."
"--mergepot",
action="store_true",
dest="mergepot",
help="Update all .po files based on .pot files.",
)
parser.add_option(
'--normpo', action='store_true', dest='normpo',
help="Normalize all PO files (do this before commit)."
"--normpo",
action="store_true",
dest="normpo",
help="Normalize all PO files (do this before commit).",
)
(options, args) = parser.parse_args()
return options
def build_help():
print("Generating Help")
current_path = op.abspath('.')
help_basepath = op.join(current_path, 'help', 'en')
help_destpath = op.join(current_path, 'build', 'help')
changelog_path = op.join(current_path, 'help', 'changelog')
current_path = op.abspath(".")
help_basepath = op.join(current_path, "help", "en")
help_destpath = op.join(current_path, "build", "help")
changelog_path = op.join(current_path, "help", "changelog")
tixurl = "https://github.com/hsoft/dupeguru/issues/{}"
confrepl = {'language': 'en'}
changelogtmpl = op.join(current_path, 'help', 'changelog.tmpl')
conftmpl = op.join(current_path, 'help', 'conf.tmpl')
sphinxgen.gen(help_basepath, help_destpath, changelog_path, tixurl, confrepl, conftmpl, changelogtmpl)
confrepl = {"language": "en"}
changelogtmpl = op.join(current_path, "help", "changelog.tmpl")
conftmpl = op.join(current_path, "help", "conf.tmpl")
sphinxgen.gen(
help_basepath,
help_destpath,
changelog_path,
tixurl,
confrepl,
conftmpl,
changelogtmpl,
)
def build_qt_localizations():
loc.compile_all_po(op.join('qtlib', 'locale'))
loc.merge_locale_dir(op.join('qtlib', 'locale'), 'locale')
loc.compile_all_po(op.join("qtlib", "locale"))
loc.merge_locale_dir(op.join("qtlib", "locale"), "locale")
def build_localizations():
loc.compile_all_po('locale')
loc.compile_all_po("locale")
build_qt_localizations()
locale_dest = op.join('build', 'locale')
locale_dest = op.join("build", "locale")
if op.exists(locale_dest):
shutil.rmtree(locale_dest)
shutil.copytree('locale', locale_dest, ignore=shutil.ignore_patterns('*.po', '*.pot'))
shutil.copytree(
"locale", locale_dest, ignore=shutil.ignore_patterns("*.po", "*.pot")
)
def build_updatepot():
print("Building .pot files from source files")
print("Building core.pot")
loc.generate_pot(['core'], op.join('locale', 'core.pot'), ['tr'])
loc.generate_pot(["core"], op.join("locale", "core.pot"), ["tr"])
print("Building columns.pot")
loc.generate_pot(['core'], op.join('locale', 'columns.pot'), ['coltr'])
loc.generate_pot(["core"], op.join("locale", "columns.pot"), ["coltr"])
print("Building ui.pot")
# When we're not under OS X, we don't want to overwrite ui.pot because it contains Cocoa locs
# We want to merge the generated pot with the old pot in the most preserving way possible.
ui_packages = ['qt', op.join('cocoa', 'inter')]
loc.generate_pot(ui_packages, op.join('locale', 'ui.pot'), ['tr'], merge=True)
ui_packages = ["qt", op.join("cocoa", "inter")]
loc.generate_pot(ui_packages, op.join("locale", "ui.pot"), ["tr"], merge=True)
print("Building qtlib.pot")
loc.generate_pot(['qtlib'], op.join('qtlib', 'locale', 'qtlib.pot'), ['tr'])
loc.generate_pot(["qtlib"], op.join("qtlib", "locale", "qtlib.pot"), ["tr"])
def build_mergepot():
print("Updating .po files using .pot files")
loc.merge_pots_into_pos('locale')
loc.merge_pots_into_pos(op.join('qtlib', 'locale'))
loc.merge_pots_into_pos(op.join('cocoalib', 'locale'))
loc.merge_pots_into_pos("locale")
loc.merge_pots_into_pos(op.join("qtlib", "locale"))
loc.merge_pots_into_pos(op.join("cocoalib", "locale"))
def build_normpo():
loc.normalize_all_pos('locale')
loc.normalize_all_pos(op.join('qtlib', 'locale'))
loc.normalize_all_pos(op.join('cocoalib', 'locale'))
loc.normalize_all_pos("locale")
loc.normalize_all_pos(op.join("qtlib", "locale"))
loc.normalize_all_pos(op.join("cocoalib", "locale"))
def build_pe_modules():
print("Building PE Modules")
exts = [
Extension(
"_block",
[op.join('core', 'pe', 'modules', 'block.c'), op.join('core', 'pe', 'modules', 'common.c')]
[
op.join("core", "pe", "modules", "block.c"),
op.join("core", "pe", "modules", "common.c"),
],
),
Extension(
"_cache",
[op.join('core', 'pe', 'modules', 'cache.c'), op.join('core', 'pe', 'modules', 'common.c')]
[
op.join("core", "pe", "modules", "cache.c"),
op.join("core", "pe", "modules", "common.c"),
],
),
]
exts.append(Extension("_block_qt", [op.join('qt', 'pe', 'modules', 'block.c')]))
exts.append(Extension("_block_qt", [op.join("qt", "pe", "modules", "block.c")]))
setup(
script_args=['build_ext', '--inplace'],
ext_modules=exts,
script_args=["build_ext", "--inplace"], ext_modules=exts,
)
move_all('_block_qt*', op.join('qt', 'pe'))
move_all('_block*', op.join('core', 'pe'))
move_all('_cache*', op.join('core', 'pe'))
move_all("_block_qt*", op.join("qt", "pe"))
move_all("_block*", op.join("core", "pe"))
move_all("_cache*", op.join("core", "pe"))
def build_normal():
print("Building dupeGuru with UI qt")
add_to_pythonpath('.')
add_to_pythonpath(".")
print("Building dupeGuru")
build_pe_modules()
print("Building localizations")
build_localizations()
print("Building Qt stuff")
print_and_do("pyrcc5 {0} > {1}".format(op.join('qt', 'dg.qrc'), op.join('qt', 'dg_rc.py')))
fix_qt_resource_file(op.join('qt', 'dg_rc.py'))
print_and_do(
"pyrcc5 {0} > {1}".format(op.join("qt", "dg.qrc"), op.join("qt", "dg_rc.py"))
)
fix_qt_resource_file(op.join("qt", "dg_rc.py"))
build_help()
def main():
options = parse_args()
if options.clean:
if op.exists('build'):
shutil.rmtree('build')
if not op.exists('build'):
os.mkdir('build')
if op.exists("build"):
shutil.rmtree("build")
if not op.exists("build"):
os.mkdir("build")
if options.doc:
build_help()
elif options.loc:
@@ -149,5 +185,6 @@ def main():
else:
build_normal()
if __name__ == '__main__':
if __name__ == "__main__":
main()

core/__init__.py

@@ -1,3 +1,2 @@
__version__ = '4.0.4'
__appname__ = 'dupeGuru'
__version__ = "4.0.4"
__appname__ = "dupeGuru"

core/app.py

@@ -34,8 +34,8 @@ from .gui.ignore_list_dialog import IgnoreListDialog
from .gui.problem_dialog import ProblemDialog
from .gui.stats_label import StatsLabel
HAD_FIRST_LAUNCH_PREFERENCE = 'HadFirstLaunch'
DEBUG_MODE_PREFERENCE = 'DebugMode'
HAD_FIRST_LAUNCH_PREFERENCE = "HadFirstLaunch"
DEBUG_MODE_PREFERENCE = "DebugMode"
MSG_NO_MARKED_DUPES = tr("There are no marked duplicates. Nothing has been done.")
MSG_NO_SELECTED_DUPES = tr("There are no selected duplicates. Nothing has been done.")
@@ -44,23 +44,27 @@ MSG_MANY_FILES_TO_OPEN = tr(
"files are opened with, doing so can create quite a mess. Continue?"
)
class DestType:
Direct = 0
Relative = 1
Absolute = 2
class JobType:
Scan = 'job_scan'
Load = 'job_load'
Move = 'job_move'
Copy = 'job_copy'
Delete = 'job_delete'
Scan = "job_scan"
Load = "job_load"
Move = "job_move"
Copy = "job_copy"
Delete = "job_delete"
class AppMode:
Standard = 0
Music = 1
Picture = 2
JOBID2TITLE = {
JobType.Scan: tr("Scanning for duplicates"),
JobType.Load: tr("Loading"),
@@ -69,6 +73,7 @@ JOBID2TITLE = {
JobType.Delete: tr("Sending to Trash"),
}
class DupeGuru(Broadcaster):
"""Holds everything together.
@@ -100,7 +105,8 @@ class DupeGuru(Broadcaster):
Instance of :mod:`meta-gui <core.gui>` table listing the results from :attr:`results`
"""
#--- View interface
# --- View interface
# get_default(key_name)
# set_default(key_name, value)
# show_message(msg)
@@ -116,7 +122,7 @@ class DupeGuru(Broadcaster):
NAME = PROMPT_NAME = "dupeGuru"
PICTURE_CACHE_TYPE = 'sqlite' # set to 'shelve' for a ShelveCache
PICTURE_CACHE_TYPE = "sqlite" # set to 'shelve' for a ShelveCache
def __init__(self, view):
if view.get_default(DEBUG_MODE_PREFERENCE):
@@ -124,7 +130,9 @@ class DupeGuru(Broadcaster):
logging.debug("Debug mode enabled")
Broadcaster.__init__(self)
self.view = view
self.appdata = desktop.special_folder_path(desktop.SpecialFolder.AppData, appname=self.NAME)
self.appdata = desktop.special_folder_path(
desktop.SpecialFolder.AppData, appname=self.NAME
)
if not op.exists(self.appdata):
os.makedirs(self.appdata)
self.app_mode = AppMode.Standard
@@ -136,11 +144,11 @@
# sent to the scanner. They don't have default values because those defaults values are
# defined in the scanner class.
self.options = {
'escape_filter_regexp': True,
'clean_empty_dirs': False,
'ignore_hardlink_matches': False,
'copymove_dest_type': DestType.Relative,
'picture_cache_type': self.PICTURE_CACHE_TYPE
"escape_filter_regexp": True,
"clean_empty_dirs": False,
"ignore_hardlink_matches": False,
"copymove_dest_type": DestType.Relative,
"picture_cache_type": self.PICTURE_CACHE_TYPE,
}
self.selected_dupes = []
self.details_panel = DetailsPanel(self)
@@ -155,7 +163,7 @@
for child in children:
child.connect()
#--- Private
# --- Private
def _recreate_result_table(self):
if self.result_table is not None:
self.result_table.disconnect()
@@ -169,26 +177,30 @@
self.view.create_results_window()
def _get_picture_cache_path(self):
cache_type = self.options['picture_cache_type']
cache_name = 'cached_pictures.shelve' if cache_type == 'shelve' else 'cached_pictures.db'
cache_type = self.options["picture_cache_type"]
cache_name = (
"cached_pictures.shelve" if cache_type == "shelve" else "cached_pictures.db"
)
return op.join(self.appdata, cache_name)
def _get_dupe_sort_key(self, dupe, get_group, key, delta):
if self.app_mode in (AppMode.Music, AppMode.Picture):
if key == 'folder_path':
dupe_folder_path = getattr(dupe, 'display_folder_path', dupe.folder_path)
if key == "folder_path":
dupe_folder_path = getattr(
dupe, "display_folder_path", dupe.folder_path
)
return str(dupe_folder_path).lower()
if self.app_mode == AppMode.Picture:
if delta and key == 'dimensions':
if delta and key == "dimensions":
r = cmp_value(dupe, key)
ref_value = cmp_value(get_group().ref, key)
return get_delta_dimensions(r, ref_value)
if key == 'marked':
if key == "marked":
return self.results.is_marked(dupe)
if key == 'percentage':
if key == "percentage":
m = get_group().get_match_of(dupe)
return m.percentage
elif key == 'dupe_count':
elif key == "dupe_count":
return 0
else:
result = cmp_value(dupe, key)
@@ -203,21 +215,25 @@
def _get_group_sort_key(self, group, key):
if self.app_mode in (AppMode.Music, AppMode.Picture):
if key == 'folder_path':
dupe_folder_path = getattr(group.ref, 'display_folder_path', group.ref.folder_path)
if key == "folder_path":
dupe_folder_path = getattr(
group.ref, "display_folder_path", group.ref.folder_path
)
return str(dupe_folder_path).lower()
if key == 'percentage':
if key == "percentage":
return group.percentage
if key == 'dupe_count':
if key == "dupe_count":
return len(group)
if key == 'marked':
if key == "marked":
return len([dupe for dupe in group.dupes if self.results.is_marked(dupe)])
return cmp_value(group.ref, key)
def _do_delete(self, j, link_deleted, use_hardlinks, direct_deletion):
def op(dupe):
j.add_progress()
return self._do_delete_dupe(dupe, link_deleted, use_hardlinks, direct_deletion)
return self._do_delete_dupe(
dupe, link_deleted, use_hardlinks, direct_deletion
)
j.start_job(self.results.mark_count)
self.results.perform_on_marked(op, True)
@@ -233,7 +249,7 @@
else:
os.remove(str_path)
else:
send2trash(str_path) # Raises OSError when there's a problem
send2trash(str_path) # Raises OSError when there's a problem
if link_deleted:
group = self.results.get_group_of_duplicate(dupe)
ref = group.ref
@@ -258,8 +274,9 @@
def _get_export_data(self):
columns = [
col for col in self.result_table.columns.ordered_columns
if col.visible and col.name != 'marked'
col
for col in self.result_table.columns.ordered_columns
if col.visible and col.name != "marked"
]
colnames = [col.display for col in columns]
rows = []
@@ -273,10 +290,11 @@
def _results_changed(self):
self.selected_dupes = [
d for d in self.selected_dupes
d
for d in self.selected_dupes
if self.results.get_group_of_duplicate(d) is not None
]
self.notify('results_changed')
self.notify("results_changed")
def _start_job(self, jobid, func, args=()):
title = JOBID2TITLE[jobid]
@@ -310,7 +328,9 @@
msg = {
JobType.Copy: tr("All marked files were copied successfully."),
JobType.Move: tr("All marked files were moved successfully."),
JobType.Delete: tr("All marked files were successfully sent to Trash."),
JobType.Delete: tr(
"All marked files were successfully sent to Trash."
),
}[jobid]
self.view.show_message(msg)
@@ -341,9 +361,9 @@
if dupes == self.selected_dupes:
return
self.selected_dupes = dupes
self.notify('dupes_selected')
self.notify("dupes_selected")
#--- Protected
# --- Protected
def _get_fileclasses(self):
if self.app_mode == AppMode.Picture:
return [pe.photo.PLAT_SPECIFIC_PHOTO_CLASS]
@@ -360,7 +380,7 @@
else:
return prioritize.all_categories()
#--- Public
# --- Public
def add_directory(self, d):
"""Adds folder ``d`` to :attr:`directories`.
@@ -370,7 +390,7 @@
"""
try:
self.directories.add_path(Path(d))
self.notify('directories_changed')
self.notify("directories_changed")
except directories.AlreadyThereError:
self.view.show_message(tr("'{}' already is in the list.").format(d))
except directories.InvalidPathError:
@@ -383,7 +403,9 @@
if not dupes:
self.view.show_message(MSG_NO_SELECTED_DUPES)
return
msg = tr("All selected %d matches are going to be ignored in all subsequent scans. Continue?")
msg = tr(
"All selected %d matches are going to be ignored in all subsequent scans. Continue?"
)
if not self.view.ask_yes_no(msg % len(dupes)):
return
for dupe in dupes:
@@ -400,22 +422,22 @@
:param str filter: filter to apply
"""
self.results.apply_filter(None)
if self.options['escape_filter_regexp']:
filter = escape(filter, set('()[]\\.|+?^'))
filter = escape(filter, '*', '.')
if self.options["escape_filter_regexp"]:
filter = escape(filter, set("()[]\\.|+?^"))
filter = escape(filter, "*", ".")
self.results.apply_filter(filter)
self._results_changed()
def clean_empty_dirs(self, path):
if self.options['clean_empty_dirs']:
while delete_if_empty(path, ['.DS_Store']):
if self.options["clean_empty_dirs"]:
while delete_if_empty(path, [".DS_Store"]):
path = path.parent()
def clear_picture_cache(self):
try:
os.remove(self._get_picture_cache_path())
except FileNotFoundError:
pass # we don't care
pass # we don't care
def copy_or_move(self, dupe, copy: bool, destination: str, dest_type: DestType):
source_path = dupe.path
@@ -444,6 +466,7 @@
:param bool copy: If True, duplicates will be copied instead of moved
"""
def do(j):
def op(dupe):
j.add_progress()
@@ -459,7 +482,7 @@
prompt = tr("Select a directory to {} marked files to").format(opname)
destination = self.view.select_dest_folder(prompt)
if destination:
desttype = self.options['copymove_dest_type']
desttype = self.options["copymove_dest_type"]
jobid = JobType.Copy if copy else JobType.Move
self._start_job(jobid, do)
@@ -472,8 +495,9 @@
if not self.deletion_options.show(self.results.mark_count):
return
args = [
self.deletion_options.link_deleted, self.deletion_options.use_hardlinks,
self.deletion_options.direct
self.deletion_options.link_deleted,
self.deletion_options.use_hardlinks,
self.deletion_options.direct,
]
logging.debug("Starting deletion job with args %r", args)
self._start_job(JobType.Delete, self._do_delete, args=args)
@@ -495,7 +519,9 @@
The columns and their order in the resulting CSV file is determined in the same way as in
:meth:`export_to_xhtml`.
"""
dest_file = self.view.select_dest_file(tr("Select a destination for your exported CSV"), 'csv')
dest_file = self.view.select_dest_file(
tr("Select a destination for your exported CSV"), "csv"
)
if dest_file:
colnames, rows = self._get_export_data()
try:
@@ -505,13 +531,16 @@
def get_display_info(self, dupe, group, delta=False):
def empty_data():
return {c.name: '---' for c in self.result_table.COLUMNS[1:]}
return {c.name: "---" for c in self.result_table.COLUMNS[1:]}
if (dupe is None) or (group is None):
return empty_data()
try:
return dupe.get_display_info(group, delta)
except Exception as e:
logging.warning("Exception on GetDisplayInfo for %s: %s", str(dupe.path), str(e))
logging.warning(
"Exception on GetDisplayInfo for %s: %s", str(dupe.path), str(e)
)
return empty_data()
def invoke_custom_command(self):
@@ -521,9 +550,11 @@
is replaced with that dupe's ref file. If there's no selection, the command is not invoked.
If the dupe is a ref, ``%d`` and ``%r`` will be the same.
"""
cmd = self.view.get_default('CustomCommand')
cmd = self.view.get_default("CustomCommand")
if not cmd:
msg = tr("You have no custom command set up. Set it up in your preferences.")
msg = tr(
"You have no custom command set up. Set it up in your preferences."
)
self.view.show_message(msg)
return
if not self.selected_dupes:
@@ -531,8 +562,8 @@
dupe = self.selected_dupes[0]
group = self.results.get_group_of_duplicate(dupe)
ref = group.ref
cmd = cmd.replace('%d', str(dupe.path))
cmd = cmd.replace('%r', str(ref.path))
cmd = cmd.replace("%d", str(dupe.path))
cmd = cmd.replace("%r", str(ref.path))
match = re.match(r'"([^"]+)"(.*)', cmd)
if match is not None:
# This code here is because subprocess.Popen doesn't seem to accept, under Windows,
@@ -551,9 +582,9 @@
is persistent data, is the same as when the last session was closed (when :meth:`save` was
called).
"""
self.directories.load_from_file(op.join(self.appdata, 'last_directories.xml'))
self.notify('directories_changed')
p = op.join(self.appdata, 'ignore_list.xml')
self.directories.load_from_file(op.join(self.appdata, "last_directories.xml"))
self.notify("directories_changed")
p = op.join(self.appdata, "ignore_list.xml")
self.ignore_list.load_from_xml(p)
self.ignore_list_dialog.refresh()
@@ -562,8 +593,10 @@
:param str filename: path of the XML file (created with :meth:`save_as`) to load
"""
def do(j):
self.results.load_from_xml(filename, self._get_file, j)
self._start_job(JobType.Load, do)
def make_selected_reference(self):
@@ -588,35 +621,36 @@
if not self.result_table.power_marker:
if changed_groups:
self.selected_dupes = [
d for d in self.selected_dupes
d
for d in self.selected_dupes
if self.results.get_group_of_duplicate(d).ref is d
]
self.notify('results_changed')
self.notify("results_changed")
else:
# If we're in "Dupes Only" mode (previously called Power Marker), things are a bit
# different. The refs are not shown in the table, and if our operation is successful,
# this means that there's no way to follow our dupe selection. Then, the best thing to
# do is to keep our selection index-wise (different dupe selection, but same index
# selection).
self.notify('results_changed_but_keep_selection')
self.notify("results_changed_but_keep_selection")
def mark_all(self):
"""Set all dupes in the results as marked.
"""
self.results.mark_all()
self.notify('marking_changed')
self.notify("marking_changed")
def mark_none(self):
"""Set all dupes in the results as unmarked.
"""
self.results.mark_none()
self.notify('marking_changed')
self.notify("marking_changed")
def mark_invert(self):
"""Invert the marked state of all dupes in the results.
"""
self.results.mark_invert()
self.notify('marking_changed')
self.notify("marking_changed")
def mark_dupe(self, dupe, marked):
"""Change marked status of ``dupe``.
@@ -629,7 +663,7 @@
self.results.mark(dupe)
else:
self.results.unmark(dupe)
self.notify('marking_changed')
self.notify("marking_changed")
def open_selected(self):
"""Open :attr:`selected_dupes` with their associated application.
@@ -656,7 +690,7 @@
indexes = sorted(indexes, reverse=True)
for index in indexes:
del self.directories[index]
self.notify('directories_changed')
self.notify("directories_changed")
except IndexError:
pass
@@ -669,7 +703,7 @@
:type duplicates: list of :class:`~core.fs.File`
"""
self.results.remove_duplicates(self.without_ref(duplicates))
self.notify('results_changed_but_keep_selection')
self.notify("results_changed_but_keep_selection")
def remove_marked(self):
"""Removed marked duplicates from the results (without touching the files themselves).
@@ -724,7 +758,9 @@
if group.prioritize(key_func=sort_key):
count += 1
self._results_changed()
msg = tr("{} duplicate groups were changed by the re-prioritization.").format(count)
msg = tr("{} duplicate groups were changed by the re-prioritization.").format(
count
)
self.view.show_message(msg)
def reveal_selected(self):
@@ -734,10 +770,10 @@
def save(self):
if not op.exists(self.appdata):
os.makedirs(self.appdata)
self.directories.save_to_file(op.join(self.appdata, 'last_directories.xml'))
p = op.join(self.appdata, 'ignore_list.xml')
self.directories.save_to_file(op.join(self.appdata, "last_directories.xml"))
p = op.join(self.appdata, "ignore_list.xml")
self.ignore_list.save_to_xml(p)
self.notify('save_session')
self.notify("save_session")
def save_as(self, filename):
"""Save results in ``filename``.
@@ -756,7 +792,9 @@
"""
scanner = self.SCANNER_CLASS()
if not self.directories.has_any_file():
self.view.show_message(tr("The selected directories contain no scannable file."))
self.view.show_message(
tr("The selected directories contain no scannable file.")
)
return
# Send relevant options down to the scanner instance
for k, v in self.options.items():
@@ -771,12 +809,16 @@
def do(j):
j.set_progress(0, tr("Collecting files to scan"))
if scanner.scan_type == ScanType.Folders:
files = list(self.directories.get_folders(folderclass=se.fs.Folder, j=j))
files = list(
self.directories.get_folders(folderclass=se.fs.Folder, j=j)
)
else:
files = list(self.directories.get_files(fileclasses=self.fileclasses, j=j))
if self.options['ignore_hardlink_matches']:
files = list(
self.directories.get_files(fileclasses=self.fileclasses, j=j)
)
if self.options["ignore_hardlink_matches"]:
files = self._remove_hardlink_dupes(files)
logging.info('Scanning %d files' % len(files))
logging.info("Scanning %d files" % len(files))
self.results.groups = scanner.get_dupe_groups(files, self.ignore_list, j)
self.discarded_file_count = scanner.discarded_file_count
@@ -792,12 +834,16 @@
markfunc = self.results.mark
for dupe in selected:
markfunc(dupe)
self.notify('marking_changed')
self.notify("marking_changed")
def without_ref(self, dupes):
"""Returns ``dupes`` with all reference elements removed.
"""
return [dupe for dupe in dupes if self.results.get_group_of_duplicate(dupe).ref is not dupe]
return [
dupe
for dupe in dupes
if self.results.get_group_of_duplicate(dupe).ref is not dupe
]
def get_default(self, key, fallback_value=None):
result = nonone(self.view.get_default(key), fallback_value)
@@ -812,7 +858,7 @@
def set_default(self, key, value):
self.view.set_default(key, value)
#--- Properties
# --- Properties
@property
def stat_line(self):
result = self.results.stat_line
@@ -836,12 +882,21 @@
@property
def METADATA_TO_READ(self):
if self.app_mode == AppMode.Picture:
return ['size', 'mtime', 'dimensions', 'exif_timestamp']
return ["size", "mtime", "dimensions", "exif_timestamp"]
elif self.app_mode == AppMode.Music:
return [
'size', 'mtime', 'duration', 'bitrate', 'samplerate', 'title', 'artist',
'album', 'genre', 'year', 'track', 'comment'
"size",
"mtime",
"duration",
"bitrate",
"samplerate",
"title",
"artist",
"album",
"genre",
"year",
"track",
"comment",
]
else:
return ['size', 'mtime']
return ["size", "mtime"]

core/directories.py

@@ -15,12 +15,13 @@ from hscommon.util import FileOrPath
from . import fs
__all__ = [
'Directories',
'DirectoryState',
'AlreadyThereError',
'InvalidPathError',
"Directories",
"DirectoryState",
"AlreadyThereError",
"InvalidPathError",
]
class DirectoryState:
"""Enum describing how a folder should be considered.
@@ -28,16 +29,20 @@ class DirectoryState:
* DirectoryState.Reference: Scan files, but make sure never to delete any of them
* DirectoryState.Excluded: Don't scan this folder
"""
Normal = 0
Reference = 1
Excluded = 2
class AlreadyThereError(Exception):
"""The path being added is already in the directory list"""
class InvalidPathError(Exception):
"""The path being added is invalid"""
class Directories:
"""Holds user folder selection.
@@ -47,7 +52,8 @@ class Directories:
Then, when the user starts the scan, :meth:`get_files` is called to retrieve all files (wrapped
in :mod:`core.fs`) that have to be scanned according to the chosen folders/states.
"""
#---Override
# ---Override
def __init__(self):
self._dirs = []
# {path: state}
@@ -68,10 +74,10 @@
def __len__(self):
return len(self._dirs)
#---Private
# ---Private
def _default_state_for_path(self, path):
# Override this in subclasses to specify the state of some special folders.
if path.name.startswith('.'): # hidden
if path.name.startswith("."): # hidden
return DirectoryState.Excluded
def _get_files(self, from_path, fileclasses, j):
@@ -83,11 +89,13 @@
# Recursively get files from folders with lots of subfolder is expensive. However, there
# might be a subfolder in this path that is not excluded. What we want to do is to skim
# through self.states and see if we must continue, or we can stop right here to save time
if not any(p[:len(root)] == root for p in self.states):
if not any(p[: len(root)] == root for p in self.states):
del dirs[:]
try:
if state != DirectoryState.Excluded:
found_files = [fs.get_file(root + f, fileclasses=fileclasses) for f in files]
found_files = [
fs.get_file(root + f, fileclasses=fileclasses) for f in files
]
found_files = [f for f in found_files if f is not None]
# In some cases, directories can be considered as files by dupeGuru, which is
# why we have this line below. In fact, there only one case: Bundle files under
@@ -97,7 +105,11 @@
if f is not None:
found_files.append(f)
dirs.remove(d)
logging.debug("Collected %d files in folder %s", len(found_files), str(from_path))
logging.debug(
"Collected %d files in folder %s",
len(found_files),
str(from_path),
)
for file in found_files:
file.is_ref = state == DirectoryState.Reference
yield file
@@ -118,7 +130,7 @@
except (EnvironmentError, fs.InvalidPath):
pass
#---Public
# ---Public
def add_path(self, path):
"""Adds ``path`` to self, if not already there.
@@ -212,21 +224,21 @@
root = ET.parse(infile).getroot()
except Exception:
return
for rdn in root.getiterator('root_directory'):
for rdn in root.getiterator("root_directory"):
attrib = rdn.attrib
if 'path' not in attrib:
if "path" not in attrib:
continue
path = attrib['path']
path = attrib["path"]
try:
self.add_path(Path(path))
except (AlreadyThereError, InvalidPathError):
pass
for sn in root.getiterator('state'):
for sn in root.getiterator("state"):
attrib = sn.attrib
if not ('path' in attrib and 'value' in attrib):
if not ("path" in attrib and "value" in attrib):
continue
path = attrib['path']
state = attrib['value']
path = attrib["path"]
state = attrib["value"]
self.states[Path(path)] = int(state)
def save_to_file(self, outfile):
@@ -234,17 +246,17 @@
:param file outfile: path or file pointer to XML file to save to.
"""
with FileOrPath(outfile, 'wb') as fp:
root = ET.Element('directories')
with FileOrPath(outfile, "wb") as fp:
root = ET.Element("directories")
for root_path in self:
root_path_node = ET.SubElement(root, 'root_directory')
root_path_node.set('path', str(root_path))
root_path_node = ET.SubElement(root, "root_directory")
root_path_node.set("path", str(root_path))
for path, state in self.states.items():
state_node = ET.SubElement(root, 'state')
state_node.set('path', str(path))
state_node.set('value', str(state))
state_node = ET.SubElement(root, "state")
state_node.set("path", str(path))
state_node.set("value", str(state))
tree = ET.ElementTree(root)
tree.write(fp, encoding='utf-8')
tree.write(fp, encoding="utf-8")
def set_state(self, path, state):
"""Set the state of folder at ``path``.
@@ -259,4 +271,3 @@
if path.is_parent_of(iter_path):
del self.states[iter_path]
self.states[path] = state

core/engine.py

@@ -17,25 +17,26 @@ from hscommon.util import flatten, multi_replace
from hscommon.trans import tr
from hscommon.jobprogress import job
(
WEIGHT_WORDS,
MATCH_SIMILAR_WORDS,
NO_FIELD_ORDER,
) = range(3)
(WEIGHT_WORDS, MATCH_SIMILAR_WORDS, NO_FIELD_ORDER,) = range(3)
JOB_REFRESH_RATE = 100
def getwords(s):
# We decompose the string so that ascii letters with accents can be part of the word.
s = normalize('NFD', s)
s = multi_replace(s, "-_&+():;\\[]{}.,<>/?~!@#$*", ' ').lower()
s = ''.join(c for c in s if c in string.ascii_letters + string.digits + string.whitespace)
return [_f for _f in s.split(' ') if _f] # remove empty elements
s = normalize("NFD", s)
s = multi_replace(s, "-_&+():;\\[]{}.,<>/?~!@#$*", " ").lower()
s = "".join(
c for c in s if c in string.ascii_letters + string.digits + string.whitespace
)
return [_f for _f in s.split(" ") if _f] # remove empty elements
def getfields(s):
fields = [getwords(field) for field in s.split(' - ')]
fields = [getwords(field) for field in s.split(" - ")]
return [_f for _f in fields if _f]
def unpack_fields(fields):
result = []
for field in fields:
@@ -45,6 +46,7 @@ def unpack_fields(fields):
result.append(field)
return result
def compare(first, second, flags=()):
"""Returns the % of words that match between ``first`` and ``second``
@@ -55,11 +57,11 @@ def compare(first, second, flags=()):
return 0
if any(isinstance(element, list) for element in first):
return compare_fields(first, second, flags)
second = second[:] #We must use a copy of second because we remove items from it
second = second[:] # We must use a copy of second because we remove items from it
match_similar = MATCH_SIMILAR_WORDS in flags
weight_words = WEIGHT_WORDS in flags
joined = first + second
total_count = (sum(len(word) for word in joined) if weight_words else len(joined))
total_count = sum(len(word) for word in joined) if weight_words else len(joined)
match_count = 0
in_order = True
for word in first:
@@ -71,12 +73,13 @@
if second[0] != word:
in_order = False
second.remove(word)
match_count += (len(word) if weight_words else 1)
match_count += len(word) if weight_words else 1
result = round(((match_count * 2) / total_count) * 100)
if (result == 100) and (not in_order):
result = 99 # We cannot consider a match exact unless the ordering is the same
result = 99 # We cannot consider a match exact unless the ordering is the same
return result
def compare_fields(first, second, flags=()):
"""Returns the score for the lowest matching :ref:`fields`.
@@ -87,7 +90,7 @@ def compare_fields(first, second, flags=()):
return 0
if NO_FIELD_ORDER in flags:
results = []
#We don't want to remove field directly in the list. We must work on a copy.
# We don't want to remove field directly in the list. We must work on a copy.
second = second[:]
for field1 in first:
max = 0
@@ -101,9 +104,12 @@
if matched_field:
second.remove(matched_field)
else:
results = [compare(field1, field2, flags) for field1, field2 in zip(first, second)]
results = [
compare(field1, field2, flags) for field1, field2 in zip(first, second)
]
return min(results) if results else 0
def build_word_dict(objects, j=job.nulljob):
"""Returns a dict of objects mapped by their words.
@@ -113,11 +119,14 @@
The result will be a dict with words as keys, lists of objects as values.
"""
result = defaultdict(set)
for object in j.iter_with_progress(objects, 'Prepared %d/%d files', JOB_REFRESH_RATE):
for object in j.iter_with_progress(
objects, "Prepared %d/%d files", JOB_REFRESH_RATE
):
for word in unpack_fields(object.words):
result[word].add(object)
return result
def merge_similar_words(word_dict):
"""Take all keys in ``word_dict`` that are similar, and merge them together.
@@ -126,7 +135,7 @@
a word equal to the other.
"""
keys = list(word_dict.keys())
keys.sort(key=len)# we want the shortest word to stay
keys.sort(key=len) # we want the shortest word to stay
while keys:
key = keys.pop(0)
similars = difflib.get_close_matches(key, keys, 100, 0.8)
@@ -138,6 +147,7 @@
del word_dict[similar]
keys.remove(similar)
def reduce_common_words(word_dict, threshold):
"""Remove all objects from ``word_dict`` values where the object count >= ``threshold``
@@ -146,7 +156,9 @@
The exception to this removal are the objects where all the words of the object are common.
Because if we remove them, we will miss some duplicates!
"""
uncommon_words = set(word for word, objects in word_dict.items() if len(objects) < threshold)
uncommon_words = set(
word for word, objects in word_dict.items() if len(objects) < threshold
)
for word, objects in list(word_dict.items()):
if len(objects) < threshold:
continue
@@ -159,11 +171,13 @@
else:
del word_dict[word]
# Writing docstrings in a namedtuple is tricky. From Python 3.3, it's possible to set __doc__, but
# some research allowed me to find a more elegant solution, which is what is done here. See
# http://stackoverflow.com/questions/1606436/adding-docstrings-to-namedtuples-in-python
class Match(namedtuple('Match', 'first second percentage')):
class Match(namedtuple("Match", "first second percentage")):
"""Represents a match between two :class:`~core.fs.File`.
Regarless of the matching method, when two files are determined to match, a Match pair is created,
@@ -182,16 +196,24 @@
their match level according to the scan method which found the match. int from 1 to 100. For
exact scan methods, such as Contents scans, this will always be 100.
"""
__slots__ = ()
def get_match(first, second, flags=()):
#it is assumed here that first and second both have a "words" attribute
# it is assumed here that first and second both have a "words" attribute
percentage = compare(first.words, second.words, flags)
return Match(first, second, percentage)
def getmatches(
objects, min_match_percentage=0, match_similar_words=False, weight_words=False,
no_field_order=False, j=job.nulljob):
objects,
min_match_percentage=0,
match_similar_words=False,
weight_words=False,
no_field_order=False,
j=job.nulljob,
):
"""Returns a list of :class:`Match` within ``objects`` after fuzzily matching their words.
:param objects: List of :class:`~core.fs.File` to match.
@@ -206,7 +228,7 @@ def getmatches(
j = j.start_subjob(2)
sj = j.start_subjob(2)
for o in objects:
if not hasattr(o, 'words'):
if not hasattr(o, "words"):
o.words = getwords(o.name)
word_dict = build_word_dict(objects, sj)
reduce_common_words(word_dict, COMMON_WORD_THRESHOLD)
@@ -241,11 +263,15 @@
except MemoryError:
# This is the place where the memory usage is at its peak during the scan.
# Just continue the process with an incomplete list of matches.
del compared # This should give us enough room to call logging.
logging.warning('Memory Overflow. Matches: %d. Word dict: %d' % (len(result), len(word_dict)))
del compared # This should give us enough room to call logging.
logging.warning(
"Memory Overflow. Matches: %d. Word dict: %d"
% (len(result), len(word_dict))
)
return result
return result
def getmatches_by_contents(files, j=job.nulljob):
"""Returns a list of :class:`Match` within ``files`` if their contents is the same.
@@ -263,13 +289,14 @@
for group in possible_matches:
for first, second in itertools.combinations(group, 2):
if first.is_ref and second.is_ref:
continue # Don't spend time comparing two ref pics together.
continue # Don't spend time comparing two ref pics together.
if first.md5partial == second.md5partial:
if first.md5 == second.md5:
result.append(Match(first, second, 100))
j.add_progress(desc=tr("%d matches found") % len(result))
return result
class Group:
"""A group of :class:`~core.fs.File` that match together.
@@ -297,7 +324,8 @@
Average match percentage of match pairs containing :attr:`ref`.
"""
#---Override
# ---Override
def __init__(self):
self._clear()
@@ -313,7 +341,7 @@
def __len__(self):
return len(self.ordered)
#---Private
# ---Private
def _clear(self):
self._percentage = None
self._matches_for_ref = None
@@ -328,7 +356,7 @@
self._matches_for_ref = [match for match in self.matches if ref in match]
return self._matches_for_ref
#---Public
# ---Public
def add_match(self, match):
"""Adds ``match`` to internal match list and possibly add duplicates to the group.
@@ -339,6 +367,7 @@
:param tuple match: pair of :class:`~core.fs.File` to add
"""
def add_candidate(item, match):