Format files with black
- Format all files with black
- Update tox.ini flake8 arguments to be compatible
- Add black to requirements-extra.txt
- Reduce ignored flake8 rules and fix a few violations
This commit is contained in:
parent
359d6498f7
commit
7ba8aa3514
143
build.py
143
build.py
|
@ -13,129 +13,165 @@ from setuptools import setup, Extension
|
|||
|
||||
from hscommon import sphinxgen
|
||||
from hscommon.build import (
|
||||
add_to_pythonpath, print_and_do, move_all, fix_qt_resource_file,
|
||||
add_to_pythonpath,
|
||||
print_and_do,
|
||||
move_all,
|
||||
fix_qt_resource_file,
|
||||
)
|
||||
from hscommon import loc
|
||||
|
||||
|
||||
def parse_args():
|
||||
usage = "usage: %prog [options]"
|
||||
parser = OptionParser(usage=usage)
|
||||
parser.add_option(
|
||||
'--clean', action='store_true', dest='clean',
|
||||
help="Clean build folder before building"
|
||||
"--clean",
|
||||
action="store_true",
|
||||
dest="clean",
|
||||
help="Clean build folder before building",
|
||||
)
|
||||
parser.add_option(
|
||||
'--doc', action='store_true', dest='doc',
|
||||
help="Build only the help file"
|
||||
"--doc", action="store_true", dest="doc", help="Build only the help file"
|
||||
)
|
||||
parser.add_option(
|
||||
'--loc', action='store_true', dest='loc',
|
||||
help="Build only localization"
|
||||
"--loc", action="store_true", dest="loc", help="Build only localization"
|
||||
)
|
||||
parser.add_option(
|
||||
'--updatepot', action='store_true', dest='updatepot',
|
||||
help="Generate .pot files from source code."
|
||||
"--updatepot",
|
||||
action="store_true",
|
||||
dest="updatepot",
|
||||
help="Generate .pot files from source code.",
|
||||
)
|
||||
parser.add_option(
|
||||
'--mergepot', action='store_true', dest='mergepot',
|
||||
help="Update all .po files based on .pot files."
|
||||
"--mergepot",
|
||||
action="store_true",
|
||||
dest="mergepot",
|
||||
help="Update all .po files based on .pot files.",
|
||||
)
|
||||
parser.add_option(
|
||||
'--normpo', action='store_true', dest='normpo',
|
||||
help="Normalize all PO files (do this before commit)."
|
||||
"--normpo",
|
||||
action="store_true",
|
||||
dest="normpo",
|
||||
help="Normalize all PO files (do this before commit).",
|
||||
)
|
||||
(options, args) = parser.parse_args()
|
||||
return options
|
||||
|
||||
|
||||
def build_help():
|
||||
print("Generating Help")
|
||||
current_path = op.abspath('.')
|
||||
help_basepath = op.join(current_path, 'help', 'en')
|
||||
help_destpath = op.join(current_path, 'build', 'help')
|
||||
changelog_path = op.join(current_path, 'help', 'changelog')
|
||||
current_path = op.abspath(".")
|
||||
help_basepath = op.join(current_path, "help", "en")
|
||||
help_destpath = op.join(current_path, "build", "help")
|
||||
changelog_path = op.join(current_path, "help", "changelog")
|
||||
tixurl = "https://github.com/hsoft/dupeguru/issues/{}"
|
||||
confrepl = {'language': 'en'}
|
||||
changelogtmpl = op.join(current_path, 'help', 'changelog.tmpl')
|
||||
conftmpl = op.join(current_path, 'help', 'conf.tmpl')
|
||||
sphinxgen.gen(help_basepath, help_destpath, changelog_path, tixurl, confrepl, conftmpl, changelogtmpl)
|
||||
confrepl = {"language": "en"}
|
||||
changelogtmpl = op.join(current_path, "help", "changelog.tmpl")
|
||||
conftmpl = op.join(current_path, "help", "conf.tmpl")
|
||||
sphinxgen.gen(
|
||||
help_basepath,
|
||||
help_destpath,
|
||||
changelog_path,
|
||||
tixurl,
|
||||
confrepl,
|
||||
conftmpl,
|
||||
changelogtmpl,
|
||||
)
|
||||
|
||||
|
||||
def build_qt_localizations():
|
||||
loc.compile_all_po(op.join('qtlib', 'locale'))
|
||||
loc.merge_locale_dir(op.join('qtlib', 'locale'), 'locale')
|
||||
loc.compile_all_po(op.join("qtlib", "locale"))
|
||||
loc.merge_locale_dir(op.join("qtlib", "locale"), "locale")
|
||||
|
||||
|
||||
def build_localizations():
|
||||
loc.compile_all_po('locale')
|
||||
loc.compile_all_po("locale")
|
||||
build_qt_localizations()
|
||||
locale_dest = op.join('build', 'locale')
|
||||
locale_dest = op.join("build", "locale")
|
||||
if op.exists(locale_dest):
|
||||
shutil.rmtree(locale_dest)
|
||||
shutil.copytree('locale', locale_dest, ignore=shutil.ignore_patterns('*.po', '*.pot'))
|
||||
shutil.copytree(
|
||||
"locale", locale_dest, ignore=shutil.ignore_patterns("*.po", "*.pot")
|
||||
)
|
||||
|
||||
|
||||
def build_updatepot():
|
||||
print("Building .pot files from source files")
|
||||
print("Building core.pot")
|
||||
loc.generate_pot(['core'], op.join('locale', 'core.pot'), ['tr'])
|
||||
loc.generate_pot(["core"], op.join("locale", "core.pot"), ["tr"])
|
||||
print("Building columns.pot")
|
||||
loc.generate_pot(['core'], op.join('locale', 'columns.pot'), ['coltr'])
|
||||
loc.generate_pot(["core"], op.join("locale", "columns.pot"), ["coltr"])
|
||||
print("Building ui.pot")
|
||||
# When we're not under OS X, we don't want to overwrite ui.pot because it contains Cocoa locs
|
||||
# We want to merge the generated pot with the old pot in the most preserving way possible.
|
||||
ui_packages = ['qt', op.join('cocoa', 'inter')]
|
||||
loc.generate_pot(ui_packages, op.join('locale', 'ui.pot'), ['tr'], merge=True)
|
||||
ui_packages = ["qt", op.join("cocoa", "inter")]
|
||||
loc.generate_pot(ui_packages, op.join("locale", "ui.pot"), ["tr"], merge=True)
|
||||
print("Building qtlib.pot")
|
||||
loc.generate_pot(['qtlib'], op.join('qtlib', 'locale', 'qtlib.pot'), ['tr'])
|
||||
loc.generate_pot(["qtlib"], op.join("qtlib", "locale", "qtlib.pot"), ["tr"])
|
||||
|
||||
|
||||
def build_mergepot():
|
||||
print("Updating .po files using .pot files")
|
||||
loc.merge_pots_into_pos('locale')
|
||||
loc.merge_pots_into_pos(op.join('qtlib', 'locale'))
|
||||
loc.merge_pots_into_pos(op.join('cocoalib', 'locale'))
|
||||
loc.merge_pots_into_pos("locale")
|
||||
loc.merge_pots_into_pos(op.join("qtlib", "locale"))
|
||||
loc.merge_pots_into_pos(op.join("cocoalib", "locale"))
|
||||
|
||||
|
||||
def build_normpo():
|
||||
loc.normalize_all_pos('locale')
|
||||
loc.normalize_all_pos(op.join('qtlib', 'locale'))
|
||||
loc.normalize_all_pos(op.join('cocoalib', 'locale'))
|
||||
loc.normalize_all_pos("locale")
|
||||
loc.normalize_all_pos(op.join("qtlib", "locale"))
|
||||
loc.normalize_all_pos(op.join("cocoalib", "locale"))
|
||||
|
||||
|
||||
def build_pe_modules():
|
||||
print("Building PE Modules")
|
||||
exts = [
|
||||
Extension(
|
||||
"_block",
|
||||
[op.join('core', 'pe', 'modules', 'block.c'), op.join('core', 'pe', 'modules', 'common.c')]
|
||||
[
|
||||
op.join("core", "pe", "modules", "block.c"),
|
||||
op.join("core", "pe", "modules", "common.c"),
|
||||
],
|
||||
),
|
||||
Extension(
|
||||
"_cache",
|
||||
[op.join('core', 'pe', 'modules', 'cache.c'), op.join('core', 'pe', 'modules', 'common.c')]
|
||||
[
|
||||
op.join("core", "pe", "modules", "cache.c"),
|
||||
op.join("core", "pe", "modules", "common.c"),
|
||||
],
|
||||
),
|
||||
]
|
||||
exts.append(Extension("_block_qt", [op.join('qt', 'pe', 'modules', 'block.c')]))
|
||||
exts.append(Extension("_block_qt", [op.join("qt", "pe", "modules", "block.c")]))
|
||||
setup(
|
||||
script_args=['build_ext', '--inplace'],
|
||||
ext_modules=exts,
|
||||
script_args=["build_ext", "--inplace"], ext_modules=exts,
|
||||
)
|
||||
move_all('_block_qt*', op.join('qt', 'pe'))
|
||||
move_all('_block*', op.join('core', 'pe'))
|
||||
move_all('_cache*', op.join('core', 'pe'))
|
||||
move_all("_block_qt*", op.join("qt", "pe"))
|
||||
move_all("_block*", op.join("core", "pe"))
|
||||
move_all("_cache*", op.join("core", "pe"))
|
||||
|
||||
|
||||
def build_normal():
|
||||
print("Building dupeGuru with UI qt")
|
||||
add_to_pythonpath('.')
|
||||
add_to_pythonpath(".")
|
||||
print("Building dupeGuru")
|
||||
build_pe_modules()
|
||||
print("Building localizations")
|
||||
build_localizations()
|
||||
print("Building Qt stuff")
|
||||
print_and_do("pyrcc5 {0} > {1}".format(op.join('qt', 'dg.qrc'), op.join('qt', 'dg_rc.py')))
|
||||
fix_qt_resource_file(op.join('qt', 'dg_rc.py'))
|
||||
print_and_do(
|
||||
"pyrcc5 {0} > {1}".format(op.join("qt", "dg.qrc"), op.join("qt", "dg_rc.py"))
|
||||
)
|
||||
fix_qt_resource_file(op.join("qt", "dg_rc.py"))
|
||||
build_help()
|
||||
|
||||
|
||||
def main():
|
||||
options = parse_args()
|
||||
if options.clean:
|
||||
if op.exists('build'):
|
||||
shutil.rmtree('build')
|
||||
if not op.exists('build'):
|
||||
os.mkdir('build')
|
||||
if op.exists("build"):
|
||||
shutil.rmtree("build")
|
||||
if not op.exists("build"):
|
||||
os.mkdir("build")
|
||||
if options.doc:
|
||||
build_help()
|
||||
elif options.loc:
|
||||
|
@ -149,5 +185,6 @@ def main():
|
|||
else:
|
||||
build_normal()
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
@ -1,3 +1,2 @@
|
|||
__version__ = '4.0.4'
|
||||
__appname__ = 'dupeGuru'
|
||||
|
||||
__version__ = "4.0.4"
|
||||
__appname__ = "dupeGuru"
|
||||
|
|
227
core/app.py
227
core/app.py
|
@ -34,8 +34,8 @@ from .gui.ignore_list_dialog import IgnoreListDialog
|
|||
from .gui.problem_dialog import ProblemDialog
|
||||
from .gui.stats_label import StatsLabel
|
||||
|
||||
HAD_FIRST_LAUNCH_PREFERENCE = 'HadFirstLaunch'
|
||||
DEBUG_MODE_PREFERENCE = 'DebugMode'
|
||||
HAD_FIRST_LAUNCH_PREFERENCE = "HadFirstLaunch"
|
||||
DEBUG_MODE_PREFERENCE = "DebugMode"
|
||||
|
||||
MSG_NO_MARKED_DUPES = tr("There are no marked duplicates. Nothing has been done.")
|
||||
MSG_NO_SELECTED_DUPES = tr("There are no selected duplicates. Nothing has been done.")
|
||||
|
@ -44,23 +44,27 @@ MSG_MANY_FILES_TO_OPEN = tr(
|
|||
"files are opened with, doing so can create quite a mess. Continue?"
|
||||
)
|
||||
|
||||
|
||||
class DestType:
|
||||
Direct = 0
|
||||
Relative = 1
|
||||
Absolute = 2
|
||||
|
||||
|
||||
class JobType:
|
||||
Scan = 'job_scan'
|
||||
Load = 'job_load'
|
||||
Move = 'job_move'
|
||||
Copy = 'job_copy'
|
||||
Delete = 'job_delete'
|
||||
Scan = "job_scan"
|
||||
Load = "job_load"
|
||||
Move = "job_move"
|
||||
Copy = "job_copy"
|
||||
Delete = "job_delete"
|
||||
|
||||
|
||||
class AppMode:
|
||||
Standard = 0
|
||||
Music = 1
|
||||
Picture = 2
|
||||
|
||||
|
||||
JOBID2TITLE = {
|
||||
JobType.Scan: tr("Scanning for duplicates"),
|
||||
JobType.Load: tr("Loading"),
|
||||
|
@ -69,6 +73,7 @@ JOBID2TITLE = {
|
|||
JobType.Delete: tr("Sending to Trash"),
|
||||
}
|
||||
|
||||
|
||||
class DupeGuru(Broadcaster):
|
||||
"""Holds everything together.
|
||||
|
||||
|
@ -100,7 +105,8 @@ class DupeGuru(Broadcaster):
|
|||
|
||||
Instance of :mod:`meta-gui <core.gui>` table listing the results from :attr:`results`
|
||||
"""
|
||||
#--- View interface
|
||||
|
||||
# --- View interface
|
||||
# get_default(key_name)
|
||||
# set_default(key_name, value)
|
||||
# show_message(msg)
|
||||
|
@ -116,7 +122,7 @@ class DupeGuru(Broadcaster):
|
|||
|
||||
NAME = PROMPT_NAME = "dupeGuru"
|
||||
|
||||
PICTURE_CACHE_TYPE = 'sqlite' # set to 'shelve' for a ShelveCache
|
||||
PICTURE_CACHE_TYPE = "sqlite" # set to 'shelve' for a ShelveCache
|
||||
|
||||
def __init__(self, view):
|
||||
if view.get_default(DEBUG_MODE_PREFERENCE):
|
||||
|
@ -124,7 +130,9 @@ class DupeGuru(Broadcaster):
|
|||
logging.debug("Debug mode enabled")
|
||||
Broadcaster.__init__(self)
|
||||
self.view = view
|
||||
self.appdata = desktop.special_folder_path(desktop.SpecialFolder.AppData, appname=self.NAME)
|
||||
self.appdata = desktop.special_folder_path(
|
||||
desktop.SpecialFolder.AppData, appname=self.NAME
|
||||
)
|
||||
if not op.exists(self.appdata):
|
||||
os.makedirs(self.appdata)
|
||||
self.app_mode = AppMode.Standard
|
||||
|
@ -136,11 +144,11 @@ class DupeGuru(Broadcaster):
|
|||
# sent to the scanner. They don't have default values because those defaults values are
|
||||
# defined in the scanner class.
|
||||
self.options = {
|
||||
'escape_filter_regexp': True,
|
||||
'clean_empty_dirs': False,
|
||||
'ignore_hardlink_matches': False,
|
||||
'copymove_dest_type': DestType.Relative,
|
||||
'picture_cache_type': self.PICTURE_CACHE_TYPE
|
||||
"escape_filter_regexp": True,
|
||||
"clean_empty_dirs": False,
|
||||
"ignore_hardlink_matches": False,
|
||||
"copymove_dest_type": DestType.Relative,
|
||||
"picture_cache_type": self.PICTURE_CACHE_TYPE,
|
||||
}
|
||||
self.selected_dupes = []
|
||||
self.details_panel = DetailsPanel(self)
|
||||
|
@ -155,7 +163,7 @@ class DupeGuru(Broadcaster):
|
|||
for child in children:
|
||||
child.connect()
|
||||
|
||||
#--- Private
|
||||
# --- Private
|
||||
def _recreate_result_table(self):
|
||||
if self.result_table is not None:
|
||||
self.result_table.disconnect()
|
||||
|
@ -169,26 +177,30 @@ class DupeGuru(Broadcaster):
|
|||
self.view.create_results_window()
|
||||
|
||||
def _get_picture_cache_path(self):
|
||||
cache_type = self.options['picture_cache_type']
|
||||
cache_name = 'cached_pictures.shelve' if cache_type == 'shelve' else 'cached_pictures.db'
|
||||
cache_type = self.options["picture_cache_type"]
|
||||
cache_name = (
|
||||
"cached_pictures.shelve" if cache_type == "shelve" else "cached_pictures.db"
|
||||
)
|
||||
return op.join(self.appdata, cache_name)
|
||||
|
||||
def _get_dupe_sort_key(self, dupe, get_group, key, delta):
|
||||
if self.app_mode in (AppMode.Music, AppMode.Picture):
|
||||
if key == 'folder_path':
|
||||
dupe_folder_path = getattr(dupe, 'display_folder_path', dupe.folder_path)
|
||||
if key == "folder_path":
|
||||
dupe_folder_path = getattr(
|
||||
dupe, "display_folder_path", dupe.folder_path
|
||||
)
|
||||
return str(dupe_folder_path).lower()
|
||||
if self.app_mode == AppMode.Picture:
|
||||
if delta and key == 'dimensions':
|
||||
if delta and key == "dimensions":
|
||||
r = cmp_value(dupe, key)
|
||||
ref_value = cmp_value(get_group().ref, key)
|
||||
return get_delta_dimensions(r, ref_value)
|
||||
if key == 'marked':
|
||||
if key == "marked":
|
||||
return self.results.is_marked(dupe)
|
||||
if key == 'percentage':
|
||||
if key == "percentage":
|
||||
m = get_group().get_match_of(dupe)
|
||||
return m.percentage
|
||||
elif key == 'dupe_count':
|
||||
elif key == "dupe_count":
|
||||
return 0
|
||||
else:
|
||||
result = cmp_value(dupe, key)
|
||||
|
@ -203,21 +215,25 @@ class DupeGuru(Broadcaster):
|
|||
|
||||
def _get_group_sort_key(self, group, key):
|
||||
if self.app_mode in (AppMode.Music, AppMode.Picture):
|
||||
if key == 'folder_path':
|
||||
dupe_folder_path = getattr(group.ref, 'display_folder_path', group.ref.folder_path)
|
||||
if key == "folder_path":
|
||||
dupe_folder_path = getattr(
|
||||
group.ref, "display_folder_path", group.ref.folder_path
|
||||
)
|
||||
return str(dupe_folder_path).lower()
|
||||
if key == 'percentage':
|
||||
if key == "percentage":
|
||||
return group.percentage
|
||||
if key == 'dupe_count':
|
||||
if key == "dupe_count":
|
||||
return len(group)
|
||||
if key == 'marked':
|
||||
if key == "marked":
|
||||
return len([dupe for dupe in group.dupes if self.results.is_marked(dupe)])
|
||||
return cmp_value(group.ref, key)
|
||||
|
||||
def _do_delete(self, j, link_deleted, use_hardlinks, direct_deletion):
|
||||
def op(dupe):
|
||||
j.add_progress()
|
||||
return self._do_delete_dupe(dupe, link_deleted, use_hardlinks, direct_deletion)
|
||||
return self._do_delete_dupe(
|
||||
dupe, link_deleted, use_hardlinks, direct_deletion
|
||||
)
|
||||
|
||||
j.start_job(self.results.mark_count)
|
||||
self.results.perform_on_marked(op, True)
|
||||
|
@ -233,7 +249,7 @@ class DupeGuru(Broadcaster):
|
|||
else:
|
||||
os.remove(str_path)
|
||||
else:
|
||||
send2trash(str_path) # Raises OSError when there's a problem
|
||||
send2trash(str_path) # Raises OSError when there's a problem
|
||||
if link_deleted:
|
||||
group = self.results.get_group_of_duplicate(dupe)
|
||||
ref = group.ref
|
||||
|
@ -258,8 +274,9 @@ class DupeGuru(Broadcaster):
|
|||
|
||||
def _get_export_data(self):
|
||||
columns = [
|
||||
col for col in self.result_table.columns.ordered_columns
|
||||
if col.visible and col.name != 'marked'
|
||||
col
|
||||
for col in self.result_table.columns.ordered_columns
|
||||
if col.visible and col.name != "marked"
|
||||
]
|
||||
colnames = [col.display for col in columns]
|
||||
rows = []
|
||||
|
@ -273,10 +290,11 @@ class DupeGuru(Broadcaster):
|
|||
|
||||
def _results_changed(self):
|
||||
self.selected_dupes = [
|
||||
d for d in self.selected_dupes
|
||||
d
|
||||
for d in self.selected_dupes
|
||||
if self.results.get_group_of_duplicate(d) is not None
|
||||
]
|
||||
self.notify('results_changed')
|
||||
self.notify("results_changed")
|
||||
|
||||
def _start_job(self, jobid, func, args=()):
|
||||
title = JOBID2TITLE[jobid]
|
||||
|
@ -310,7 +328,9 @@ class DupeGuru(Broadcaster):
|
|||
msg = {
|
||||
JobType.Copy: tr("All marked files were copied successfully."),
|
||||
JobType.Move: tr("All marked files were moved successfully."),
|
||||
JobType.Delete: tr("All marked files were successfully sent to Trash."),
|
||||
JobType.Delete: tr(
|
||||
"All marked files were successfully sent to Trash."
|
||||
),
|
||||
}[jobid]
|
||||
self.view.show_message(msg)
|
||||
|
||||
|
@ -341,9 +361,9 @@ class DupeGuru(Broadcaster):
|
|||
if dupes == self.selected_dupes:
|
||||
return
|
||||
self.selected_dupes = dupes
|
||||
self.notify('dupes_selected')
|
||||
self.notify("dupes_selected")
|
||||
|
||||
#--- Protected
|
||||
# --- Protected
|
||||
def _get_fileclasses(self):
|
||||
if self.app_mode == AppMode.Picture:
|
||||
return [pe.photo.PLAT_SPECIFIC_PHOTO_CLASS]
|
||||
|
@ -360,7 +380,7 @@ class DupeGuru(Broadcaster):
|
|||
else:
|
||||
return prioritize.all_categories()
|
||||
|
||||
#--- Public
|
||||
# --- Public
|
||||
def add_directory(self, d):
|
||||
"""Adds folder ``d`` to :attr:`directories`.
|
||||
|
||||
|
@ -370,7 +390,7 @@ class DupeGuru(Broadcaster):
|
|||
"""
|
||||
try:
|
||||
self.directories.add_path(Path(d))
|
||||
self.notify('directories_changed')
|
||||
self.notify("directories_changed")
|
||||
except directories.AlreadyThereError:
|
||||
self.view.show_message(tr("'{}' already is in the list.").format(d))
|
||||
except directories.InvalidPathError:
|
||||
|
@ -383,7 +403,9 @@ class DupeGuru(Broadcaster):
|
|||
if not dupes:
|
||||
self.view.show_message(MSG_NO_SELECTED_DUPES)
|
||||
return
|
||||
msg = tr("All selected %d matches are going to be ignored in all subsequent scans. Continue?")
|
||||
msg = tr(
|
||||
"All selected %d matches are going to be ignored in all subsequent scans. Continue?"
|
||||
)
|
||||
if not self.view.ask_yes_no(msg % len(dupes)):
|
||||
return
|
||||
for dupe in dupes:
|
||||
|
@ -400,22 +422,22 @@ class DupeGuru(Broadcaster):
|
|||
:param str filter: filter to apply
|
||||
"""
|
||||
self.results.apply_filter(None)
|
||||
if self.options['escape_filter_regexp']:
|
||||
filter = escape(filter, set('()[]\\.|+?^'))
|
||||
filter = escape(filter, '*', '.')
|
||||
if self.options["escape_filter_regexp"]:
|
||||
filter = escape(filter, set("()[]\\.|+?^"))
|
||||
filter = escape(filter, "*", ".")
|
||||
self.results.apply_filter(filter)
|
||||
self._results_changed()
|
||||
|
||||
def clean_empty_dirs(self, path):
|
||||
if self.options['clean_empty_dirs']:
|
||||
while delete_if_empty(path, ['.DS_Store']):
|
||||
if self.options["clean_empty_dirs"]:
|
||||
while delete_if_empty(path, [".DS_Store"]):
|
||||
path = path.parent()
|
||||
|
||||
def clear_picture_cache(self):
|
||||
try:
|
||||
os.remove(self._get_picture_cache_path())
|
||||
except FileNotFoundError:
|
||||
pass # we don't care
|
||||
pass # we don't care
|
||||
|
||||
def copy_or_move(self, dupe, copy: bool, destination: str, dest_type: DestType):
|
||||
source_path = dupe.path
|
||||
|
@ -444,6 +466,7 @@ class DupeGuru(Broadcaster):
|
|||
|
||||
:param bool copy: If True, duplicates will be copied instead of moved
|
||||
"""
|
||||
|
||||
def do(j):
|
||||
def op(dupe):
|
||||
j.add_progress()
|
||||
|
@ -459,7 +482,7 @@ class DupeGuru(Broadcaster):
|
|||
prompt = tr("Select a directory to {} marked files to").format(opname)
|
||||
destination = self.view.select_dest_folder(prompt)
|
||||
if destination:
|
||||
desttype = self.options['copymove_dest_type']
|
||||
desttype = self.options["copymove_dest_type"]
|
||||
jobid = JobType.Copy if copy else JobType.Move
|
||||
self._start_job(jobid, do)
|
||||
|
||||
|
@ -472,8 +495,9 @@ class DupeGuru(Broadcaster):
|
|||
if not self.deletion_options.show(self.results.mark_count):
|
||||
return
|
||||
args = [
|
||||
self.deletion_options.link_deleted, self.deletion_options.use_hardlinks,
|
||||
self.deletion_options.direct
|
||||
self.deletion_options.link_deleted,
|
||||
self.deletion_options.use_hardlinks,
|
||||
self.deletion_options.direct,
|
||||
]
|
||||
logging.debug("Starting deletion job with args %r", args)
|
||||
self._start_job(JobType.Delete, self._do_delete, args=args)
|
||||
|
@ -495,7 +519,9 @@ class DupeGuru(Broadcaster):
|
|||
The columns and their order in the resulting CSV file is determined in the same way as in
|
||||
:meth:`export_to_xhtml`.
|
||||
"""
|
||||
dest_file = self.view.select_dest_file(tr("Select a destination for your exported CSV"), 'csv')
|
||||
dest_file = self.view.select_dest_file(
|
||||
tr("Select a destination for your exported CSV"), "csv"
|
||||
)
|
||||
if dest_file:
|
||||
colnames, rows = self._get_export_data()
|
||||
try:
|
||||
|
@ -505,13 +531,16 @@ class DupeGuru(Broadcaster):
|
|||
|
||||
def get_display_info(self, dupe, group, delta=False):
|
||||
def empty_data():
|
||||
return {c.name: '---' for c in self.result_table.COLUMNS[1:]}
|
||||
return {c.name: "---" for c in self.result_table.COLUMNS[1:]}
|
||||
|
||||
if (dupe is None) or (group is None):
|
||||
return empty_data()
|
||||
try:
|
||||
return dupe.get_display_info(group, delta)
|
||||
except Exception as e:
|
||||
logging.warning("Exception on GetDisplayInfo for %s: %s", str(dupe.path), str(e))
|
||||
logging.warning(
|
||||
"Exception on GetDisplayInfo for %s: %s", str(dupe.path), str(e)
|
||||
)
|
||||
return empty_data()
|
||||
|
||||
def invoke_custom_command(self):
|
||||
|
@ -521,9 +550,11 @@ class DupeGuru(Broadcaster):
|
|||
is replaced with that dupe's ref file. If there's no selection, the command is not invoked.
|
||||
If the dupe is a ref, ``%d`` and ``%r`` will be the same.
|
||||
"""
|
||||
cmd = self.view.get_default('CustomCommand')
|
||||
cmd = self.view.get_default("CustomCommand")
|
||||
if not cmd:
|
||||
msg = tr("You have no custom command set up. Set it up in your preferences.")
|
||||
msg = tr(
|
||||
"You have no custom command set up. Set it up in your preferences."
|
||||
)
|
||||
self.view.show_message(msg)
|
||||
return
|
||||
if not self.selected_dupes:
|
||||
|
@ -531,8 +562,8 @@ class DupeGuru(Broadcaster):
|
|||
dupe = self.selected_dupes[0]
|
||||
group = self.results.get_group_of_duplicate(dupe)
|
||||
ref = group.ref
|
||||
cmd = cmd.replace('%d', str(dupe.path))
|
||||
cmd = cmd.replace('%r', str(ref.path))
|
||||
cmd = cmd.replace("%d", str(dupe.path))
|
||||
cmd = cmd.replace("%r", str(ref.path))
|
||||
match = re.match(r'"([^"]+)"(.*)', cmd)
|
||||
if match is not None:
|
||||
# This code here is because subprocess. Popen doesn't seem to accept, under Windows,
|
||||
|
@ -551,9 +582,9 @@ class DupeGuru(Broadcaster):
|
|||
is persistent data, is the same as when the last session was closed (when :meth:`save` was
|
||||
called).
|
||||
"""
|
||||
self.directories.load_from_file(op.join(self.appdata, 'last_directories.xml'))
|
||||
self.notify('directories_changed')
|
||||
p = op.join(self.appdata, 'ignore_list.xml')
|
||||
self.directories.load_from_file(op.join(self.appdata, "last_directories.xml"))
|
||||
self.notify("directories_changed")
|
||||
p = op.join(self.appdata, "ignore_list.xml")
|
||||
self.ignore_list.load_from_xml(p)
|
||||
self.ignore_list_dialog.refresh()
|
||||
|
||||
|
@ -562,8 +593,10 @@ class DupeGuru(Broadcaster):
|
|||
|
||||
:param str filename: path of the XML file (created with :meth:`save_as`) to load
|
||||
"""
|
||||
|
||||
def do(j):
|
||||
self.results.load_from_xml(filename, self._get_file, j)
|
||||
|
||||
self._start_job(JobType.Load, do)
|
||||
|
||||
def make_selected_reference(self):
|
||||
|
@ -588,35 +621,36 @@ class DupeGuru(Broadcaster):
|
|||
if not self.result_table.power_marker:
|
||||
if changed_groups:
|
||||
self.selected_dupes = [
|
||||
d for d in self.selected_dupes
|
||||
d
|
||||
for d in self.selected_dupes
|
||||
if self.results.get_group_of_duplicate(d).ref is d
|
||||
]
|
||||
self.notify('results_changed')
|
||||
self.notify("results_changed")
|
||||
else:
|
||||
# If we're in "Dupes Only" mode (previously called Power Marker), things are a bit
|
||||
# different. The refs are not shown in the table, and if our operation is successful,
|
||||
# this means that there's no way to follow our dupe selection. Then, the best thing to
|
||||
# do is to keep our selection index-wise (different dupe selection, but same index
|
||||
# selection).
|
||||
self.notify('results_changed_but_keep_selection')
|
||||
self.notify("results_changed_but_keep_selection")
|
||||
|
||||
def mark_all(self):
|
||||
"""Set all dupes in the results as marked.
|
||||
"""
|
||||
self.results.mark_all()
|
||||
self.notify('marking_changed')
|
||||
self.notify("marking_changed")
|
||||
|
||||
def mark_none(self):
|
||||
"""Set all dupes in the results as unmarked.
|
||||
"""
|
||||
self.results.mark_none()
|
||||
self.notify('marking_changed')
|
||||
self.notify("marking_changed")
|
||||
|
||||
def mark_invert(self):
|
||||
"""Invert the marked state of all dupes in the results.
|
||||
"""
|
||||
self.results.mark_invert()
|
||||
self.notify('marking_changed')
|
||||
self.notify("marking_changed")
|
||||
|
||||
def mark_dupe(self, dupe, marked):
|
||||
"""Change marked status of ``dupe``.
|
||||
|
@ -629,7 +663,7 @@ class DupeGuru(Broadcaster):
|
|||
self.results.mark(dupe)
|
||||
else:
|
||||
self.results.unmark(dupe)
|
||||
self.notify('marking_changed')
|
||||
self.notify("marking_changed")
|
||||
|
||||
def open_selected(self):
|
||||
"""Open :attr:`selected_dupes` with their associated application.
|
||||
|
@ -656,7 +690,7 @@ class DupeGuru(Broadcaster):
|
|||
indexes = sorted(indexes, reverse=True)
|
||||
for index in indexes:
|
||||
del self.directories[index]
|
||||
self.notify('directories_changed')
|
||||
self.notify("directories_changed")
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
|
@ -669,7 +703,7 @@ class DupeGuru(Broadcaster):
|
|||
:type duplicates: list of :class:`~core.fs.File`
|
||||
"""
|
||||
self.results.remove_duplicates(self.without_ref(duplicates))
|
||||
self.notify('results_changed_but_keep_selection')
|
||||
self.notify("results_changed_but_keep_selection")
|
||||
|
||||
def remove_marked(self):
|
||||
"""Removed marked duplicates from the results (without touching the files themselves).
|
||||
|
@ -724,7 +758,9 @@ class DupeGuru(Broadcaster):
|
|||
if group.prioritize(key_func=sort_key):
|
||||
count += 1
|
||||
self._results_changed()
|
||||
msg = tr("{} duplicate groups were changed by the re-prioritization.").format(count)
|
||||
msg = tr("{} duplicate groups were changed by the re-prioritization.").format(
|
||||
count
|
||||
)
|
||||
self.view.show_message(msg)
|
||||
|
||||
def reveal_selected(self):
|
||||
|
@ -734,10 +770,10 @@ class DupeGuru(Broadcaster):
|
|||
def save(self):
|
||||
if not op.exists(self.appdata):
|
||||
os.makedirs(self.appdata)
|
||||
self.directories.save_to_file(op.join(self.appdata, 'last_directories.xml'))
|
||||
p = op.join(self.appdata, 'ignore_list.xml')
|
||||
self.directories.save_to_file(op.join(self.appdata, "last_directories.xml"))
|
||||
p = op.join(self.appdata, "ignore_list.xml")
|
||||
self.ignore_list.save_to_xml(p)
|
||||
self.notify('save_session')
|
||||
self.notify("save_session")
|
||||
|
||||
def save_as(self, filename):
|
||||
"""Save results in ``filename``.
|
||||
|
@ -756,7 +792,9 @@ class DupeGuru(Broadcaster):
|
|||
"""
|
||||
scanner = self.SCANNER_CLASS()
|
||||
if not self.directories.has_any_file():
|
||||
self.view.show_message(tr("The selected directories contain no scannable file."))
|
||||
self.view.show_message(
|
||||
tr("The selected directories contain no scannable file.")
|
||||
)
|
||||
return
|
||||
# Send relevant options down to the scanner instance
|
||||
for k, v in self.options.items():
|
||||
|
@ -771,12 +809,16 @@ class DupeGuru(Broadcaster):
|
|||
def do(j):
|
||||
j.set_progress(0, tr("Collecting files to scan"))
|
||||
if scanner.scan_type == ScanType.Folders:
|
||||
files = list(self.directories.get_folders(folderclass=se.fs.Folder, j=j))
|
||||
files = list(
|
||||
self.directories.get_folders(folderclass=se.fs.Folder, j=j)
|
||||
)
|
||||
else:
|
||||
files = list(self.directories.get_files(fileclasses=self.fileclasses, j=j))
|
||||
if self.options['ignore_hardlink_matches']:
|
||||
files = list(
|
||||
self.directories.get_files(fileclasses=self.fileclasses, j=j)
|
||||
)
|
||||
if self.options["ignore_hardlink_matches"]:
|
||||
files = self._remove_hardlink_dupes(files)
|
||||
logging.info('Scanning %d files' % len(files))
|
||||
logging.info("Scanning %d files" % len(files))
|
||||
self.results.groups = scanner.get_dupe_groups(files, self.ignore_list, j)
|
||||
self.discarded_file_count = scanner.discarded_file_count
|
||||
|
||||
|
@ -792,12 +834,16 @@ class DupeGuru(Broadcaster):
|
|||
markfunc = self.results.mark
|
||||
for dupe in selected:
|
||||
markfunc(dupe)
|
||||
self.notify('marking_changed')
|
||||
self.notify("marking_changed")
|
||||
|
||||
def without_ref(self, dupes):
|
||||
"""Returns ``dupes`` with all reference elements removed.
|
||||
"""
|
||||
return [dupe for dupe in dupes if self.results.get_group_of_duplicate(dupe).ref is not dupe]
|
||||
return [
|
||||
dupe
|
||||
for dupe in dupes
|
||||
if self.results.get_group_of_duplicate(dupe).ref is not dupe
|
||||
]
|
||||
|
||||
def get_default(self, key, fallback_value=None):
|
||||
result = nonone(self.view.get_default(key), fallback_value)
|
||||
|
@ -812,7 +858,7 @@ class DupeGuru(Broadcaster):
|
|||
def set_default(self, key, value):
|
||||
self.view.set_default(key, value)
|
||||
|
||||
#--- Properties
|
||||
# --- Properties
|
||||
@property
|
||||
def stat_line(self):
|
||||
result = self.results.stat_line
|
||||
|
@ -836,12 +882,21 @@ class DupeGuru(Broadcaster):
|
|||
@property
|
||||
def METADATA_TO_READ(self):
|
||||
if self.app_mode == AppMode.Picture:
|
||||
return ['size', 'mtime', 'dimensions', 'exif_timestamp']
|
||||
return ["size", "mtime", "dimensions", "exif_timestamp"]
|
||||
elif self.app_mode == AppMode.Music:
|
||||
return [
|
||||
'size', 'mtime', 'duration', 'bitrate', 'samplerate', 'title', 'artist',
|
||||
'album', 'genre', 'year', 'track', 'comment'
|
||||
"size",
|
||||
"mtime",
|
||||
"duration",
|
||||
"bitrate",
|
||||
"samplerate",
|
||||
"title",
|
||||
"artist",
|
||||
"album",
|
||||
"genre",
|
||||
"year",
|
||||
"track",
|
||||
"comment",
|
||||
]
|
||||
else:
|
||||
return ['size', 'mtime']
|
||||
|
||||
return ["size", "mtime"]
|
||||
|
|
|
@ -15,12 +15,13 @@ from hscommon.util import FileOrPath
|
|||
from . import fs
|
||||
|
||||
__all__ = [
|
||||
'Directories',
|
||||
'DirectoryState',
|
||||
'AlreadyThereError',
|
||||
'InvalidPathError',
|
||||
"Directories",
|
||||
"DirectoryState",
|
||||
"AlreadyThereError",
|
||||
"InvalidPathError",
|
||||
]
|
||||
|
||||
|
||||
class DirectoryState:
|
||||
"""Enum describing how a folder should be considered.
|
||||
|
||||
|
@ -28,16 +29,20 @@ class DirectoryState:
|
|||
* DirectoryState.Reference: Scan files, but make sure never to delete any of them
|
||||
* DirectoryState.Excluded: Don't scan this folder
|
||||
"""
|
||||
|
||||
Normal = 0
|
||||
Reference = 1
|
||||
Excluded = 2
|
||||
|
||||
|
||||
class AlreadyThereError(Exception):
|
||||
"""The path being added is already in the directory list"""
|
||||
|
||||
|
||||
class InvalidPathError(Exception):
    """The path being added is invalid"""
|
||||
|
||||
|
||||
class Directories:
|
||||
"""Holds user folder selection.
|
||||
|
||||
|
@ -47,7 +52,8 @@ class Directories:
|
|||
Then, when the user starts the scan, :meth:`get_files` is called to retrieve all files (wrapped
|
||||
in :mod:`core.fs`) that have to be scanned according to the chosen folders/states.
|
||||
"""
|
||||
#---Override
|
||||
|
||||
# ---Override
|
||||
def __init__(self):
|
||||
self._dirs = []
|
||||
# {path: state}
|
||||
|
@ -68,10 +74,10 @@ class Directories:
|
|||
def __len__(self):
    """Return the number of entries currently held in ``self._dirs``."""
    return len(self._dirs)
|
||||
|
||||
#---Private
|
||||
# ---Private
|
||||
def _default_state_for_path(self, path):
    """Return the default state for ``path``, or ``None`` when it has none.

    Paths whose ``name`` starts with "." (hidden) default to
    ``DirectoryState.Excluded``; any other path yields ``None``.

    :param path: object exposing a ``name`` string attribute.
    """
    # Override this in subclasses to specify the state of some special folders.
    if path.name.startswith("."):  # hidden
        return DirectoryState.Excluded
    # No special default applies to this path.
    return None
|
||||
|
||||
def _get_files(self, from_path, fileclasses, j):
|
||||
|
@ -83,11 +89,13 @@ class Directories:
|
|||
# Recursively getting files from folders with lots of subfolders is expensive. However, there
|
||||
# might be a subfolder in this path that is not excluded. What we want to do is to skim
|
||||
# through self.states and see if we must continue, or we can stop right here to save time
|
||||
if not any(p[:len(root)] == root for p in self.states):
|
||||
if not any(p[: len(root)] == root for p in self.states):
|
||||
del dirs[:]
|
||||
try:
|
||||
if state != DirectoryState.Excluded:
|
||||
found_files = [fs.get_file(root + f, fileclasses=fileclasses) for f in files]
|
||||
found_files = [
|
||||
fs.get_file(root + f, fileclasses=fileclasses) for f in files
|
||||
]
|
||||
found_files = [f for f in found_files if f is not None]
|
||||
# In some cases, directories can be considered as files by dupeGuru, which is
|
||||
# why we have this line below. In fact, there is only one case: Bundle files under
|
||||
|
@ -97,7 +105,11 @@ class Directories:
|
|||
if f is not None:
|
||||
found_files.append(f)
|
||||
dirs.remove(d)
|
||||
logging.debug("Collected %d files in folder %s", len(found_files), str(from_path))
|
||||
logging.debug(
|
||||
"Collected %d files in folder %s",
|
||||
len(found_files),
|
||||
str(from_path),
|
||||
)
|
||||
for file in found_files:
|
||||
file.is_ref = state == DirectoryState.Reference
|
||||
yield file
|
||||
|
@ -118,7 +130,7 @@ class Directories:
|
|||
except (EnvironmentError, fs.InvalidPath):
|
||||
pass
|
||||
|
||||
#---Public
|
||||
# ---Public
|
||||
def add_path(self, path):
|
||||
"""Adds ``path`` to self, if not already there.
|
||||
|
||||
|
@ -212,21 +224,21 @@ class Directories:
|
|||
root = ET.parse(infile).getroot()
|
||||
except Exception:
|
||||
return
|
||||
for rdn in root.getiterator('root_directory'):
|
||||
for rdn in root.getiterator("root_directory"):
|
||||
attrib = rdn.attrib
|
||||
if 'path' not in attrib:
|
||||
if "path" not in attrib:
|
||||
continue
|
||||
path = attrib['path']
|
||||
path = attrib["path"]
|
||||
try:
|
||||
self.add_path(Path(path))
|
||||
except (AlreadyThereError, InvalidPathError):
|
||||
pass
|
||||
for sn in root.getiterator('state'):
|
||||
for sn in root.getiterator("state"):
|
||||
attrib = sn.attrib
|
||||
if not ('path' in attrib and 'value' in attrib):
|
||||
if not ("path" in attrib and "value" in attrib):
|
||||
continue
|
||||
path = attrib['path']
|
||||
state = attrib['value']
|
||||
path = attrib["path"]
|
||||
state = attrib["value"]
|
||||
self.states[Path(path)] = int(state)
|
||||
|
||||
def save_to_file(self, outfile):
|
||||
|
@ -234,17 +246,17 @@ class Directories:
|
|||
|
||||
:param file outfile: path or file pointer to XML file to save to.
|
||||
"""
|
||||
with FileOrPath(outfile, 'wb') as fp:
|
||||
root = ET.Element('directories')
|
||||
with FileOrPath(outfile, "wb") as fp:
|
||||
root = ET.Element("directories")
|
||||
for root_path in self:
|
||||
root_path_node = ET.SubElement(root, 'root_directory')
|
||||
root_path_node.set('path', str(root_path))
|
||||
root_path_node = ET.SubElement(root, "root_directory")
|
||||
root_path_node.set("path", str(root_path))
|
||||
for path, state in self.states.items():
|
||||
state_node = ET.SubElement(root, 'state')
|
||||
state_node.set('path', str(path))
|
||||
state_node.set('value', str(state))
|
||||
state_node = ET.SubElement(root, "state")
|
||||
state_node.set("path", str(path))
|
||||
state_node.set("value", str(state))
|
||||
tree = ET.ElementTree(root)
|
||||
tree.write(fp, encoding='utf-8')
|
||||
tree.write(fp, encoding="utf-8")
|
||||
|
||||
def set_state(self, path, state):
|
||||
"""Set the state of folder at ``path``.
|
||||
|
@ -259,4 +271,3 @@ class Directories:
|
|||
if path.is_parent_of(iter_path):
|
||||
del self.states[iter_path]
|
||||
self.states[path] = state
|
||||
|
||||
|
|
112
core/engine.py
112
core/engine.py
|
@ -17,25 +17,26 @@ from hscommon.util import flatten, multi_replace
|
|||
from hscommon.trans import tr
|
||||
from hscommon.jobprogress import job
|
||||
|
||||
# Flags accepted by compare() / compare_fields() through their ``flags`` argument.
WEIGHT_WORDS, MATCH_SIMILAR_WORDS, NO_FIELD_ORDER = range(3)

# Refresh rate handed to ``j.iter_with_progress`` when reporting job progress.
JOB_REFRESH_RATE = 100
|
||||
|
||||
|
||||
def getwords(s):
    """Break ``s`` into a list of lowercase ASCII words.

    The string is NFD-normalized, run through ``multi_replace`` with a fixed set
    of punctuation characters and ``" "`` as the replacement (presumably turning
    each of them into a space -- confirm against ``hscommon.util``), lowercased,
    and stripped of every character that is not an ASCII letter, a digit or
    whitespace. The result is split on single spaces only, so other whitespace
    characters stay inside the words they touch.

    :param str s: text to split.
    :returns: list of non-empty words, in order of appearance.
    """
    # We decompose the string so that ascii letters with accents can be part of the word.
    s = normalize("NFD", s)
    s = multi_replace(s, "-_&+():;\\[]{}.,<>/?~!@#$*", " ").lower()
    # Build the allowed-character set once instead of re-concatenating the three
    # strings for every character of ``s``.
    allowed = frozenset(string.ascii_letters + string.digits + string.whitespace)
    s = "".join(c for c in s if c in allowed)
    return [word for word in s.split(" ") if word]  # remove empty elements
|
||||
|
||||
|
||||
def getfields(s):
    """Split ``s`` on ``" - "`` and return the word list of each field.

    Each field is passed to :func:`getwords`; fields that yield no words are
    dropped from the result.

    :param str s: text to split into fields.
    :returns: list of non-empty word lists, one per remaining field.
    """
    fields = [getwords(field) for field in s.split(" - ")]
    return [words for words in fields if words]
|
||||
|
||||
|
||||
def unpack_fields(fields):
|
||||
result = []
|
||||
for field in fields:
|
||||
|
@ -45,6 +46,7 @@ def unpack_fields(fields):
|
|||
result.append(field)
|
||||
return result
|
||||
|
||||
|
||||
def compare(first, second, flags=()):
|
||||
"""Returns the % of words that match between ``first`` and ``second``
|
||||
|
||||
|
@ -55,11 +57,11 @@ def compare(first, second, flags=()):
|
|||
return 0
|
||||
if any(isinstance(element, list) for element in first):
|
||||
return compare_fields(first, second, flags)
|
||||
second = second[:] #We must use a copy of second because we remove items from it
|
||||
second = second[:] # We must use a copy of second because we remove items from it
|
||||
match_similar = MATCH_SIMILAR_WORDS in flags
|
||||
weight_words = WEIGHT_WORDS in flags
|
||||
joined = first + second
|
||||
total_count = (sum(len(word) for word in joined) if weight_words else len(joined))
|
||||
total_count = sum(len(word) for word in joined) if weight_words else len(joined)
|
||||
match_count = 0
|
||||
in_order = True
|
||||
for word in first:
|
||||
|
@ -71,12 +73,13 @@ def compare(first, second, flags=()):
|
|||
if second[0] != word:
|
||||
in_order = False
|
||||
second.remove(word)
|
||||
match_count += (len(word) if weight_words else 1)
|
||||
match_count += len(word) if weight_words else 1
|
||||
result = round(((match_count * 2) / total_count) * 100)
|
||||
if (result == 100) and (not in_order):
|
||||
result = 99 # We cannot consider a match exact unless the ordering is the same
|
||||
result = 99 # We cannot consider a match exact unless the ordering is the same
|
||||
return result
|
||||
|
||||
|
||||
def compare_fields(first, second, flags=()):
|
||||
"""Returns the score for the lowest matching :ref:`fields`.
|
||||
|
||||
|
@ -87,7 +90,7 @@ def compare_fields(first, second, flags=()):
|
|||
return 0
|
||||
if NO_FIELD_ORDER in flags:
|
||||
results = []
|
||||
#We don't want to remove field directly in the list. We must work on a copy.
|
||||
# We don't want to remove field directly in the list. We must work on a copy.
|
||||
second = second[:]
|
||||
for field1 in first:
|
||||
max = 0
|
||||
|
@ -101,9 +104,12 @@ def compare_fields(first, second, flags=()):
|
|||
if matched_field:
|
||||
second.remove(matched_field)
|
||||
else:
|
||||
results = [compare(field1, field2, flags) for field1, field2 in zip(first, second)]
|
||||
results = [
|
||||
compare(field1, field2, flags) for field1, field2 in zip(first, second)
|
||||
]
|
||||
return min(results) if results else 0
|
||||
|
||||
|
||||
def build_word_dict(objects, j=job.nulljob):
|
||||
"""Returns a dict of objects mapped by their words.
|
||||
|
||||
|
@ -113,11 +119,14 @@ def build_word_dict(objects, j=job.nulljob):
|
|||
The result will be a dict with words as keys, sets of objects as values.
|
||||
"""
|
||||
result = defaultdict(set)
|
||||
for object in j.iter_with_progress(objects, 'Prepared %d/%d files', JOB_REFRESH_RATE):
|
||||
for object in j.iter_with_progress(
|
||||
objects, "Prepared %d/%d files", JOB_REFRESH_RATE
|
||||
):
|
||||
for word in unpack_fields(object.words):
|
||||
result[word].add(object)
|
||||
return result
|
||||
|
||||
|
||||
def merge_similar_words(word_dict):
|
||||
"""Take all keys in ``word_dict`` that are similar, and merge them together.
|
||||
|
||||
|
@ -126,7 +135,7 @@ def merge_similar_words(word_dict):
|
|||
a word equal to the other.
|
||||
"""
|
||||
keys = list(word_dict.keys())
|
||||
keys.sort(key=len)# we want the shortest word to stay
|
||||
keys.sort(key=len) # we want the shortest word to stay
|
||||
while keys:
|
||||
key = keys.pop(0)
|
||||
similars = difflib.get_close_matches(key, keys, 100, 0.8)
|
||||
|
@ -138,6 +147,7 @@ def merge_similar_words(word_dict):
|
|||
del word_dict[similar]
|
||||
keys.remove(similar)
|
||||
|
||||
|
||||
def reduce_common_words(word_dict, threshold):
|
||||
"""Remove all objects from ``word_dict`` values where the object count >= ``threshold``
|
||||
|
||||
|
@ -146,7 +156,9 @@ def reduce_common_words(word_dict, threshold):
|
|||
The exception to this removal are the objects where all the words of the object are common.
|
||||
Because if we remove them, we will miss some duplicates!
|
||||
"""
|
||||
uncommon_words = set(word for word, objects in word_dict.items() if len(objects) < threshold)
|
||||
uncommon_words = set(
|
||||
word for word, objects in word_dict.items() if len(objects) < threshold
|
||||
)
|
||||
for word, objects in list(word_dict.items()):
|
||||
if len(objects) < threshold:
|
||||
continue
|
||||
|
@ -159,11 +171,13 @@ def reduce_common_words(word_dict, threshold):
|
|||
else:
|
||||
del word_dict[word]
|
||||
|
||||
|
||||
# Writing docstrings in a namedtuple is tricky. From Python 3.3, it's possible to set __doc__, but
|
||||
# some research allowed me to find a more elegant solution, which is what is done here. See
|
||||
# http://stackoverflow.com/questions/1606436/adding-docstrings-to-namedtuples-in-python
|
||||
|
||||
class Match(namedtuple('Match', 'first second percentage')):
|
||||
|
||||
class Match(namedtuple("Match", "first second percentage")):
|
||||
"""Represents a match between two :class:`~core.fs.File`.
|
||||
|
||||
Regardless of the matching method, when two files are determined to match, a Match pair is created,
|
||||
|
@ -182,16 +196,24 @@ class Match(namedtuple('Match', 'first second percentage')):
|
|||
their match level according to the scan method which found the match. int from 1 to 100. For
|
||||
exact scan methods, such as Contents scans, this will always be 100.
|
||||
"""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
|
||||
def get_match(first, second, flags=()):
    """Build the :class:`Match` pairing ``first`` with ``second``.

    The match percentage is ``compare(first.words, second.words, flags)``.

    :param first: object exposing a ``words`` attribute.
    :param second: object exposing a ``words`` attribute.
    :param flags: passed through to :func:`compare` unchanged.
    """
    # it is assumed here that first and second both have a "words" attribute
    percentage = compare(first.words, second.words, flags)
    return Match(first, second, percentage)
|
||||
|
||||
|
||||
def getmatches(
|
||||
objects, min_match_percentage=0, match_similar_words=False, weight_words=False,
|
||||
no_field_order=False, j=job.nulljob):
|
||||
objects,
|
||||
min_match_percentage=0,
|
||||
match_similar_words=False,
|
||||
weight_words=False,
|
||||
no_field_order=False,
|
||||
j=job.nulljob,
|
||||
):
|
||||
"""Returns a list of :class:`Match` within ``objects`` after fuzzily matching their words.
|
||||
|
||||
:param objects: List of :class:`~core.fs.File` to match.
|
||||
|
@ -206,7 +228,7 @@ def getmatches(
|
|||
j = j.start_subjob(2)
|
||||
sj = j.start_subjob(2)
|
||||
for o in objects:
|
||||
if not hasattr(o, 'words'):
|
||||
if not hasattr(o, "words"):
|
||||
o.words = getwords(o.name)
|
||||
word_dict = build_word_dict(objects, sj)
|
||||
reduce_common_words(word_dict, COMMON_WORD_THRESHOLD)
|
||||
|
@ -241,11 +263,15 @@ def getmatches(
|
|||
except MemoryError:
|
||||
# This is the place where the memory usage is at its peak during the scan.
|
||||
# Just continue the process with an incomplete list of matches.
|
||||
del compared # This should give us enough room to call logging.
|
||||
logging.warning('Memory Overflow. Matches: %d. Word dict: %d' % (len(result), len(word_dict)))
|
||||
del compared # This should give us enough room to call logging.
|
||||
logging.warning(
|
||||
"Memory Overflow. Matches: %d. Word dict: %d"
|
||||
% (len(result), len(word_dict))
|
||||
)
|
||||
return result
|
||||
return result
|
||||
|
||||
|
||||
def getmatches_by_contents(files, j=job.nulljob):
|
||||
"""Returns a list of :class:`Match` within ``files`` if their contents is the same.
|
||||
|
||||
|
@ -263,13 +289,14 @@ def getmatches_by_contents(files, j=job.nulljob):
|
|||
for group in possible_matches:
|
||||
for first, second in itertools.combinations(group, 2):
|
||||
if first.is_ref and second.is_ref:
|
||||
continue # Don't spend time comparing two ref pics together.
|
||||
continue # Don't spend time comparing two ref pics together.
|
||||
if first.md5partial == second.md5partial:
|
||||
if first.md5 == second.md5:
|
||||
result.append(Match(first, second, 100))
|
||||
j.add_progress(desc=tr("%d matches found") % len(result))
|
||||
return result
|
||||
|
||||
|
||||
class Group:
|
||||
"""A group of :class:`~core.fs.File` that match together.
|
||||
|
||||
|
@ -297,7 +324,8 @@ class Group:
|
|||
|
||||
Average match percentage of match pairs containing :attr:`ref`.
|
||||
"""
|
||||
#---Override
|
||||
|
||||
# ---Override
|
||||
def __init__(self):
|
||||
self._clear()
|
||||
|
||||
|
@ -313,7 +341,7 @@ class Group:
|
|||
def __len__(self):
|
||||
return len(self.ordered)
|
||||
|
||||
#---Private
|
||||
# ---Private
|
||||
def _clear(self):
|
||||
self._percentage = None
|
||||
self._matches_for_ref = None
|
||||
|
@ -328,7 +356,7 @@ class Group:
|
|||
self._matches_for_ref = [match for match in self.matches if ref in match]
|
||||
return self._matches_for_ref
|
||||
|
||||
#---Public
|
||||
# ---Public
|
||||
def add_match(self, match):
|
||||
"""Adds ``match`` to internal match list and possibly add duplicates to the group.
|
||||
|
||||
|
@ -339,6 +367,7 @@ class Group:
|
|||
|
||||
:param tuple match: pair of :class:`~core.fs.File` to add
|
||||
"""
|
||||
|
||||
def add_candidate(item, match):
|
||||