Format files with black
- Format all files with black
- Update tox.ini flake8 arguments to be compatible
- Add black to requirements-extra.txt
- Reduce ignored flake8 rules and fix a few violations
parent 359d6498f7
commit 7ba8aa3514
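For context on the commit message: black enforces an 88-column line length and double-quoted strings, both of which stock flake8 defaults reject, so the linter configuration has to move in the same commit. The tox.ini hunk itself is not reproduced in this capture, so the following is only a sketch of the commonly recommended black-compatible flake8 section (the option names are real flake8 options; the exact values this commit chose are not visible here):

    [flake8]
    # black wraps at 88 columns, not flake8's default 79
    max-line-length = 88
    # E203 (whitespace before ':') and W503 (line break before a binary
    # operator) are the two pycodestyle rules that conflict with black
    extend-ignore = E203, W503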
build.py (143 changed lines)
@@ -13,129 +13,165 @@ from setuptools import setup, Extension
 from hscommon import sphinxgen
 from hscommon.build import (
-    add_to_pythonpath, print_and_do, move_all, fix_qt_resource_file,
+    add_to_pythonpath,
+    print_and_do,
+    move_all,
+    fix_qt_resource_file,
 )
 from hscommon import loc


 def parse_args():
     usage = "usage: %prog [options]"
     parser = OptionParser(usage=usage)
     parser.add_option(
-        '--clean', action='store_true', dest='clean',
-        help="Clean build folder before building"
+        "--clean",
+        action="store_true",
+        dest="clean",
+        help="Clean build folder before building",
     )
     parser.add_option(
-        '--doc', action='store_true', dest='doc',
-        help="Build only the help file"
+        "--doc", action="store_true", dest="doc", help="Build only the help file"
     )
     parser.add_option(
-        '--loc', action='store_true', dest='loc',
-        help="Build only localization"
+        "--loc", action="store_true", dest="loc", help="Build only localization"
     )
     parser.add_option(
-        '--updatepot', action='store_true', dest='updatepot',
-        help="Generate .pot files from source code."
+        "--updatepot",
+        action="store_true",
+        dest="updatepot",
+        help="Generate .pot files from source code.",
     )
     parser.add_option(
-        '--mergepot', action='store_true', dest='mergepot',
-        help="Update all .po files based on .pot files."
+        "--mergepot",
+        action="store_true",
+        dest="mergepot",
+        help="Update all .po files based on .pot files.",
     )
     parser.add_option(
-        '--normpo', action='store_true', dest='normpo',
-        help="Normalize all PO files (do this before commit)."
+        "--normpo",
+        action="store_true",
+        dest="normpo",
+        help="Normalize all PO files (do this before commit).",
    )
     (options, args) = parser.parse_args()
     return options


 def build_help():
     print("Generating Help")
-    current_path = op.abspath('.')
-    help_basepath = op.join(current_path, 'help', 'en')
-    help_destpath = op.join(current_path, 'build', 'help')
-    changelog_path = op.join(current_path, 'help', 'changelog')
+    current_path = op.abspath(".")
+    help_basepath = op.join(current_path, "help", "en")
+    help_destpath = op.join(current_path, "build", "help")
+    changelog_path = op.join(current_path, "help", "changelog")
     tixurl = "https://github.com/hsoft/dupeguru/issues/{}"
-    confrepl = {'language': 'en'}
-    changelogtmpl = op.join(current_path, 'help', 'changelog.tmpl')
-    conftmpl = op.join(current_path, 'help', 'conf.tmpl')
-    sphinxgen.gen(help_basepath, help_destpath, changelog_path, tixurl, confrepl, conftmpl, changelogtmpl)
+    confrepl = {"language": "en"}
+    changelogtmpl = op.join(current_path, "help", "changelog.tmpl")
+    conftmpl = op.join(current_path, "help", "conf.tmpl")
+    sphinxgen.gen(
+        help_basepath,
+        help_destpath,
+        changelog_path,
+        tixurl,
+        confrepl,
+        conftmpl,
+        changelogtmpl,
+    )


 def build_qt_localizations():
-    loc.compile_all_po(op.join('qtlib', 'locale'))
-    loc.merge_locale_dir(op.join('qtlib', 'locale'), 'locale')
+    loc.compile_all_po(op.join("qtlib", "locale"))
+    loc.merge_locale_dir(op.join("qtlib", "locale"), "locale")


 def build_localizations():
-    loc.compile_all_po('locale')
+    loc.compile_all_po("locale")
     build_qt_localizations()
-    locale_dest = op.join('build', 'locale')
+    locale_dest = op.join("build", "locale")
     if op.exists(locale_dest):
         shutil.rmtree(locale_dest)
-    shutil.copytree('locale', locale_dest, ignore=shutil.ignore_patterns('*.po', '*.pot'))
+    shutil.copytree(
+        "locale", locale_dest, ignore=shutil.ignore_patterns("*.po", "*.pot")
+    )


 def build_updatepot():
     print("Building .pot files from source files")
     print("Building core.pot")
-    loc.generate_pot(['core'], op.join('locale', 'core.pot'), ['tr'])
+    loc.generate_pot(["core"], op.join("locale", "core.pot"), ["tr"])
     print("Building columns.pot")
-    loc.generate_pot(['core'], op.join('locale', 'columns.pot'), ['coltr'])
+    loc.generate_pot(["core"], op.join("locale", "columns.pot"), ["coltr"])
     print("Building ui.pot")
     # When we're not under OS X, we don't want to overwrite ui.pot because it contains Cocoa locs
     # We want to merge the generated pot with the old pot in the most preserving way possible.
-    ui_packages = ['qt', op.join('cocoa', 'inter')]
-    loc.generate_pot(ui_packages, op.join('locale', 'ui.pot'), ['tr'], merge=True)
+    ui_packages = ["qt", op.join("cocoa", "inter")]
+    loc.generate_pot(ui_packages, op.join("locale", "ui.pot"), ["tr"], merge=True)
     print("Building qtlib.pot")
-    loc.generate_pot(['qtlib'], op.join('qtlib', 'locale', 'qtlib.pot'), ['tr'])
+    loc.generate_pot(["qtlib"], op.join("qtlib", "locale", "qtlib.pot"), ["tr"])


 def build_mergepot():
     print("Updating .po files using .pot files")
-    loc.merge_pots_into_pos('locale')
-    loc.merge_pots_into_pos(op.join('qtlib', 'locale'))
-    loc.merge_pots_into_pos(op.join('cocoalib', 'locale'))
+    loc.merge_pots_into_pos("locale")
+    loc.merge_pots_into_pos(op.join("qtlib", "locale"))
+    loc.merge_pots_into_pos(op.join("cocoalib", "locale"))


 def build_normpo():
-    loc.normalize_all_pos('locale')
-    loc.normalize_all_pos(op.join('qtlib', 'locale'))
-    loc.normalize_all_pos(op.join('cocoalib', 'locale'))
+    loc.normalize_all_pos("locale")
+    loc.normalize_all_pos(op.join("qtlib", "locale"))
+    loc.normalize_all_pos(op.join("cocoalib", "locale"))


 def build_pe_modules():
     print("Building PE Modules")
     exts = [
         Extension(
             "_block",
-            [op.join('core', 'pe', 'modules', 'block.c'), op.join('core', 'pe', 'modules', 'common.c')]
+            [
+                op.join("core", "pe", "modules", "block.c"),
+                op.join("core", "pe", "modules", "common.c"),
+            ],
         ),
         Extension(
             "_cache",
-            [op.join('core', 'pe', 'modules', 'cache.c'), op.join('core', 'pe', 'modules', 'common.c')]
+            [
+                op.join("core", "pe", "modules", "cache.c"),
+                op.join("core", "pe", "modules", "common.c"),
+            ],
         ),
     ]
-    exts.append(Extension("_block_qt", [op.join('qt', 'pe', 'modules', 'block.c')]))
+    exts.append(Extension("_block_qt", [op.join("qt", "pe", "modules", "block.c")]))
     setup(
-        script_args=['build_ext', '--inplace'],
-        ext_modules=exts,
+        script_args=["build_ext", "--inplace"], ext_modules=exts,
     )
-    move_all('_block_qt*', op.join('qt', 'pe'))
-    move_all('_block*', op.join('core', 'pe'))
-    move_all('_cache*', op.join('core', 'pe'))
+    move_all("_block_qt*", op.join("qt", "pe"))
+    move_all("_block*", op.join("core", "pe"))
+    move_all("_cache*", op.join("core", "pe"))


 def build_normal():
     print("Building dupeGuru with UI qt")
-    add_to_pythonpath('.')
+    add_to_pythonpath(".")
     print("Building dupeGuru")
     build_pe_modules()
     print("Building localizations")
     build_localizations()
     print("Building Qt stuff")
-    print_and_do("pyrcc5 {0} > {1}".format(op.join('qt', 'dg.qrc'), op.join('qt', 'dg_rc.py')))
-    fix_qt_resource_file(op.join('qt', 'dg_rc.py'))
+    print_and_do(
+        "pyrcc5 {0} > {1}".format(op.join("qt", "dg.qrc"), op.join("qt", "dg_rc.py"))
+    )
+    fix_qt_resource_file(op.join("qt", "dg_rc.py"))
     build_help()


 def main():
     options = parse_args()
     if options.clean:
-        if op.exists('build'):
-            shutil.rmtree('build')
-    if not op.exists('build'):
-        os.mkdir('build')
+        if op.exists("build"):
+            shutil.rmtree("build")
+    if not op.exists("build"):
+        os.mkdir("build")
     if options.doc:
         build_help()
     elif options.loc:
@@ -149,5 +185,6 @@ def main():
     else:
         build_normal()

-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()

@@ -1,3 +1,2 @@
-__version__ = '4.0.4'
-__appname__ = 'dupeGuru'
-
+__version__ = "4.0.4"
+__appname__ = "dupeGuru"
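A note on the shapes black produced above: black first tries a call on one line; if it exceeds 88 columns it moves the whole argument list to its own indented line (the --doc and --loc options); if even that does not fit, it explodes the list to one argument per line and appends a trailing comma (--clean, --updatepot, --mergepot, --normpo). A minimal, self-contained sketch of the three shapes, mirroring the diff:

    from optparse import OptionParser

    parser = OptionParser(usage="usage: %prog [options]")

    # 1. Fits in 88 columns: black leaves the call on one line.
    parser.add_option("--doc", dest="doc")

    # 2. Too long on one line, but the arguments fit together:
    #    black moves them to a single indented line.
    parser.add_option(
        "--loc", action="store_true", dest="loc", help="Build only localization"
    )

    # 3. The arguments don't fit on one indented line either:
    #    black explodes them one per line, with a trailing comma.
    parser.add_option(
        "--updatepot",
        action="store_true",
        dest="updatepot",
        help="Generate .pot files from source code.",
    )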
core/app.py (227 changed lines)
@@ -34,8 +34,8 @@ from .gui.ignore_list_dialog import IgnoreListDialog
 from .gui.problem_dialog import ProblemDialog
 from .gui.stats_label import StatsLabel

-HAD_FIRST_LAUNCH_PREFERENCE = 'HadFirstLaunch'
-DEBUG_MODE_PREFERENCE = 'DebugMode'
+HAD_FIRST_LAUNCH_PREFERENCE = "HadFirstLaunch"
+DEBUG_MODE_PREFERENCE = "DebugMode"

 MSG_NO_MARKED_DUPES = tr("There are no marked duplicates. Nothing has been done.")
 MSG_NO_SELECTED_DUPES = tr("There are no selected duplicates. Nothing has been done.")
@@ -44,23 +44,27 @@ MSG_MANY_FILES_TO_OPEN = tr(
     "files are opened with, doing so can create quite a mess. Continue?"
 )

+
 class DestType:
     Direct = 0
     Relative = 1
     Absolute = 2

+
 class JobType:
-    Scan = 'job_scan'
-    Load = 'job_load'
-    Move = 'job_move'
-    Copy = 'job_copy'
-    Delete = 'job_delete'
+    Scan = "job_scan"
+    Load = "job_load"
+    Move = "job_move"
+    Copy = "job_copy"
+    Delete = "job_delete"

+
 class AppMode:
     Standard = 0
     Music = 1
     Picture = 2

+
 JOBID2TITLE = {
     JobType.Scan: tr("Scanning for duplicates"),
     JobType.Load: tr("Loading"),
@@ -69,6 +73,7 @@ JOBID2TITLE = {
     JobType.Delete: tr("Sending to Trash"),
 }

+
 class DupeGuru(Broadcaster):
     """Holds everything together.

@@ -100,7 +105,8 @@ class DupeGuru(Broadcaster):

     Instance of :mod:`meta-gui <core.gui>` table listing the results from :attr:`results`
     """
-    #--- View interface
+
+    # --- View interface
     # get_default(key_name)
     # set_default(key_name, value)
     # show_message(msg)
@@ -116,7 +122,7 @@ class DupeGuru(Broadcaster):

     NAME = PROMPT_NAME = "dupeGuru"

-    PICTURE_CACHE_TYPE = 'sqlite' # set to 'shelve' for a ShelveCache
+    PICTURE_CACHE_TYPE = "sqlite"  # set to 'shelve' for a ShelveCache

     def __init__(self, view):
         if view.get_default(DEBUG_MODE_PREFERENCE):
@@ -124,7 +130,9 @@ class DupeGuru(Broadcaster):
             logging.debug("Debug mode enabled")
         Broadcaster.__init__(self)
         self.view = view
-        self.appdata = desktop.special_folder_path(desktop.SpecialFolder.AppData, appname=self.NAME)
+        self.appdata = desktop.special_folder_path(
+            desktop.SpecialFolder.AppData, appname=self.NAME
+        )
         if not op.exists(self.appdata):
             os.makedirs(self.appdata)
         self.app_mode = AppMode.Standard
@@ -136,11 +144,11 @@ class DupeGuru(Broadcaster):
         # sent to the scanner. They don't have default values because those defaults values are
         # defined in the scanner class.
         self.options = {
-            'escape_filter_regexp': True,
-            'clean_empty_dirs': False,
-            'ignore_hardlink_matches': False,
-            'copymove_dest_type': DestType.Relative,
-            'picture_cache_type': self.PICTURE_CACHE_TYPE
+            "escape_filter_regexp": True,
+            "clean_empty_dirs": False,
+            "ignore_hardlink_matches": False,
+            "copymove_dest_type": DestType.Relative,
+            "picture_cache_type": self.PICTURE_CACHE_TYPE,
         }
         self.selected_dupes = []
         self.details_panel = DetailsPanel(self)
@@ -155,7 +163,7 @@ class DupeGuru(Broadcaster):
         for child in children:
             child.connect()

-    #--- Private
+    # --- Private
     def _recreate_result_table(self):
         if self.result_table is not None:
             self.result_table.disconnect()
@@ -169,26 +177,30 @@ class DupeGuru(Broadcaster):
             self.view.create_results_window()

     def _get_picture_cache_path(self):
-        cache_type = self.options['picture_cache_type']
-        cache_name = 'cached_pictures.shelve' if cache_type == 'shelve' else 'cached_pictures.db'
+        cache_type = self.options["picture_cache_type"]
+        cache_name = (
+            "cached_pictures.shelve" if cache_type == "shelve" else "cached_pictures.db"
+        )
         return op.join(self.appdata, cache_name)

     def _get_dupe_sort_key(self, dupe, get_group, key, delta):
         if self.app_mode in (AppMode.Music, AppMode.Picture):
-            if key == 'folder_path':
-                dupe_folder_path = getattr(dupe, 'display_folder_path', dupe.folder_path)
+            if key == "folder_path":
+                dupe_folder_path = getattr(
+                    dupe, "display_folder_path", dupe.folder_path
+                )
                 return str(dupe_folder_path).lower()
         if self.app_mode == AppMode.Picture:
-            if delta and key == 'dimensions':
+            if delta and key == "dimensions":
                 r = cmp_value(dupe, key)
                 ref_value = cmp_value(get_group().ref, key)
                 return get_delta_dimensions(r, ref_value)
-        if key == 'marked':
+        if key == "marked":
             return self.results.is_marked(dupe)
-        if key == 'percentage':
+        if key == "percentage":
             m = get_group().get_match_of(dupe)
             return m.percentage
-        elif key == 'dupe_count':
+        elif key == "dupe_count":
             return 0
         else:
             result = cmp_value(dupe, key)
@@ -203,21 +215,25 @@ class DupeGuru(Broadcaster):

     def _get_group_sort_key(self, group, key):
         if self.app_mode in (AppMode.Music, AppMode.Picture):
-            if key == 'folder_path':
-                dupe_folder_path = getattr(group.ref, 'display_folder_path', group.ref.folder_path)
+            if key == "folder_path":
+                dupe_folder_path = getattr(
+                    group.ref, "display_folder_path", group.ref.folder_path
+                )
                 return str(dupe_folder_path).lower()
-        if key == 'percentage':
+        if key == "percentage":
             return group.percentage
-        if key == 'dupe_count':
+        if key == "dupe_count":
             return len(group)
-        if key == 'marked':
+        if key == "marked":
             return len([dupe for dupe in group.dupes if self.results.is_marked(dupe)])
         return cmp_value(group.ref, key)

     def _do_delete(self, j, link_deleted, use_hardlinks, direct_deletion):
         def op(dupe):
             j.add_progress()
-            return self._do_delete_dupe(dupe, link_deleted, use_hardlinks, direct_deletion)
+            return self._do_delete_dupe(
+                dupe, link_deleted, use_hardlinks, direct_deletion
+            )

         j.start_job(self.results.mark_count)
         self.results.perform_on_marked(op, True)
@@ -233,7 +249,7 @@ class DupeGuru(Broadcaster):
             else:
                 os.remove(str_path)
         else:
-            send2trash(str_path) # Raises OSError when there's a problem
+            send2trash(str_path)  # Raises OSError when there's a problem
         if link_deleted:
             group = self.results.get_group_of_duplicate(dupe)
             ref = group.ref
@@ -258,8 +274,9 @@ class DupeGuru(Broadcaster):

     def _get_export_data(self):
         columns = [
-            col for col in self.result_table.columns.ordered_columns
-            if col.visible and col.name != 'marked'
+            col
+            for col in self.result_table.columns.ordered_columns
+            if col.visible and col.name != "marked"
         ]
         colnames = [col.display for col in columns]
         rows = []
@@ -273,10 +290,11 @@ class DupeGuru(Broadcaster):

     def _results_changed(self):
         self.selected_dupes = [
-            d for d in self.selected_dupes
+            d
+            for d in self.selected_dupes
             if self.results.get_group_of_duplicate(d) is not None
         ]
-        self.notify('results_changed')
+        self.notify("results_changed")

     def _start_job(self, jobid, func, args=()):
         title = JOBID2TITLE[jobid]
@@ -310,7 +328,9 @@ class DupeGuru(Broadcaster):
             msg = {
                 JobType.Copy: tr("All marked files were copied successfully."),
                 JobType.Move: tr("All marked files were moved successfully."),
-                JobType.Delete: tr("All marked files were successfully sent to Trash."),
+                JobType.Delete: tr(
+                    "All marked files were successfully sent to Trash."
+                ),
             }[jobid]
             self.view.show_message(msg)

@@ -341,9 +361,9 @@ class DupeGuru(Broadcaster):
         if dupes == self.selected_dupes:
             return
         self.selected_dupes = dupes
-        self.notify('dupes_selected')
+        self.notify("dupes_selected")

-    #--- Protected
+    # --- Protected
     def _get_fileclasses(self):
         if self.app_mode == AppMode.Picture:
             return [pe.photo.PLAT_SPECIFIC_PHOTO_CLASS]
@@ -360,7 +380,7 @@ class DupeGuru(Broadcaster):
         else:
             return prioritize.all_categories()

-    #--- Public
+    # --- Public
     def add_directory(self, d):
         """Adds folder ``d`` to :attr:`directories`.

@@ -370,7 +390,7 @@ class DupeGuru(Broadcaster):
         """
         try:
             self.directories.add_path(Path(d))
-            self.notify('directories_changed')
+            self.notify("directories_changed")
         except directories.AlreadyThereError:
             self.view.show_message(tr("'{}' already is in the list.").format(d))
         except directories.InvalidPathError:
@@ -383,7 +403,9 @@ class DupeGuru(Broadcaster):
         if not dupes:
             self.view.show_message(MSG_NO_SELECTED_DUPES)
             return
-        msg = tr("All selected %d matches are going to be ignored in all subsequent scans. Continue?")
+        msg = tr(
+            "All selected %d matches are going to be ignored in all subsequent scans. Continue?"
+        )
         if not self.view.ask_yes_no(msg % len(dupes)):
             return
         for dupe in dupes:
@@ -400,22 +422,22 @@ class DupeGuru(Broadcaster):
         :param str filter: filter to apply
         """
         self.results.apply_filter(None)
-        if self.options['escape_filter_regexp']:
-            filter = escape(filter, set('()[]\\.|+?^'))
-            filter = escape(filter, '*', '.')
+        if self.options["escape_filter_regexp"]:
+            filter = escape(filter, set("()[]\\.|+?^"))
+            filter = escape(filter, "*", ".")
         self.results.apply_filter(filter)
         self._results_changed()

     def clean_empty_dirs(self, path):
-        if self.options['clean_empty_dirs']:
-            while delete_if_empty(path, ['.DS_Store']):
+        if self.options["clean_empty_dirs"]:
+            while delete_if_empty(path, [".DS_Store"]):
                 path = path.parent()

     def clear_picture_cache(self):
         try:
             os.remove(self._get_picture_cache_path())
         except FileNotFoundError:
-            pass # we don't care
+            pass  # we don't care

     def copy_or_move(self, dupe, copy: bool, destination: str, dest_type: DestType):
         source_path = dupe.path
@@ -444,6 +466,7 @@ class DupeGuru(Broadcaster):

         :param bool copy: If True, duplicates will be copied instead of moved
         """
+
         def do(j):
             def op(dupe):
                 j.add_progress()
@@ -459,7 +482,7 @@ class DupeGuru(Broadcaster):
         prompt = tr("Select a directory to {} marked files to").format(opname)
         destination = self.view.select_dest_folder(prompt)
         if destination:
-            desttype = self.options['copymove_dest_type']
+            desttype = self.options["copymove_dest_type"]
             jobid = JobType.Copy if copy else JobType.Move
             self._start_job(jobid, do)

@@ -472,8 +495,9 @@ class DupeGuru(Broadcaster):
         if not self.deletion_options.show(self.results.mark_count):
             return
         args = [
-            self.deletion_options.link_deleted, self.deletion_options.use_hardlinks,
-            self.deletion_options.direct
+            self.deletion_options.link_deleted,
+            self.deletion_options.use_hardlinks,
+            self.deletion_options.direct,
         ]
         logging.debug("Starting deletion job with args %r", args)
         self._start_job(JobType.Delete, self._do_delete, args=args)
@@ -495,7 +519,9 @@ class DupeGuru(Broadcaster):
         The columns and their order in the resulting CSV file is determined in the same way as in
         :meth:`export_to_xhtml`.
         """
-        dest_file = self.view.select_dest_file(tr("Select a destination for your exported CSV"), 'csv')
+        dest_file = self.view.select_dest_file(
+            tr("Select a destination for your exported CSV"), "csv"
+        )
         if dest_file:
             colnames, rows = self._get_export_data()
             try:
@@ -505,13 +531,16 @@ class DupeGuru(Broadcaster):

     def get_display_info(self, dupe, group, delta=False):
         def empty_data():
-            return {c.name: '---' for c in self.result_table.COLUMNS[1:]}
+            return {c.name: "---" for c in self.result_table.COLUMNS[1:]}

         if (dupe is None) or (group is None):
             return empty_data()
         try:
             return dupe.get_display_info(group, delta)
         except Exception as e:
-            logging.warning("Exception on GetDisplayInfo for %s: %s", str(dupe.path), str(e))
+            logging.warning(
+                "Exception on GetDisplayInfo for %s: %s", str(dupe.path), str(e)
+            )
             return empty_data()

     def invoke_custom_command(self):
@@ -521,9 +550,11 @@ class DupeGuru(Broadcaster):
         is replaced with that dupe's ref file. If there's no selection, the command is not invoked.
         If the dupe is a ref, ``%d`` and ``%r`` will be the same.
         """
-        cmd = self.view.get_default('CustomCommand')
+        cmd = self.view.get_default("CustomCommand")
         if not cmd:
-            msg = tr("You have no custom command set up. Set it up in your preferences.")
+            msg = tr(
+                "You have no custom command set up. Set it up in your preferences."
+            )
             self.view.show_message(msg)
             return
         if not self.selected_dupes:
@@ -531,8 +562,8 @@ class DupeGuru(Broadcaster):
         dupe = self.selected_dupes[0]
         group = self.results.get_group_of_duplicate(dupe)
         ref = group.ref
-        cmd = cmd.replace('%d', str(dupe.path))
-        cmd = cmd.replace('%r', str(ref.path))
+        cmd = cmd.replace("%d", str(dupe.path))
+        cmd = cmd.replace("%r", str(ref.path))
         match = re.match(r'"([^"]+)"(.*)', cmd)
         if match is not None:
             # This code here is because subprocess.Popen doesn't seem to accept, under Windows,
@@ -551,9 +582,9 @@ class DupeGuru(Broadcaster):
         is persistent data, is the same as when the last session was closed (when :meth:`save` was
         called).
         """
-        self.directories.load_from_file(op.join(self.appdata, 'last_directories.xml'))
-        self.notify('directories_changed')
-        p = op.join(self.appdata, 'ignore_list.xml')
+        self.directories.load_from_file(op.join(self.appdata, "last_directories.xml"))
+        self.notify("directories_changed")
+        p = op.join(self.appdata, "ignore_list.xml")
         self.ignore_list.load_from_xml(p)
         self.ignore_list_dialog.refresh()

@@ -562,8 +593,10 @@ class DupeGuru(Broadcaster):

         :param str filename: path of the XML file (created with :meth:`save_as`) to load
         """
+
         def do(j):
             self.results.load_from_xml(filename, self._get_file, j)
+
         self._start_job(JobType.Load, do)

     def make_selected_reference(self):
@@ -588,35 +621,36 @@ class DupeGuru(Broadcaster):
         if not self.result_table.power_marker:
             if changed_groups:
                 self.selected_dupes = [
-                    d for d in self.selected_dupes
+                    d
+                    for d in self.selected_dupes
                     if self.results.get_group_of_duplicate(d).ref is d
                 ]
-            self.notify('results_changed')
+            self.notify("results_changed")
         else:
             # If we're in "Dupes Only" mode (previously called Power Marker), things are a bit
             # different. The refs are not shown in the table, and if our operation is successful,
             # this means that there's no way to follow our dupe selection. Then, the best thing to
             # do is to keep our selection index-wise (different dupe selection, but same index
             # selection).
-            self.notify('results_changed_but_keep_selection')
+            self.notify("results_changed_but_keep_selection")

     def mark_all(self):
         """Set all dupes in the results as marked.
         """
         self.results.mark_all()
-        self.notify('marking_changed')
+        self.notify("marking_changed")

     def mark_none(self):
         """Set all dupes in the results as unmarked.
         """
         self.results.mark_none()
-        self.notify('marking_changed')
+        self.notify("marking_changed")

     def mark_invert(self):
         """Invert the marked state of all dupes in the results.
         """
         self.results.mark_invert()
-        self.notify('marking_changed')
+        self.notify("marking_changed")

     def mark_dupe(self, dupe, marked):
         """Change marked status of ``dupe``.
@@ -629,7 +663,7 @@ class DupeGuru(Broadcaster):
             self.results.mark(dupe)
         else:
             self.results.unmark(dupe)
-        self.notify('marking_changed')
+        self.notify("marking_changed")

     def open_selected(self):
         """Open :attr:`selected_dupes` with their associated application.
@@ -656,7 +690,7 @@ class DupeGuru(Broadcaster):
             indexes = sorted(indexes, reverse=True)
             for index in indexes:
                 del self.directories[index]
-            self.notify('directories_changed')
+            self.notify("directories_changed")
         except IndexError:
             pass

@@ -669,7 +703,7 @@ class DupeGuru(Broadcaster):
         :type duplicates: list of :class:`~core.fs.File`
         """
         self.results.remove_duplicates(self.without_ref(duplicates))
-        self.notify('results_changed_but_keep_selection')
+        self.notify("results_changed_but_keep_selection")

     def remove_marked(self):
         """Removed marked duplicates from the results (without touching the files themselves).
@@ -724,7 +758,9 @@ class DupeGuru(Broadcaster):
             if group.prioritize(key_func=sort_key):
                 count += 1
         self._results_changed()
-        msg = tr("{} duplicate groups were changed by the re-prioritization.").format(count)
+        msg = tr("{} duplicate groups were changed by the re-prioritization.").format(
+            count
+        )
         self.view.show_message(msg)

     def reveal_selected(self):
@@ -734,10 +770,10 @@ class DupeGuru(Broadcaster):
     def save(self):
         if not op.exists(self.appdata):
             os.makedirs(self.appdata)
-        self.directories.save_to_file(op.join(self.appdata, 'last_directories.xml'))
-        p = op.join(self.appdata, 'ignore_list.xml')
+        self.directories.save_to_file(op.join(self.appdata, "last_directories.xml"))
+        p = op.join(self.appdata, "ignore_list.xml")
         self.ignore_list.save_to_xml(p)
-        self.notify('save_session')
+        self.notify("save_session")

     def save_as(self, filename):
         """Save results in ``filename``.
@@ -756,7 +792,9 @@ class DupeGuru(Broadcaster):
         """
         scanner = self.SCANNER_CLASS()
         if not self.directories.has_any_file():
-            self.view.show_message(tr("The selected directories contain no scannable file."))
+            self.view.show_message(
+                tr("The selected directories contain no scannable file.")
+            )
             return
         # Send relevant options down to the scanner instance
         for k, v in self.options.items():
@@ -771,12 +809,16 @@ class DupeGuru(Broadcaster):
         def do(j):
             j.set_progress(0, tr("Collecting files to scan"))
             if scanner.scan_type == ScanType.Folders:
-                files = list(self.directories.get_folders(folderclass=se.fs.Folder, j=j))
+                files = list(
+                    self.directories.get_folders(folderclass=se.fs.Folder, j=j)
+                )
             else:
-                files = list(self.directories.get_files(fileclasses=self.fileclasses, j=j))
-            if self.options['ignore_hardlink_matches']:
+                files = list(
+                    self.directories.get_files(fileclasses=self.fileclasses, j=j)
+                )
+            if self.options["ignore_hardlink_matches"]:
                 files = self._remove_hardlink_dupes(files)
-            logging.info('Scanning %d files' % len(files))
+            logging.info("Scanning %d files" % len(files))
             self.results.groups = scanner.get_dupe_groups(files, self.ignore_list, j)
             self.discarded_file_count = scanner.discarded_file_count

@@ -792,12 +834,16 @@ class DupeGuru(Broadcaster):
             markfunc = self.results.mark
         for dupe in selected:
             markfunc(dupe)
-        self.notify('marking_changed')
+        self.notify("marking_changed")

     def without_ref(self, dupes):
         """Returns ``dupes`` with all reference elements removed.
         """
-        return [dupe for dupe in dupes if self.results.get_group_of_duplicate(dupe).ref is not dupe]
+        return [
+            dupe
+            for dupe in dupes
+            if self.results.get_group_of_duplicate(dupe).ref is not dupe
+        ]

     def get_default(self, key, fallback_value=None):
         result = nonone(self.view.get_default(key), fallback_value)
@@ -812,7 +858,7 @@ class DupeGuru(Broadcaster):
     def set_default(self, key, value):
         self.view.set_default(key, value)

-    #--- Properties
+    # --- Properties
     @property
     def stat_line(self):
         result = self.results.stat_line
@@ -836,12 +882,21 @@ class DupeGuru(Broadcaster):
     @property
     def METADATA_TO_READ(self):
         if self.app_mode == AppMode.Picture:
-            return ['size', 'mtime', 'dimensions', 'exif_timestamp']
+            return ["size", "mtime", "dimensions", "exif_timestamp"]
         elif self.app_mode == AppMode.Music:
             return [
-                'size', 'mtime', 'duration', 'bitrate', 'samplerate', 'title', 'artist',
-                'album', 'genre', 'year', 'track', 'comment'
+                "size",
+                "mtime",
+                "duration",
+                "bitrate",
+                "samplerate",
+                "title",
+                "artist",
+                "album",
+                "genre",
+                "year",
+                "track",
+                "comment",
             ]
         else:
-            return ['size', 'mtime']
+            return ["size", "mtime"]
@@ -15,12 +15,13 @@ from hscommon.util import FileOrPath
 from . import fs

 __all__ = [
-    'Directories',
-    'DirectoryState',
-    'AlreadyThereError',
-    'InvalidPathError',
+    "Directories",
+    "DirectoryState",
+    "AlreadyThereError",
+    "InvalidPathError",
 ]


 class DirectoryState:
     """Enum describing how a folder should be considered.

@@ -28,16 +29,20 @@ class DirectoryState:
     * DirectoryState.Reference: Scan files, but make sure never to delete any of them
     * DirectoryState.Excluded: Don't scan this folder
     """
+
     Normal = 0
     Reference = 1
     Excluded = 2

+
 class AlreadyThereError(Exception):
     """The path being added is already in the directory list"""

+
 class InvalidPathError(Exception):
     """The path being added is invalid"""

+
 class Directories:
     """Holds user folder selection.

@@ -47,7 +52,8 @@ class Directories:
     Then, when the user starts the scan, :meth:`get_files` is called to retrieve all files (wrapped
     in :mod:`core.fs`) that have to be scanned according to the chosen folders/states.
     """
-    #---Override
+
+    # ---Override
     def __init__(self):
         self._dirs = []
         # {path: state}
@@ -68,10 +74,10 @@ class Directories:
     def __len__(self):
         return len(self._dirs)

-    #---Private
+    # ---Private
     def _default_state_for_path(self, path):
         # Override this in subclasses to specify the state of some special folders.
-        if path.name.startswith('.'): # hidden
+        if path.name.startswith("."):  # hidden
             return DirectoryState.Excluded

     def _get_files(self, from_path, fileclasses, j):
@@ -83,11 +89,13 @@ class Directories:
             # Recursively get files from folders with lots of subfolder is expensive. However, there
             # might be a subfolder in this path that is not excluded. What we want to do is to skim
             # through self.states and see if we must continue, or we can stop right here to save time
-            if not any(p[:len(root)] == root for p in self.states):
+            if not any(p[: len(root)] == root for p in self.states):
                 del dirs[:]
             try:
                 if state != DirectoryState.Excluded:
-                    found_files = [fs.get_file(root + f, fileclasses=fileclasses) for f in files]
+                    found_files = [
+                        fs.get_file(root + f, fileclasses=fileclasses) for f in files
+                    ]
                     found_files = [f for f in found_files if f is not None]
                     # In some cases, directories can be considered as files by dupeGuru, which is
                     # why we have this line below. In fact, there only one case: Bundle files under
@@ -97,7 +105,11 @@ class Directories:
                         if f is not None:
                             found_files.append(f)
                             dirs.remove(d)
-                logging.debug("Collected %d files in folder %s", len(found_files), str(from_path))
+                logging.debug(
+                    "Collected %d files in folder %s",
+                    len(found_files),
+                    str(from_path),
+                )
                 for file in found_files:
                     file.is_ref = state == DirectoryState.Reference
                     yield file
@@ -118,7 +130,7 @@ class Directories:
         except (EnvironmentError, fs.InvalidPath):
             pass

-    #---Public
+    # ---Public
     def add_path(self, path):
         """Adds ``path`` to self, if not already there.

@@ -212,21 +224,21 @@ class Directories:
             root = ET.parse(infile).getroot()
         except Exception:
             return
-        for rdn in root.getiterator('root_directory'):
+        for rdn in root.getiterator("root_directory"):
             attrib = rdn.attrib
-            if 'path' not in attrib:
+            if "path" not in attrib:
                 continue
-            path = attrib['path']
+            path = attrib["path"]
             try:
                 self.add_path(Path(path))
             except (AlreadyThereError, InvalidPathError):
                 pass
-        for sn in root.getiterator('state'):
+        for sn in root.getiterator("state"):
             attrib = sn.attrib
-            if not ('path' in attrib and 'value' in attrib):
+            if not ("path" in attrib and "value" in attrib):
                 continue
-            path = attrib['path']
-            state = attrib['value']
+            path = attrib["path"]
+            state = attrib["value"]
             self.states[Path(path)] = int(state)

     def save_to_file(self, outfile):
@@ -234,17 +246,17 @@ class Directories:

         :param file outfile: path or file pointer to XML file to save to.
         """
-        with FileOrPath(outfile, 'wb') as fp:
-            root = ET.Element('directories')
+        with FileOrPath(outfile, "wb") as fp:
+            root = ET.Element("directories")
             for root_path in self:
-                root_path_node = ET.SubElement(root, 'root_directory')
-                root_path_node.set('path', str(root_path))
+                root_path_node = ET.SubElement(root, "root_directory")
+                root_path_node.set("path", str(root_path))
             for path, state in self.states.items():
-                state_node = ET.SubElement(root, 'state')
-                state_node.set('path', str(path))
-                state_node.set('value', str(state))
+                state_node = ET.SubElement(root, "state")
+                state_node.set("path", str(path))
+                state_node.set("value", str(state))
             tree = ET.ElementTree(root)
-            tree.write(fp, encoding='utf-8')
+            tree.write(fp, encoding="utf-8")

     def set_state(self, path, state):
         """Set the state of folder at ``path``.
@@ -259,4 +271,3 @@ class Directories:
             if path.is_parent_of(iter_path):
                 del self.states[iter_path]
         self.states[path] = state
-
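One change in the hunk above deserves a comment: `p[:len(root)]` became `p[: len(root)]`. When a slice bound is anything more complex than a plain name or number, black treats the slice colon like a binary operator and spaces it symmetrically; with an empty lower bound that leaves a space only after the colon. When both bounds are complex, the same rule produces whitespace before the colon, which pycodestyle's E203 flags, and that is part of why this commit also updates the flake8 arguments. A small illustration with throwaway values:

    items = list(range(10))
    n = 3

    items[:n]             # simple bound: black adds no spaces
    items[: n + 1]        # complex bound, empty start: space after the colon only
    items[n - 1 : n + 1]  # complex bounds on both sides: spaces around the colon,
                          # which default flake8 settings report as E203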
core/engine.py (112 changed lines)
@@ -17,25 +17,26 @@ from hscommon.util import flatten, multi_replace
 from hscommon.trans import tr
 from hscommon.jobprogress import job

-(
-    WEIGHT_WORDS,
-    MATCH_SIMILAR_WORDS,
-    NO_FIELD_ORDER,
-) = range(3)
+(WEIGHT_WORDS, MATCH_SIMILAR_WORDS, NO_FIELD_ORDER,) = range(3)

 JOB_REFRESH_RATE = 100


 def getwords(s):
     # We decompose the string so that ascii letters with accents can be part of the word.
-    s = normalize('NFD', s)
-    s = multi_replace(s, "-_&+():;\\[]{}.,<>/?~!@#$*", ' ').lower()
-    s = ''.join(c for c in s if c in string.ascii_letters + string.digits + string.whitespace)
-    return [_f for _f in s.split(' ') if _f] # remove empty elements
+    s = normalize("NFD", s)
+    s = multi_replace(s, "-_&+():;\\[]{}.,<>/?~!@#$*", " ").lower()
+    s = "".join(
+        c for c in s if c in string.ascii_letters + string.digits + string.whitespace
+    )
+    return [_f for _f in s.split(" ") if _f]  # remove empty elements


 def getfields(s):
-    fields = [getwords(field) for field in s.split(' - ')]
+    fields = [getwords(field) for field in s.split(" - ")]
     return [_f for _f in fields if _f]


 def unpack_fields(fields):
     result = []
     for field in fields:
@@ -45,6 +46,7 @@ def unpack_fields(fields):
             result.append(field)
     return result

+
 def compare(first, second, flags=()):
     """Returns the % of words that match between ``first`` and ``second``

@@ -55,11 +57,11 @@ def compare(first, second, flags=()):
         return 0
     if any(isinstance(element, list) for element in first):
         return compare_fields(first, second, flags)
-    second = second[:] #We must use a copy of second because we remove items from it
+    second = second[:]  # We must use a copy of second because we remove items from it
     match_similar = MATCH_SIMILAR_WORDS in flags
     weight_words = WEIGHT_WORDS in flags
     joined = first + second
-    total_count = (sum(len(word) for word in joined) if weight_words else len(joined))
+    total_count = sum(len(word) for word in joined) if weight_words else len(joined)
     match_count = 0
     in_order = True
     for word in first:
@@ -71,12 +73,13 @@ def compare(first, second, flags=()):
             if second[0] != word:
                 in_order = False
             second.remove(word)
-            match_count += (len(word) if weight_words else 1)
+            match_count += len(word) if weight_words else 1
     result = round(((match_count * 2) / total_count) * 100)
     if (result == 100) and (not in_order):
-        result = 99 # We cannot consider a match exact unless the ordering is the same
+        result = 99  # We cannot consider a match exact unless the ordering is the same
     return result

+
 def compare_fields(first, second, flags=()):
     """Returns the score for the lowest matching :ref:`fields`.

@@ -87,7 +90,7 @@ def compare_fields(first, second, flags=()):
         return 0
     if NO_FIELD_ORDER in flags:
         results = []
-        #We don't want to remove field directly in the list. We must work on a copy.
+        # We don't want to remove field directly in the list. We must work on a copy.
         second = second[:]
         for field1 in first:
             max = 0
@@ -101,9 +104,12 @@ def compare_fields(first, second, flags=()):
             if matched_field:
                 second.remove(matched_field)
     else:
-        results = [compare(field1, field2, flags) for field1, field2 in zip(first, second)]
+        results = [
+            compare(field1, field2, flags) for field1, field2 in zip(first, second)
+        ]
     return min(results) if results else 0

+
 def build_word_dict(objects, j=job.nulljob):
     """Returns a dict of objects mapped by their words.

@@ -113,11 +119,14 @@ def build_word_dict(objects, j=job.nulljob):
     The result will be a dict with words as keys, lists of objects as values.
     """
     result = defaultdict(set)
-    for object in j.iter_with_progress(objects, 'Prepared %d/%d files', JOB_REFRESH_RATE):
+    for object in j.iter_with_progress(
+        objects, "Prepared %d/%d files", JOB_REFRESH_RATE
+    ):
         for word in unpack_fields(object.words):
             result[word].add(object)
     return result

+
 def merge_similar_words(word_dict):
     """Take all keys in ``word_dict`` that are similar, and merge them together.

@@ -126,7 +135,7 @@ def merge_similar_words(word_dict):
     a word equal to the other.
     """
     keys = list(word_dict.keys())
-    keys.sort(key=len)# we want the shortest word to stay
+    keys.sort(key=len)  # we want the shortest word to stay
     while keys:
         key = keys.pop(0)
         similars = difflib.get_close_matches(key, keys, 100, 0.8)
@@ -138,6 +147,7 @@ def merge_similar_words(word_dict):
             del word_dict[similar]
             keys.remove(similar)

+
 def reduce_common_words(word_dict, threshold):
     """Remove all objects from ``word_dict`` values where the object count >= ``threshold``

@@ -146,7 +156,9 @@ def reduce_common_words(word_dict, threshold):
     The exception to this removal are the objects where all the words of the object are common.
     Because if we remove them, we will miss some duplicates!
     """
-    uncommon_words = set(word for word, objects in word_dict.items() if len(objects) < threshold)
+    uncommon_words = set(
+        word for word, objects in word_dict.items() if len(objects) < threshold
+    )
     for word, objects in list(word_dict.items()):
         if len(objects) < threshold:
             continue
@@ -159,11 +171,13 @@ def reduce_common_words(word_dict, threshold):
         else:
             del word_dict[word]

+
 # Writing docstrings in a namedtuple is tricky. From Python 3.3, it's possible to set __doc__, but
 # some research allowed me to find a more elegant solution, which is what is done here. See
 # http://stackoverflow.com/questions/1606436/adding-docstrings-to-namedtuples-in-python

-class Match(namedtuple('Match', 'first second percentage')):
+
+class Match(namedtuple("Match", "first second percentage")):
     """Represents a match between two :class:`~core.fs.File`.

     Regarless of the matching method, when two files are determined to match, a Match pair is created,
@@ -182,16 +196,24 @@ class Match(namedtuple("Match", "first second percentage")):
     their match level according to the scan method which found the match. int from 1 to 100. For
     exact scan methods, such as Contents scans, this will always be 100.
     """
+
     __slots__ = ()


 def get_match(first, second, flags=()):
-    #it is assumed here that first and second both have a "words" attribute
+    # it is assumed here that first and second both have a "words" attribute
     percentage = compare(first.words, second.words, flags)
     return Match(first, second, percentage)


 def getmatches(
-        objects, min_match_percentage=0, match_similar_words=False, weight_words=False,
-        no_field_order=False, j=job.nulljob):
+    objects,
+    min_match_percentage=0,
+    match_similar_words=False,
+    weight_words=False,
+    no_field_order=False,
+    j=job.nulljob,
+):
     """Returns a list of :class:`Match` within ``objects`` after fuzzily matching their words.

     :param objects: List of :class:`~core.fs.File` to match.
@@ -206,7 +228,7 @@ def getmatches(
     j = j.start_subjob(2)
     sj = j.start_subjob(2)
     for o in objects:
-        if not hasattr(o, 'words'):
+        if not hasattr(o, "words"):
             o.words = getwords(o.name)
     word_dict = build_word_dict(objects, sj)
     reduce_common_words(word_dict, COMMON_WORD_THRESHOLD)
@@ -241,11 +263,15 @@ def getmatches(
     except MemoryError:
         # This is the place where the memory usage is at its peak during the scan.
         # Just continue the process with an incomplete list of matches.
-        del compared # This should give us enough room to call logging.
-        logging.warning('Memory Overflow. Matches: %d. Word dict: %d' % (len(result), len(word_dict)))
+        del compared  # This should give us enough room to call logging.
+        logging.warning(
+            "Memory Overflow. Matches: %d. Word dict: %d"
+            % (len(result), len(word_dict))
+        )
         return result
     return result

+
 def getmatches_by_contents(files, j=job.nulljob):
     """Returns a list of :class:`Match` within ``files`` if their contents is the same.

@@ -263,13 +289,14 @@ def getmatches_by_contents(files, j=job.nulljob):
     for group in possible_matches:
         for first, second in itertools.combinations(group, 2):
             if first.is_ref and second.is_ref:
-                continue # Don't spend time comparing two ref pics together.
+                continue  # Don't spend time comparing two ref pics together.
             if first.md5partial == second.md5partial:
                 if first.md5 == second.md5:
                     result.append(Match(first, second, 100))
         j.add_progress(desc=tr("%d matches found") % len(result))
     return result

+
 class Group:
     """A group of :class:`~core.fs.File` that match together.

@@ -297,7 +324,8 @@ class Group:

     Average match percentage of match pairs containing :attr:`ref`.
     """
-    #---Override
+
+    # ---Override
     def __init__(self):
         self._clear()

@@ -313,7 +341,7 @@ class Group:
     def __len__(self):
         return len(self.ordered)

-    #---Private
+    # ---Private
     def _clear(self):
         self._percentage = None
         self._matches_for_ref = None
@@ -328,7 +356,7 @@ class Group:
         self._matches_for_ref = [match for match in self.matches if ref in match]
         return self._matches_for_ref

-    #---Public
+    # ---Public
     def add_match(self, match):
         """Adds ``match`` to internal match list and possibly add duplicates to the group.

@@ -339,6 +367,7 @@ class Group:

         :param tuple match: pair of :class:`~core.fs.File` to add
         """
+
         def add_candidate(item, match):
             matches = self.candidates[item]
             matches.add(match)
@@ -362,7 +391,11 @@ class Group:

         You can call this after the duplicate scanning process to free a bit of memory.
         """
-        discarded = set(m for m in self.matches if not all(obj in self.unordered for obj in [m.first, m.second]))
+        discarded = set(
+            m
+            for m in self.matches
+            if not all(obj in self.unordered for obj in [m.first, m.second])
+        )
         self.matches -= discarded
         self.candidates = defaultdict(set)
         return discarded
@@ -409,7 +442,9 @@ class Group:
         self.unordered.remove(item)
         self._percentage = None
         self._matches_for_ref = None
-        if (len(self) > 1) and any(not getattr(item, 'is_ref', False) for item in self):
+        if (len(self) > 1) and any(
+            not getattr(item, "is_ref", False) for item in self
+        ):
             if discard_matches:
                 self.matches = set(m for m in self.matches if item not in m)
         else:
@@ -438,7 +473,9 @@ class Group:
         if self._percentage is None:
             if self.dupes:
                 matches = self._get_matches_for_ref()
-                self._percentage = sum(match.percentage for match in matches) // len(matches)
+                self._percentage = sum(match.percentage for match in matches) // len(
+                    matches
+                )
             else:
                 self._percentage = 0
         return self._percentage
@@ -485,7 +522,7 @@ def get_groups(matches):
             del dupe2group
             del matches
             # should free enough memory to continue
-            logging.warning('Memory Overflow. Groups: {0}'.format(len(groups)))
+            logging.warning("Memory Overflow. Groups: {0}".format(len(groups)))
     # Now that we have a group, we have to discard groups' matches and see if there're any "orphan"
     # matches, that is, matches that were candidate in a group but that none of their 2 files were
     # accepted in the group. With these orphan groups, it's safe to build additional groups
@@ -493,9 +530,12 @@ def get_groups(matches):
     orphan_matches = []
     for group in groups:
         orphan_matches += {
-            m for m in group.discard_matches()
+            m
+            for m in group.discard_matches()
             if not any(obj in matched_files for obj in [m.first, m.second])
         }
     if groups and orphan_matches:
-        groups += get_groups(orphan_matches) # no job, as it isn't supposed to take a long time
+        groups += get_groups(
+            orphan_matches
+        )  # no job, as it isn't supposed to take a long time
     return groups
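Two details in the engine.py hunks are worth calling out. The collapse of the multi-line tuple into `(WEIGHT_WORDS, MATCH_SIMILAR_WORDS, NO_FIELD_ORDER,) = range(3)` is, judging by the output, the behavior of a pre-20.8b0 black release: a trailing comma did not yet force one element per line (the "magic trailing comma" arrived in black 20.8b0, after which black would keep the exploded form). The other recurring change is comment normalization: black rewrites inline comments to exactly two spaces before the `#` and one space after it, without touching the comment text. A before/after sketch:

    keys = ["bb", "a", "ccc"]

    # Before black: comment glued to the code, no space after "#"
    keys.sort(key=len)# we want the shortest word to stay

    # After black: two spaces before "#", one space after it
    keys.sort(key=len)  # we want the shortest word to stay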
@ -114,36 +114,42 @@ ROW_TEMPLATE = """
|
||||
|
||||
CELL_TEMPLATE = """<td>{value}</td>"""
|
||||
|
||||
|
||||
def export_to_xhtml(colnames, rows):
|
||||
# a row is a list of values with the first value being a flag indicating if the row should be indented
|
||||
if rows:
|
||||
assert len(rows[0]) == len(colnames) + 1 # + 1 is for the "indented" flag
|
||||
colheaders = ''.join(COLHEADERS_TEMPLATE.format(name=name) for name in colnames)
|
||||
assert len(rows[0]) == len(colnames) + 1 # + 1 is for the "indented" flag
|
||||
colheaders = "".join(COLHEADERS_TEMPLATE.format(name=name) for name in colnames)
|
||||
rendered_rows = []
|
||||
previous_group_id = None
|
||||
for row in rows:
|
||||
# [2:] is to remove the indented flag + filename
|
||||
if row[0] != previous_group_id:
|
||||
# We've just changed dupe group, which means that this dupe is a ref. We don't indent it.
|
||||
indented = ''
|
||||
indented = ""
|
||||
else:
|
||||
indented = 'indented'
|
||||
indented = "indented"
|
||||
filename = row[1]
|
||||
cells = ''.join(CELL_TEMPLATE.format(value=value) for value in row[2:])
|
||||
rendered_rows.append(ROW_TEMPLATE.format(indented=indented, filename=filename, cells=cells))
|
||||
cells = "".join(CELL_TEMPLATE.format(value=value) for value in row[2:])
|
||||
rendered_rows.append(
|
||||
ROW_TEMPLATE.format(indented=indented, filename=filename, cells=cells)
|
||||
)
|
||||
previous_group_id = row[0]
|
||||
rendered_rows = ''.join(rendered_rows)
|
||||
rendered_rows = "".join(rendered_rows)
|
||||
# The main template can't use format because the css code uses {}
|
||||
content = MAIN_TEMPLATE.replace('$colheaders', colheaders).replace('$rows', rendered_rows)
|
||||
content = MAIN_TEMPLATE.replace("$colheaders", colheaders).replace(
|
||||
"$rows", rendered_rows
|
||||
)
|
||||
folder = mkdtemp()
|
||||
destpath = op.join(folder, 'export.htm')
|
||||
fp = open(destpath, 'wt', encoding='utf-8')
|
||||
destpath = op.join(folder, "export.htm")
|
||||
fp = open(destpath, "wt", encoding="utf-8")
|
||||
fp.write(content)
|
||||
fp.close()
|
||||
return destpath
|
||||
|
||||
|
||||
def export_to_csv(dest, colnames, rows):
|
||||
writer = csv.writer(open(dest, 'wt', encoding='utf-8'))
|
||||
writer = csv.writer(open(dest, "wt", encoding="utf-8"))
|
||||
writer.writerow(["Group ID"] + colnames)
|
||||
for row in rows:
|
||||
writer.writerow(row)
|
||||
|
core/fs.py (80 changed lines)
@ -17,19 +17,20 @@ import logging
|
||||
from hscommon.util import nonone, get_file_ext
|
||||
|
||||
__all__ = [
|
||||
'File',
|
||||
'Folder',
|
||||
'get_file',
|
||||
'get_files',
|
||||
'FSError',
|
||||
'AlreadyExistsError',
|
||||
'InvalidPath',
|
||||
'InvalidDestinationError',
|
||||
'OperationError',
|
||||
"File",
|
||||
"Folder",
|
||||
"get_file",
|
||||
"get_files",
|
||||
"FSError",
|
||||
"AlreadyExistsError",
|
||||
"InvalidPath",
|
||||
"InvalidDestinationError",
|
||||
"OperationError",
|
||||
]
|
||||
|
||||
NOT_SET = object()
|
||||
|
||||
|
||||
class FSError(Exception):
|
||||
cls_message = "An error has occured on '{name}' in '{parent}'"
|
||||
|
||||
@ -40,8 +41,8 @@ class FSError(Exception):
|
||||
elif isinstance(fsobject, File):
|
||||
name = fsobject.name
|
||||
else:
|
||||
name = ''
|
||||
parentname = str(parent) if parent is not None else ''
|
||||
name = ""
|
||||
parentname = str(parent) if parent is not None else ""
|
||||
Exception.__init__(self, message.format(name=name, parent=parentname))
|
||||
|
||||
|
||||
@ -49,32 +50,39 @@ class AlreadyExistsError(FSError):
|
||||
"The directory or file name we're trying to add already exists"
|
||||
cls_message = "'{name}' already exists in '{parent}'"
|
||||
|
||||
|
||||
class InvalidPath(FSError):
|
||||
"The path of self is invalid, and cannot be worked with."
|
||||
cls_message = "'{name}' is invalid."
|
||||
|
||||
|
||||
class InvalidDestinationError(FSError):
|
||||
"""A copy/move operation has been called, but the destination is invalid."""
|
||||
|
||||
cls_message = "'{name}' is an invalid destination for this operation."
|
||||
|
||||
|
||||
class OperationError(FSError):
|
||||
"""A copy/move/delete operation has been called, but the checkup after the
|
||||
operation shows that it didn't work."""
|
||||
|
||||
cls_message = "Operation on '{name}' failed."
|
||||
|
||||
|
||||
class File:
|
||||
"""Represents a file and holds metadata to be used for scanning.
|
||||
"""
|
||||
|
||||
INITIAL_INFO = {
|
||||
'size': 0,
|
||||
'mtime': 0,
|
||||
'md5': '',
|
||||
'md5partial': '',
|
||||
"size": 0,
|
||||
"mtime": 0,
|
||||
"md5": "",
|
||||
"md5partial": "",
|
||||
}
|
||||
# Slots for File make us save quite a bit of memory. In a memory test I've made with a lot of
|
||||
# files, I saved 35% memory usage with "unread" files (no _read_info() call) and gains become
|
||||
# even greater when we take into account read attributes (70%!). Yeah, it's worth it.
|
||||
__slots__ = ('path', 'is_ref', 'words') + tuple(INITIAL_INFO.keys())
|
||||
__slots__ = ("path", "is_ref", "words") + tuple(INITIAL_INFO.keys())
|
||||
|
||||
def __init__(self, path):
|
||||
self.path = path
|
@@ -90,25 +98,27 @@ class File:
             try:
                 self._read_info(attrname)
             except Exception as e:
-                logging.warning("An error '%s' was raised while decoding '%s'", e, repr(self.path))
+                logging.warning(
+                    "An error '%s' was raised while decoding '%s'", e, repr(self.path)
+                )
             result = object.__getattribute__(self, attrname)
             if result is NOT_SET:
                 result = self.INITIAL_INFO[attrname]
         return result

-    #This offset is where we should start reading the file to get a partial md5
-    #For audio file, it should be where audio data starts
+    # This offset is where we should start reading the file to get a partial md5
+    # For audio file, it should be where audio data starts
     def _get_md5partial_offset_and_size(self):
-        return (0x4000, 0x4000) #16Kb
+        return (0x4000, 0x4000)  # 16Kb

     def _read_info(self, field):
-        if field in ('size', 'mtime'):
+        if field in ("size", "mtime"):
             stats = self.path.stat()
             self.size = nonone(stats.st_size, 0)
             self.mtime = nonone(stats.st_mtime, 0)
-        elif field == 'md5partial':
+        elif field == "md5partial":
             try:
-                fp = self.path.open('rb')
+                fp = self.path.open("rb")
                 offset, size = self._get_md5partial_offset_and_size()
                 fp.seek(offset)
                 partialdata = fp.read(size)
@@ -117,14 +127,14 @@ class File:
                 fp.close()
             except Exception:
                 pass
-        elif field == 'md5':
+        elif field == "md5":
             try:
-                fp = self.path.open('rb')
+                fp = self.path.open("rb")
                 md5 = hashlib.md5()
                 # The goal here is to not run out of memory on really big files. However, the chunk
                 # size has to be large enough so that the python loop isn't too costly in terms of
                 # CPU.
-                CHUNK_SIZE = 1024 * 1024 # 1 mb
+                CHUNK_SIZE = 1024 * 1024  # 1 mb
                 filedata = fp.read(CHUNK_SIZE)
                 while filedata:
                     md5.update(filedata)
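The chunked read above is the standard bounded-memory hashing loop. As a standalone sketch (the helper name `file_md5` is hypothetical), the same idea looks like this:

import hashlib


def file_md5(path, chunk_size=1024 * 1024):
    # Hash in 1 MB chunks so huge files never have to fit in memory at once,
    # while keeping the Python-level loop cheap.
    digest = hashlib.md5()
    with open(path, "rb") as fp:
        while True:
            chunk = fp.read(chunk_size)
            if not chunk:
                break
            digest.update(chunk)
    return digest.digest()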
@@ -144,7 +154,7 @@ class File:
         for attrname in attrnames:
             getattr(self, attrname)

-    #--- Public
+    # --- Public
     @classmethod
     def can_handle(cls, path):
         """Returns whether this file wrapper class can handle ``path``.
@@ -170,7 +180,7 @@ class File:
         """
         raise NotImplementedError()

-    #--- Properties
+    # --- Properties
     @property
     def extension(self):
         return get_file_ext(self.name)
@@ -189,7 +199,8 @@ class Folder(File):

     It has the size/md5 info of a File, but its values are the sum of its subitems.
     """
-    __slots__ = File.__slots__ + ('_subfolders', )
+
+    __slots__ = File.__slots__ + ("_subfolders",)

     def __init__(self, path):
         File.__init__(self, path)
@@ -201,12 +212,12 @@ class Folder(File):
         return folders + files

     def _read_info(self, field):
-        if field in {'size', 'mtime'}:
+        if field in {"size", "mtime"}:
             size = sum((f.size for f in self._all_items()), 0)
             self.size = size
             stats = self.path.stat()
             self.mtime = nonone(stats.st_mtime, 0)
-        elif field in {'md5', 'md5partial'}:
+        elif field in {"md5", "md5partial"}:
             # What's sensitive here is that we must make sure that subfiles'
             # md5 are always added up in the same order, but we also want a
             # different md5 if a file gets moved in a different subdirectory.
@@ -214,7 +225,7 @@ class Folder(File):
                 items = self._all_items()
                 items.sort(key=lambda f: f.path)
                 md5s = [getattr(f, field) for f in items]
-                return b''.join(md5s)
+                return b"".join(md5s)

             md5 = hashlib.md5(get_dir_md5_concat())
             digest = md5.digest()
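The sorting comment above captures the invariant: concatenate child digests in a deterministic order, keyed by path so moves still change the result. A hedged standalone sketch (the `folder_digest` helper is hypothetical):

import hashlib


def folder_digest(entries):
    # entries: iterable of (path, md5_bytes) pairs for a folder's contents.
    # Sorting by path makes the concatenation order stable across runs, while a
    # file moving to another subdirectory changes its path and thus the digest.
    ordered = sorted(entries, key=lambda pair: pair[0])
    concat = b"".join(md5 for _, md5 in ordered)
    return hashlib.md5(concat).digest()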
@@ -223,7 +234,9 @@ class Folder(File):
     @property
     def subfolders(self):
         if self._subfolders is None:
-            subfolders = [p for p in self.path.listdir() if not p.islink() and p.isdir()]
+            subfolders = [
+                p for p in self.path.listdir() if not p.islink() and p.isdir()
+            ]
             self._subfolders = [self.__class__(p) for p in subfolders]
         return self._subfolders

@@ -244,6 +257,7 @@ def get_file(path, fileclasses=[File]):
         if fileclass.can_handle(path):
             return fileclass(path)

+
 def get_files(path, fileclasses=[File]):
     """Returns a list of :class:`File` for each file contained in ``path``.

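For context on the `get_file` hunk: `fileclasses` is a priority-ordered list, and the first class whose `can_handle()` accepts the path wins. An illustrative sketch (the `TextFile` subclass and `path` value are hypothetical):

class TextFile(File):
    @classmethod
    def can_handle(cls, path):
        return str(path).endswith(".txt")


# Earlier entries take precedence; a more general class can serve as fallback.
wrapper = get_file(path, fileclasses=[TextFile, File])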
@@ -13,4 +13,3 @@ blue, which is supposed to be orange, does the sorting logic, holds selection, etc.

 .. _cross-toolkit: http://www.hardcoded.net/articles/cross-toolkit-software
 """
-

@@ -8,6 +8,7 @@

 from hscommon.notify import Listener

+
 class DupeGuruGUIObject(Listener):
     def __init__(self, app):
         Listener.__init__(self, app)
@@ -27,4 +28,3 @@ class DupeGuruGUIObject(Listener):

     def results_changed_but_keep_selection(self):
         pass
-
@@ -1,8 +1,8 @@
 # Created On: 2012-05-30
 # Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
-#
-# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
-# which should be included with this package. The terms are also available at
+#
+# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html

 import os
@@ -10,42 +10,46 @@ import os
 from hscommon.gui.base import GUIObject
 from hscommon.trans import tr


 class DeletionOptionsView:
     """Expected interface for :class:`DeletionOptions`'s view.

     *Not actually used in the code. For documentation purposes only.*

     Our view presents the user with an appropriate way (probably a mix of checkboxes and radio
     buttons) to set the different flags in :class:`DeletionOptions`. Note that
     :attr:`DeletionOptions.use_hardlinks` is only relevant if :attr:`DeletionOptions.link_deleted`
     is true. This is why we toggle the "enabled" state of that flag.

     We expect the view to set :attr:`DeletionOptions.link_deleted` immediately as the user changes
     its value because it will toggle :meth:`set_hardlink_option_enabled`.

     Other than the flags, there's also a prompt message which has a dynamic content, defined by
     :meth:`update_msg`.
     """

     def update_msg(self, msg: str):
         """Update the dialog's prompt with ``msg``.
         """

     def show(self):
         """Show the dialog in a modal fashion.

         Returns whether the dialog was "accepted" (the user pressed OK).
         """

     def set_hardlink_option_enabled(self, is_enabled: bool):
         """Enable or disable the widget controlling :attr:`DeletionOptions.use_hardlinks`.
         """


 class DeletionOptions(GUIObject):
     """Present the user with deletion options before proceeding.

     When the user activates "Send to trash", we present him with a couple of options that change
     the behavior of that deletion operation.
     """

     def __init__(self):
         GUIObject.__init__(self)
         #: Whether symlinks or hardlinks are used when doing :attr:`link_deleted`.
@@ -54,10 +58,10 @@ class DeletionOptions(GUIObject):
         #: Delete dupes directly and don't send to trash.
         #: *bool*. *get/set*
         self.direct = False

     def show(self, mark_count):
         """Prompt the user with a modal dialog offering our deletion options.

         :param int mark_count: Number of dupes marked for deletion.
         :rtype: bool
         :returns: Whether the user accepted the dialog (we cancel deletion if false).
@@ -69,7 +73,7 @@ class DeletionOptions(GUIObject):
         msg = tr("You are sending {} file(s) to the Trash.").format(mark_count)
         self.view.update_msg(msg)
         return self.view.show()

     def supports_links(self):
         """Returns whether our platform supports symlinks.
         """
@@ -87,21 +91,19 @@ class DeletionOptions(GUIObject):
         except TypeError:
             # wrong number of arguments
             return True

     @property
     def link_deleted(self):
         """Replace deleted dupes with symlinks (or hardlinks) to the dupe group reference.

         *bool*. *get/set*

         Whether the link is a symlink or hardlink is decided by :attr:`use_hardlinks`.
         """
         return self._link_deleted

     @link_deleted.setter
     def link_deleted(self, value):
         self._link_deleted = value
         hardlinks_enabled = value and self.supports_links()
         self.view.set_hardlink_option_enabled(hardlinks_enabled)
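The `link_deleted` setter above is the whole model-to-view handshake the docstring describes. A minimal usage sketch with a stand-in view (hypothetical; real views are bound through the GUIObject machinery):

class StubView:
    def update_msg(self, msg):
        print(msg)

    def show(self):
        return True  # simulate the user pressing OK

    def set_hardlink_option_enabled(self, is_enabled):
        print("hardlink widget enabled:", is_enabled)


options = DeletionOptions()
options.view = StubView()
options.link_deleted = True  # immediately toggles the hardlink widget on the view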
@@ -9,6 +9,7 @@
 from hscommon.gui.base import GUIObject
 from .base import DupeGuruGUIObject

+
 class DetailsPanel(GUIObject, DupeGuruGUIObject):
     def __init__(self, app):
         GUIObject.__init__(self, multibind=True)
@@ -19,7 +20,7 @@ class DetailsPanel(GUIObject, DupeGuruGUIObject):
         self._refresh()
         self.view.refresh()

-    #--- Private
+    # --- Private
     def _refresh(self):
         if self.app.selected_dupes:
             dupe = self.app.selected_dupes[0]
@@ -31,18 +32,19 @@ class DetailsPanel(GUIObject, DupeGuruGUIObject):
         # we don't want the two sides of the table to display the stats for the same file
         ref = group.ref if group is not None and group.ref is not dupe else None
         data2 = self.app.get_display_info(ref, group, False)
-        columns = self.app.result_table.COLUMNS[1:] # first column is the 'marked' column
+        columns = self.app.result_table.COLUMNS[
+            1:
+        ]  # first column is the 'marked' column
         self._table = [(c.display, data1[c.name], data2[c.name]) for c in columns]

-    #--- Public
+    # --- Public
     def row_count(self):
         return len(self._table)

     def row(self, row_index):
         return self._table[row_index]

-    #--- Event Handlers
+    # --- Event Handlers
     def dupes_selected(self):
         self._refresh()
         self.view.refresh()

@@ -1,9 +1,9 @@
 # Created By: Virgil Dupras
 # Created On: 2010-02-06
 # Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
-#
-# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
-# which should be included with this package. The terms are also available at
+#
+# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html

 from hscommon.gui.tree import Tree, Node
@@ -13,6 +13,7 @@ from .base import DupeGuruGUIObject

 STATE_ORDER = [DirectoryState.Normal, DirectoryState.Reference, DirectoryState.Excluded]

+
 # Lazily loads children
 class DirectoryNode(Node):
     def __init__(self, tree, path, name):
@@ -21,29 +22,31 @@ class DirectoryNode(Node):
         self._directory_path = path
         self._loaded = False
         self._state = STATE_ORDER.index(self._tree.app.directories.get_state(path))

     def __len__(self):
         if not self._loaded:
             self._load()
         return Node.__len__(self)

     def _load(self):
         self.clear()
         subpaths = self._tree.app.directories.get_subfolders(self._directory_path)
         for path in subpaths:
             self.append(DirectoryNode(self._tree, path, path.name))
         self._loaded = True

     def update_all_states(self):
-        self._state = STATE_ORDER.index(self._tree.app.directories.get_state(self._directory_path))
+        self._state = STATE_ORDER.index(
+            self._tree.app.directories.get_state(self._directory_path)
+        )
         for node in self:
             node.update_all_states()

     # The state property is an index into the combobox
     @property
     def state(self):
         return self._state

     @state.setter
     def state(self, value):
         if value == self._state:
@@ -52,29 +55,29 @@ class DirectoryNode(Node):
         state = STATE_ORDER[value]
         self._tree.app.directories.set_state(self._directory_path, state)
         self._tree.update_all_states()


 class DirectoryTree(Tree, DupeGuruGUIObject):
-    #--- model -> view calls:
+    # --- model -> view calls:
     # refresh()
     # refresh_states() # when only states label need to be refreshed
     #
     def __init__(self, app):
         Tree.__init__(self)
         DupeGuruGUIObject.__init__(self, app)

     def _view_updated(self):
         self._refresh()
         self.view.refresh()

     def _refresh(self):
         self.clear()
         for path in self.app.directories:
             self.append(DirectoryNode(self, path, str(path)))

     def add_directory(self, path):
         self.app.add_directory(path)

     def remove_selected(self):
         selected_paths = self.selected_paths
         if not selected_paths:
@@ -90,18 +93,17 @@ class DirectoryTree(Tree, DupeGuruGUIObject):
         newstate = DirectoryState.Normal
         for node in nodes:
             node.state = newstate

     def select_all(self):
         self.selected_nodes = list(self)
         self.view.refresh()

     def update_all_states(self):
         for node in self:
             node.update_all_states()
         self.view.refresh_states()

-    #--- Event Handlers
+    # --- Event Handlers
     def directories_changed(self):
         self._refresh()
         self.view.refresh()
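The `__len__`/`_load` pair in `DirectoryNode` above implements lazy child loading: children are only fetched the first time the node is measured or expanded. The pattern in isolation (a hypothetical, self-contained sketch):

class LazyNode(list):
    def __init__(self, load_children):
        super().__init__()
        self._load_children = load_children
        self._loaded = False

    def __len__(self):
        # The first size query triggers the load; later queries are free.
        if not self._loaded:
            self.extend(self._load_children())
            self._loaded = True
        return super().__len__()


node = LazyNode(lambda: ["a", "b"])
print(len(node))  # loads lazily, prints 2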
@@ -8,8 +8,9 @@
 from hscommon.trans import tr
 from .ignore_list_table import IgnoreListTable

+
 class IgnoreListDialog:
-    #--- View interface
+    # --- View interface
     # show()
     #

@@ -21,7 +22,9 @@ class IgnoreListDialog:
     def clear(self):
         if not self.ignore_list:
             return
-        msg = tr("Do you really want to remove all %d items from the ignore list?") % len(self.ignore_list)
+        msg = tr(
+            "Do you really want to remove all %d items from the ignore list?"
+        ) % len(self.ignore_list)
         if self.app.view.ask_yes_no(msg):
             self.ignore_list.Clear()
             self.refresh()
@@ -36,4 +39,3 @@ class IgnoreListDialog:

     def show(self):
         self.view.show()
-
@@ -1,35 +1,36 @@
 # Created By: Virgil Dupras
 # Created On: 2012-03-13
 # Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
-#
-# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
-# which should be included with this package. The terms are also available at
+#
+# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html

 from hscommon.gui.table import GUITable, Row
 from hscommon.gui.column import Column, Columns
 from hscommon.trans import trget

-coltr = trget('columns')
+coltr = trget("columns")


 class IgnoreListTable(GUITable):
     COLUMNS = [
         # the str concat below saves us needless localization.
-        Column('path1', coltr("File Path") + " 1"),
-        Column('path2', coltr("File Path") + " 2"),
+        Column("path1", coltr("File Path") + " 1"),
+        Column("path2", coltr("File Path") + " 2"),
     ]

     def __init__(self, ignore_list_dialog):
         GUITable.__init__(self)
         self.columns = Columns(self)
         self.view = None
         self.dialog = ignore_list_dialog

-    #--- Override
+    # --- Override
     def _fill(self):
         for path1, path2 in self.dialog.ignore_list:
             self.append(IgnoreListRow(self, path1, path2))


 class IgnoreListRow(Row):
     def __init__(self, table, path1, path2):
@@ -38,4 +39,3 @@ class IgnoreListRow(Row):
         self.path2_original = path2
         self.path1 = str(path1)
         self.path2 = str(path2)
-
@@ -9,6 +9,7 @@
 from hscommon.gui.base import GUIObject
 from hscommon.gui.selectable_list import GUISelectableList

+
 class CriterionCategoryList(GUISelectableList):
     def __init__(self, dialog):
         self.dialog = dialog
@@ -18,6 +19,7 @@ class CriterionCategoryList(GUISelectableList):
         self.dialog.select_category(self.dialog.categories[self.selected_index])
         GUISelectableList._update_selection(self)

+
 class PrioritizationList(GUISelectableList):
     def __init__(self, dialog):
         self.dialog = dialog
@@ -41,6 +43,7 @@ class PrioritizationList(GUISelectableList):
             del prilist[i]
         self._refresh_contents()

+
 class PrioritizeDialog(GUIObject):
     def __init__(self, app):
         GUIObject.__init__(self)
@@ -52,15 +55,15 @@ class PrioritizeDialog(GUIObject):
         self.prioritizations = []
         self.prioritization_list = PrioritizationList(self)

-    #--- Override
+    # --- Override
     def _view_updated(self):
         self.category_list.select(0)

-    #--- Private
+    # --- Private
     def _sort_key(self, dupe):
         return tuple(crit.sort_key(dupe) for crit in self.prioritizations)

-    #--- Public
+    # --- Public
     def select_category(self, category):
         self.criteria = category.criteria_list()
         self.criteria_list[:] = [c.display_value for c in self.criteria]
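`_sort_key` above composes one tuple out of every active criterion, so earlier criteria take precedence and later ones break ties. A sketch with stand-in criteria (hypothetical lambdas in place of the real Category objects):

prioritizations = [
    lambda dupe: dupe.size,   # stand-in for a SizeCategory sort key
    lambda dupe: dupe.mtime,  # stand-in for an MtimeCategory sort key
]


def sort_key(dupe):
    # Tuples compare element by element, which yields the multi-level ordering.
    return tuple(crit(dupe) for crit in prioritizations)


dupes.sort(key=sort_key)  # dupes is assumed to be a list of duplicate objects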
@@ -1,29 +1,29 @@
 # Created By: Virgil Dupras
 # Created On: 2010-04-12
 # Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
-#
-# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
-# which should be included with this package. The terms are also available at
+#
+# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html

 from hscommon import desktop

 from .problem_table import ProblemTable


 class ProblemDialog:
     def __init__(self, app):
         self.app = app
         self._selected_dupe = None
         self.problem_table = ProblemTable(self)

     def refresh(self):
         self._selected_dupe = None
         self.problem_table.refresh()

     def reveal_selected_dupe(self):
         if self._selected_dupe is not None:
             desktop.reveal_path(self._selected_dupe.path)

     def select_dupe(self, dupe):
         self._selected_dupe = dupe
@@ -1,39 +1,40 @@
 # Created By: Virgil Dupras
 # Created On: 2010-04-12
 # Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
-#
-# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
-# which should be included with this package. The terms are also available at
+#
+# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html

 from hscommon.gui.table import GUITable, Row
 from hscommon.gui.column import Column, Columns
 from hscommon.trans import trget

-coltr = trget('columns')
+coltr = trget("columns")


 class ProblemTable(GUITable):
     COLUMNS = [
-        Column('path', coltr("File Path")),
-        Column('msg', coltr("Error Message")),
+        Column("path", coltr("File Path")),
+        Column("msg", coltr("Error Message")),
     ]

     def __init__(self, problem_dialog):
         GUITable.__init__(self)
         self.columns = Columns(self)
         self.dialog = problem_dialog

-    #--- Override
+    # --- Override
     def _update_selection(self):
         row = self.selected_row
         dupe = row.dupe if row is not None else None
         self.dialog.select_dupe(dupe)

     def _fill(self):
         problems = self.dialog.app.results.problems
         for dupe, msg in problems:
             self.append(ProblemRow(self, dupe, msg))


 class ProblemRow(Row):
     def __init__(self, table, dupe, msg):
@@ -41,4 +42,3 @@ class ProblemRow(Row):
         self.dupe = dupe
         self.msg = msg
         self.path = str(dupe.path)
-
@@ -1,9 +1,9 @@
 # Created By: Virgil Dupras
 # Created On: 2010-02-11
 # Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
-#
-# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
-# which should be included with this package. The terms are also available at
+#
+# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html

 from operator import attrgetter
@@ -13,6 +13,7 @@ from hscommon.gui.column import Columns

 from .base import DupeGuruGUIObject

+
 class DupeRow(Row):
     def __init__(self, table, group, dupe):
         Row.__init__(self, table)
@@ -22,14 +23,14 @@ class DupeRow(Row):
         self._data = None
         self._data_delta = None
         self._delta_columns = None

     def is_cell_delta(self, column_name):
         """Returns whether a cell is in delta mode (orange color).

         If the result table is in delta mode, returns True if the column is one of the "delta
         columns", that is, one of the columns that display a differential value rather than an
         absolute value.

         If not, returns True if the dupe's value is different from its ref value.
         """
         if not self.table.delta_values:
@@ -42,62 +43,64 @@ class DupeRow(Row):
             dupe_info = self.data
             ref_info = self._group.ref.get_display_info(group=self._group, delta=False)
             for key, value in dupe_info.items():
-                if (key not in self._delta_columns) and (ref_info[key].lower() != value.lower()):
+                if (key not in self._delta_columns) and (
+                    ref_info[key].lower() != value.lower()
+                ):
                     self._delta_columns.add(key)
         return column_name in self._delta_columns

     @property
     def data(self):
         if self._data is None:
             self._data = self._app.get_display_info(self._dupe, self._group, False)
         return self._data

     @property
     def data_delta(self):
         if self._data_delta is None:
             self._data_delta = self._app.get_display_info(self._dupe, self._group, True)
         return self._data_delta

     @property
     def isref(self):
         return self._dupe is self._group.ref

     @property
     def markable(self):
         return self._app.results.is_markable(self._dupe)

     @property
     def marked(self):
         return self._app.results.is_marked(self._dupe)

     @marked.setter
     def marked(self, value):
         self._app.mark_dupe(self._dupe, value)


 class ResultTable(GUITable, DupeGuruGUIObject):
     def __init__(self, app):
         GUITable.__init__(self)
         DupeGuruGUIObject.__init__(self, app)
-        self.columns = Columns(self, prefaccess=app, savename='ResultTable')
+        self.columns = Columns(self, prefaccess=app, savename="ResultTable")
         self._power_marker = False
         self._delta_values = False
-        self._sort_descriptors = ('name', True)
+        self._sort_descriptors = ("name", True)

-    #--- Override
+    # --- Override
     def _view_updated(self):
         self._refresh_with_view()

     def _restore_selection(self, previous_selection):
         if self.app.selected_dupes:
             to_find = set(self.app.selected_dupes)
             indexes = [i for i, r in enumerate(self) if r._dupe in to_find]
             self.selected_indexes = indexes

     def _update_selection(self):
         rows = self.selected_rows
-        self.app._select_dupes(list(map(attrgetter('_dupe'), rows)))
+        self.app._select_dupes(list(map(attrgetter("_dupe"), rows)))

     def _fill(self):
         if not self.power_marker:
             for group in self.app.results.groups:
@@ -108,22 +111,22 @@ class ResultTable(GUITable, DupeGuruGUIObject):
             for dupe in self.app.results.dupes:
                 group = self.app.results.get_group_of_duplicate(dupe)
                 self.append(DupeRow(self, group, dupe))

     def _refresh_with_view(self):
         self.refresh()
         self.view.show_selected_row()

-    #--- Public
+    # --- Public
     def get_row_value(self, index, column):
         try:
             row = self[index]
         except IndexError:
-            return '---'
+            return "---"
         if self.delta_values:
             return row.data_delta[column]
         else:
             return row.data[column]

     def rename_selected(self, newname):
         row = self.selected_row
         if row is None:
@@ -133,7 +136,7 @@ class ResultTable(GUITable, DupeGuruGUIObject):
         row._data = None
         row._data_delta = None
         return self.app.rename_selected(newname)

     def sort(self, key, asc):
         if self.power_marker:
             self.app.results.sort_dupes(key, asc, self.delta_values)
@@ -141,12 +144,12 @@ class ResultTable(GUITable, DupeGuruGUIObject):
             self.app.results.sort_groups(key, asc)
         self._sort_descriptors = (key, asc)
         self._refresh_with_view()

-    #--- Properties
+    # --- Properties
     @property
     def power_marker(self):
         return self._power_marker

     @power_marker.setter
     def power_marker(self, value):
         if value == self._power_marker:
@@ -155,29 +158,29 @@ class ResultTable(GUITable, DupeGuruGUIObject):
         key, asc = self._sort_descriptors
         self.sort(key, asc)
         # no need to refresh, it has happened in sort()

     @property
     def delta_values(self):
         return self._delta_values

     @delta_values.setter
     def delta_values(self, value):
         if value == self._delta_values:
             return
         self._delta_values = value
         self.refresh()

     @property
     def selected_dupe_count(self):
         return sum(1 for row in self.selected_rows if not row.isref)

-    #--- Event Handlers
+    # --- Event Handlers
     def marking_changed(self):
         self.view.invalidate_markings()

     def results_changed(self):
         self._refresh_with_view()

     def results_changed_but_keep_selection(self):
         # What we want to do here is that instead of restoring selected *dupes* after refresh, we
         # restore selected *paths*.
@@ -185,7 +188,6 @@ class ResultTable(GUITable, DupeGuruGUIObject):
         self.refresh(refresh_view=False)
         self.select(indexes)
         self.view.refresh()

     def save_session(self):
         self.columns.save_columns()
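The `is_cell_delta` docstring above describes a two-level rule. Reduced to a sketch with inline data (all values hypothetical):

DELTA_COLUMNS = {"size", "mtime"}  # columns that display differential values
dupe_info = {"size": "1200", "name": "a copy.txt"}
ref_info = {"size": "1000", "name": "a.txt"}


def is_cell_delta(column, delta_mode):
    if not delta_mode:
        return False
    if column in DELTA_COLUMNS:
        return True  # always highlighted in delta mode
    # Otherwise highlight only when the dupe's display value differs from the ref's.
    return dupe_info[column].lower() != ref_info[column].lower()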
@@ -1,21 +1,23 @@
 # Created By: Virgil Dupras
 # Created On: 2010-02-11
 # Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
-#
-# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
-# which should be included with this package. The terms are also available at
+#
+# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html

 from .base import DupeGuruGUIObject


 class StatsLabel(DupeGuruGUIObject):
     def _view_updated(self):
         self.view.refresh()

     @property
     def display(self):
         return self.app.stat_line

     def results_changed(self):
         self.view.refresh()

     marking_changed = results_changed
@@ -10,13 +10,15 @@ from xml.etree import ElementTree as ET

 from hscommon.util import FileOrPath

+
 class IgnoreList:
     """An ignore list implementation that is iterable, filterable and exportable to XML.

     Call Ignore to add an ignore list entry, and AreIgnored to check if 2 items are in the list.
     When iterated, 2 sized tuples will be returned, the tuples containing 2 items ignored together.
     """
-    #---Override
+    # ---Override
     def __init__(self):
         self._ignored = {}
         self._count = 0
@@ -29,7 +31,7 @@ class IgnoreList:
     def __len__(self):
         return self._count

-    #---Public
+    # ---Public
     def AreIgnored(self, first, second):
         def do_check(first, second):
             try:
@@ -99,14 +101,14 @@ class IgnoreList:
             root = ET.parse(infile).getroot()
         except Exception:
             return
-        file_elems = (e for e in root if e.tag == 'file')
+        file_elems = (e for e in root if e.tag == "file")
         for fn in file_elems:
-            file_path = fn.get('path')
+            file_path = fn.get("path")
             if not file_path:
                 continue
-            subfile_elems = (e for e in fn if e.tag == 'file')
+            subfile_elems = (e for e in fn if e.tag == "file")
             for sfn in subfile_elems:
-                subfile_path = sfn.get('path')
+                subfile_path = sfn.get("path")
                 if subfile_path:
                     self.Ignore(file_path, subfile_path)

@@ -115,15 +117,13 @@ class IgnoreList:

         outfile can be a file object or a filename.
         """
-        root = ET.Element('ignore_list')
+        root = ET.Element("ignore_list")
         for filename, subfiles in self._ignored.items():
-            file_node = ET.SubElement(root, 'file')
-            file_node.set('path', filename)
+            file_node = ET.SubElement(root, "file")
+            file_node.set("path", filename)
             for subfilename in subfiles:
-                subfile_node = ET.SubElement(file_node, 'file')
-                subfile_node.set('path', subfilename)
+                subfile_node = ET.SubElement(file_node, "file")
+                subfile_node.set("path", subfilename)
         tree = ET.ElementTree(root)
-        with FileOrPath(outfile, 'wb') as fp:
-            tree.write(fp, encoding='utf-8')
+        with FileOrPath(outfile, "wb") as fp:
+            tree.write(fp, encoding="utf-8")
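A short usage sketch for the IgnoreList API documented above (paths are illustrative):

il = IgnoreList()
il.Ignore("/photos/a.jpg", "/photos/b.jpg")
print(il.AreIgnored("/photos/a.jpg", "/photos/b.jpg"))  # True; the pair is checked in both orders
for first, second in il:  # iteration yields the stored 2-sized tuples
    print(first, second)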
@@ -2,40 +2,41 @@
 # Created On: 2006/02/23
 # Copyright 2015 Hardcoded Software (http://www.hardcoded.net)

-# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
-# which should be included with this package. The terms are also available at
+# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html


 class Markable:
     def __init__(self):
         self.__marked = set()
         self.__inverted = False

-    #---Virtual
-    #About did_mark and did_unmark: They only happen what an object is actually added/removed
+    # ---Virtual
+    # About did_mark and did_unmark: They only happen when an object is actually added/removed
     # in self.__marked, and is not affected by __inverted. Thus, self.mark while __inverted
-    #is True will launch _DidUnmark.
+    # is True will launch _did_unmark.
     def _did_mark(self, o):
         pass

     def _did_unmark(self, o):
         pass

     def _get_markable_count(self):
         return 0

     def _is_markable(self, o):
         return True

-    #---Protected
+    # ---Protected
     def _remove_mark_flag(self, o):
         try:
             self.__marked.remove(o)
             self._did_unmark(o)
         except KeyError:
             pass

-    #---Public
+    # ---Public
     def is_marked(self, o):
         if not self._is_markable(o):
             return False
@@ -43,31 +44,31 @@ class Markable:
         if self.__inverted:
             is_marked = not is_marked
         return is_marked

     def mark(self, o):
         if self.is_marked(o):
             return False
         if not self._is_markable(o):
             return False
         return self.mark_toggle(o)

     def mark_multiple(self, objects):
         for o in objects:
             self.mark(o)

     def mark_all(self):
         self.mark_none()
         self.__inverted = True

     def mark_invert(self):
         self.__inverted = not self.__inverted

     def mark_none(self):
         for o in self.__marked:
             self._did_unmark(o)
         self.__marked = set()
         self.__inverted = False

     def mark_toggle(self, o):
         try:
             self.__marked.remove(o)
@@ -78,32 +79,33 @@ class Markable:
             self.__marked.add(o)
             self._did_mark(o)
         return True

     def mark_toggle_multiple(self, objects):
         for o in objects:
             self.mark_toggle(o)

     def unmark(self, o):
         if not self.is_marked(o):
             return False
         return self.mark_toggle(o)

     def unmark_multiple(self, objects):
         for o in objects:
             self.unmark(o)

-    #--- Properties
+    # --- Properties
     @property
     def mark_count(self):
         if self.__inverted:
             return self._get_markable_count() - len(self.__marked)
         else:
             return len(self.__marked)

     @property
     def mark_inverted(self):
         return self.__inverted


 class MarkableList(list, Markable):
     def __init__(self):
         list.__init__(self)
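The inversion flag above is the interesting trick: `mark_all` never adds anything to the set, it just flips `__inverted`, and `mark_count` is then derived arithmetically. A hedged walkthrough (assuming a concrete subclass that implements `_get_markable_count`, with `item` as a placeholder object):

m = SomeMarkableSubclass()
m.mark_all()         # the internal set stays empty; only the inverted flag flips
m.unmark(item)       # stores item in the set, which now means "explicitly unmarked"
print(m.mark_count)  # markable count minus len(set) while inverted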
@@ -1 +1 @@
-from . import fs, prioritize, result_table, scanner # noqa
+from . import fs, prioritize, result_table, scanner  # noqa
@@ -13,25 +13,37 @@ from core.util import format_timestamp, format_perc, format_words, format_dupe_count
 from core import fs

 TAG_FIELDS = {
-    'audiosize', 'duration', 'bitrate', 'samplerate', 'title', 'artist',
-    'album', 'genre', 'year', 'track', 'comment'
+    "audiosize",
+    "duration",
+    "bitrate",
+    "samplerate",
+    "title",
+    "artist",
+    "album",
+    "genre",
+    "year",
+    "track",
+    "comment",
 }


 class MusicFile(fs.File):
     INITIAL_INFO = fs.File.INITIAL_INFO.copy()
-    INITIAL_INFO.update({
-        'audiosize': 0,
-        'bitrate': 0,
-        'duration': 0,
-        'samplerate': 0,
-        'artist': '',
-        'album': '',
-        'title': '',
-        'genre': '',
-        'comment': '',
-        'year': '',
-        'track': 0,
-    })
+    INITIAL_INFO.update(
+        {
+            "audiosize": 0,
+            "bitrate": 0,
+            "duration": 0,
+            "samplerate": 0,
+            "artist": "",
+            "album": "",
+            "title": "",
+            "genre": "",
+            "comment": "",
+            "year": "",
+            "track": 0,
+        }
+    )
     __slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())

     @classmethod
@@ -60,26 +72,26 @@ class MusicFile(fs.File):
         else:
             percentage = group.percentage
             dupe_count = len(group.dupes)
-        dupe_folder_path = getattr(self, 'display_folder_path', self.folder_path)
+        dupe_folder_path = getattr(self, "display_folder_path", self.folder_path)
         return {
-            'name': self.name,
-            'folder_path': str(dupe_folder_path),
-            'size': format_size(size, 2, 2, False),
-            'duration': format_time(duration, with_hours=False),
-            'bitrate': str(bitrate),
-            'samplerate': str(samplerate),
-            'extension': self.extension,
-            'mtime': format_timestamp(mtime, delta and m),
-            'title': self.title,
-            'artist': self.artist,
-            'album': self.album,
-            'genre': self.genre,
-            'year': self.year,
-            'track': str(self.track),
-            'comment': self.comment,
-            'percentage': format_perc(percentage),
-            'words': format_words(self.words) if hasattr(self, 'words') else '',
-            'dupe_count': format_dupe_count(dupe_count),
+            "name": self.name,
+            "folder_path": str(dupe_folder_path),
+            "size": format_size(size, 2, 2, False),
+            "duration": format_time(duration, with_hours=False),
+            "bitrate": str(bitrate),
+            "samplerate": str(samplerate),
+            "extension": self.extension,
+            "mtime": format_timestamp(mtime, delta and m),
+            "title": self.title,
+            "artist": self.artist,
+            "album": self.album,
+            "genre": self.genre,
+            "year": self.year,
+            "track": str(self.track),
+            "comment": self.comment,
+            "percentage": format_perc(percentage),
+            "words": format_words(self.words) if hasattr(self, "words") else "",
+            "dupe_count": format_dupe_count(dupe_count),
         }

     def _get_md5partial_offset_and_size(self):
@@ -101,4 +113,3 @@ class MusicFile(fs.File):
         self.comment = f.comment
         self.year = f.year
         self.track = f.track
-
@@ -8,11 +8,16 @@
 from hscommon.trans import trget

 from core.prioritize import (
-    KindCategory, FolderCategory, FilenameCategory, NumericalCategory,
-    SizeCategory, MtimeCategory
+    KindCategory,
+    FolderCategory,
+    FilenameCategory,
+    NumericalCategory,
+    SizeCategory,
+    MtimeCategory,
 )

-coltr = trget('columns')
+coltr = trget("columns")


 class DurationCategory(NumericalCategory):
     NAME = coltr("Duration")
@@ -20,21 +25,29 @@ class DurationCategory(NumericalCategory):
     def extract_value(self, dupe):
         return dupe.duration

+
 class BitrateCategory(NumericalCategory):
     NAME = coltr("Bitrate")

     def extract_value(self, dupe):
         return dupe.bitrate

+
 class SamplerateCategory(NumericalCategory):
     NAME = coltr("Samplerate")

     def extract_value(self, dupe):
         return dupe.samplerate

+
 def all_categories():
     return [
-        KindCategory, FolderCategory, FilenameCategory, SizeCategory, DurationCategory,
-        BitrateCategory, SamplerateCategory, MtimeCategory
+        KindCategory,
+        FolderCategory,
+        FilenameCategory,
+        SizeCategory,
+        DurationCategory,
+        BitrateCategory,
+        SamplerateCategory,
+        MtimeCategory,
     ]
@@ -1,8 +1,8 @@
 # Created On: 2011-11-27
 # Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
-#
-# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
-# which should be included with this package. The terms are also available at
+#
+# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html

 from hscommon.gui.column import Column
@@ -10,28 +10,29 @@ from hscommon.trans import trget

 from core.gui.result_table import ResultTable as ResultTableBase

-coltr = trget('columns')
+coltr = trget("columns")


 class ResultTable(ResultTableBase):
     COLUMNS = [
-        Column('marked', ''),
-        Column('name', coltr("Filename")),
-        Column('folder_path', coltr("Folder"), visible=False, optional=True),
-        Column('size', coltr("Size (MB)"), optional=True),
-        Column('duration', coltr("Time"), optional=True),
-        Column('bitrate', coltr("Bitrate"), optional=True),
-        Column('samplerate', coltr("Sample Rate"), visible=False, optional=True),
-        Column('extension', coltr("Kind"), optional=True),
-        Column('mtime', coltr("Modification"), visible=False, optional=True),
-        Column('title', coltr("Title"), visible=False, optional=True),
-        Column('artist', coltr("Artist"), visible=False, optional=True),
-        Column('album', coltr("Album"), visible=False, optional=True),
-        Column('genre', coltr("Genre"), visible=False, optional=True),
-        Column('year', coltr("Year"), visible=False, optional=True),
-        Column('track', coltr("Track Number"), visible=False, optional=True),
-        Column('comment', coltr("Comment"), visible=False, optional=True),
-        Column('percentage', coltr("Match %"), optional=True),
-        Column('words', coltr("Words Used"), visible=False, optional=True),
-        Column('dupe_count', coltr("Dupe Count"), visible=False, optional=True),
+        Column("marked", ""),
+        Column("name", coltr("Filename")),
+        Column("folder_path", coltr("Folder"), visible=False, optional=True),
+        Column("size", coltr("Size (MB)"), optional=True),
+        Column("duration", coltr("Time"), optional=True),
+        Column("bitrate", coltr("Bitrate"), optional=True),
+        Column("samplerate", coltr("Sample Rate"), visible=False, optional=True),
+        Column("extension", coltr("Kind"), optional=True),
+        Column("mtime", coltr("Modification"), visible=False, optional=True),
+        Column("title", coltr("Title"), visible=False, optional=True),
+        Column("artist", coltr("Artist"), visible=False, optional=True),
+        Column("album", coltr("Album"), visible=False, optional=True),
+        Column("genre", coltr("Genre"), visible=False, optional=True),
+        Column("year", coltr("Year"), visible=False, optional=True),
+        Column("track", coltr("Track Number"), visible=False, optional=True),
+        Column("comment", coltr("Comment"), visible=False, optional=True),
+        Column("percentage", coltr("Match %"), optional=True),
+        Column("words", coltr("Words Used"), visible=False, optional=True),
+        Column("dupe_count", coltr("Dupe Count"), visible=False, optional=True),
     ]
-    DELTA_COLUMNS = {'size', 'duration', 'bitrate', 'samplerate', 'mtime'}
+    DELTA_COLUMNS = {"size", "duration", "bitrate", "samplerate", "mtime"}
@@ -8,6 +8,7 @@ from hscommon.trans import tr

 from core.scanner import Scanner as ScannerBase, ScanOption, ScanType

+
 class ScannerME(ScannerBase):
     @staticmethod
     def _key_func(dupe):
@@ -22,5 +23,3 @@ class ScannerME(ScannerBase):
             ScanOption(ScanType.Tag, tr("Tags")),
             ScanOption(ScanType.Contents, tr("Contents")),
         ]
-
-
@@ -1 +1,12 @@
-from . import block, cache, exif, iphoto_plist, matchblock, matchexif, photo, prioritize, result_table, scanner # noqa
+from . import (  # noqa
+    block,
+    cache,
+    exif,
+    iphoto_plist,
+    matchblock,
+    matchexif,
+    photo,
+    prioritize,
+    result_table,
+    scanner,
+)
@@ -6,7 +6,7 @@
 # which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html

-from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2 # NOQA
+from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2  # NOQA

 # Converted to C
 # def getblock(image):
@@ -4,7 +4,8 @@
 # which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html

-from ._cache import string_to_colors # noqa
+from ._cache import string_to_colors  # noqa
+

 def colors_to_string(colors):
     """Transform the 3 sized tuples 'colors' into a hex string.
@@ -12,7 +13,8 @@ def colors_to_string(colors):
     [(0,100,255)] --> 0064ff
     [(1,2,3),(4,5,6)] --> 010203040506
     """
-    return ''.join('%02x%02x%02x' % (r, g, b) for r, g, b in colors)
+    return "".join("%02x%02x%02x" % (r, g, b) for r, g, b in colors)
+

 # This function is an important bottleneck of dupeGuru PE. It has been converted to C.
 # def string_to_colors(s):
@@ -23,4 +25,3 @@ def colors_to_string(colors):
 #         number = int(s[i:i+6], 16)
 #         result.append((number >> 16, (number >> 8) & 0xff, number & 0xff))
 #     return result
-
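A quick round-trip note for the encoding above (`string_to_colors` is the C counterpart imported at the top, described here only by its expected behavior):

s = colors_to_string([(0, 100, 255), (1, 2, 3)])
print(s)  # "0064ff010203"
# string_to_colors(s) is expected to yield [(0, 100, 255), (1, 2, 3)] back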
@@ -12,29 +12,36 @@ from collections import namedtuple

 from .cache import string_to_colors, colors_to_string


 def wrap_path(path):
-    return 'path:{}'.format(path)
+    return "path:{}".format(path)


 def unwrap_path(key):
     return key[5:]


 def wrap_id(path):
-    return 'id:{}'.format(path)
+    return "id:{}".format(path)


 def unwrap_id(key):
     return int(key[3:])


-CacheRow = namedtuple('CacheRow', 'id path blocks mtime')
+CacheRow = namedtuple("CacheRow", "id path blocks mtime")


 class ShelveCache:
     """A class to cache picture blocks in a shelve backend.
     """

     def __init__(self, db=None, readonly=False):
         self.istmp = db is None
         if self.istmp:
             self.dtmp = tempfile.mkdtemp()
-            self.ftmp = db = op.join(self.dtmp, 'tmpdb')
-        flag = 'r' if readonly else 'c'
+            self.ftmp = db = op.join(self.dtmp, "tmpdb")
+        flag = "r" if readonly else "c"
         self.shelve = shelve.open(db, flag)
         self.maxid = self._compute_maxid()

@@ -54,10 +61,10 @@ class ShelveCache:
         return string_to_colors(self.shelve[skey].blocks)

     def __iter__(self):
-        return (unwrap_path(k) for k in self.shelve if k.startswith('path:'))
+        return (unwrap_path(k) for k in self.shelve if k.startswith("path:"))

     def __len__(self):
-        return sum(1 for k in self.shelve if k.startswith('path:'))
+        return sum(1 for k in self.shelve if k.startswith("path:"))

     def __setitem__(self, path_str, blocks):
         blocks = colors_to_string(blocks)
@@ -74,7 +81,9 @@ class ShelveCache:
         self.shelve[wrap_id(rowid)] = wrap_path(path_str)

     def _compute_maxid(self):
-        return max((unwrap_id(k) for k in self.shelve if k.startswith('id:')), default=1)
+        return max(
+            (unwrap_id(k) for k in self.shelve if k.startswith("id:")), default=1
+        )

     def _get_new_id(self):
         self.maxid += 1
@@ -133,4 +142,3 @@ class ShelveCache:
         # #402 and #439. I don't think it hurts to silently ignore the error, so that's
         # what we do
         pass
-
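The wrap/unwrap helpers above let one shelve hold two key namespaces, distinguished purely by prefix:

key = wrap_path("/photos/a.jpg")  # "path:/photos/a.jpg"
print(unwrap_path(key))           # "/photos/a.jpg"
rowkey = wrap_id(42)              # "id:42"
print(unwrap_id(rowkey))          # 42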
@@ -11,10 +11,12 @@ import sqlite3 as sqlite

 from .cache import string_to_colors, colors_to_string

+
 class SqliteCache:
     """A class to cache picture blocks in a sqlite backend.
     """
-    def __init__(self, db=':memory:', readonly=False):
+
+    def __init__(self, db=":memory:", readonly=False):
         # readonly is not used in the sqlite version of the cache
         self.dbname = db
         self.con = None
@@ -67,34 +69,40 @@ class SqliteCache:
         try:
             self.con.execute(sql, [blocks, mtime, path_str])
         except sqlite.OperationalError:
-            logging.warning('Picture cache could not set value for key %r', path_str)
+            logging.warning("Picture cache could not set value for key %r", path_str)
         except sqlite.DatabaseError as e:
-            logging.warning('DatabaseError while setting value for key %r: %s', path_str, str(e))
+            logging.warning(
+                "DatabaseError while setting value for key %r: %s", path_str, str(e)
+            )

     def _create_con(self, second_try=False):
         def create_tables():
             logging.debug("Creating picture cache tables.")
             self.con.execute("drop table if exists pictures")
             self.con.execute("drop index if exists idx_path")
-            self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)")
+            self.con.execute(
+                "create table pictures(path TEXT, mtime INTEGER, blocks TEXT)"
+            )
             self.con.execute("create index idx_path on pictures (path)")

         self.con = sqlite.connect(self.dbname, isolation_level=None)
         try:
             self.con.execute("select path, mtime, blocks from pictures where 1=2")
-        except sqlite.OperationalError: # new db
+        except sqlite.OperationalError:  # new db
             create_tables()
-        except sqlite.DatabaseError as e: # corrupted db
+        except sqlite.DatabaseError as e:  # corrupted db
             if second_try:
-                raise # Something really strange is happening
-            logging.warning('Could not create picture cache because of an error: %s', str(e))
+                raise  # Something really strange is happening
+            logging.warning(
+                "Could not create picture cache because of an error: %s", str(e)
+            )
             self.con.close()
             os.remove(self.dbname)
             self._create_con(second_try=True)

     def clear(self):
         self.close()
-        if self.dbname != ':memory:':
+        if self.dbname != ":memory:":
             os.remove(self.dbname)
         self._create_con()

@@ -117,7 +125,9 @@ class SqliteCache:
         raise ValueError(path)

     def get_multiple(self, rowids):
-        sql = "select rowid, blocks from pictures where rowid in (%s)" % ','.join(map(str, rowids))
+        sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(
+            map(str, rowids)
+        )
         cur = self.con.execute(sql)
         return ((rowid, string_to_colors(blocks)) for rowid, blocks in cur)

@@ -138,6 +148,7 @@ class SqliteCache:
                 continue
             todelete.append(rowid)
         if todelete:
-            sql = "delete from pictures where rowid in (%s)" % ','.join(map(str, todelete))
+            sql = "delete from pictures where rowid in (%s)" % ",".join(
+                map(str, todelete)
+            )
             self.con.execute(sql)
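The `second_try` guard in `_create_con` above is a small self-healing pattern: on a corrupted database, delete the file and rebuild exactly once. A generic sketch (the `opener` callable and `CorruptError` exception are hypothetical):

import os


def open_with_recovery(path, opener, second_try=False):
    try:
        return opener(path)
    except CorruptError:
        if second_try:
            raise  # rebuilding didn't help; fail loudly
        os.remove(path)
        return open_with_recovery(path, opener, second_try=True)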
@@ -83,17 +83,17 @@ EXIF_TAGS = {
     0xA003: "PixelYDimension",
     0xA004: "RelatedSoundFile",
     0xA005: "InteroperabilityIFDPointer",
-    0xA20B: "FlashEnergy", # 0x920B in TIFF/EP
-    0xA20C: "SpatialFrequencyResponse", # 0x920C - -
-    0xA20E: "FocalPlaneXResolution", # 0x920E - -
-    0xA20F: "FocalPlaneYResolution", # 0x920F - -
-    0xA210: "FocalPlaneResolutionUnit", # 0x9210 - -
-    0xA214: "SubjectLocation", # 0x9214 - -
-    0xA215: "ExposureIndex", # 0x9215 - -
-    0xA217: "SensingMethod", # 0x9217 - -
+    0xA20B: "FlashEnergy",  # 0x920B in TIFF/EP
+    0xA20C: "SpatialFrequencyResponse",  # 0x920C - -
+    0xA20E: "FocalPlaneXResolution",  # 0x920E - -
+    0xA20F: "FocalPlaneYResolution",  # 0x920F - -
+    0xA210: "FocalPlaneResolutionUnit",  # 0x9210 - -
+    0xA214: "SubjectLocation",  # 0x9214 - -
+    0xA215: "ExposureIndex",  # 0x9215 - -
+    0xA217: "SensingMethod",  # 0x9217 - -
     0xA300: "FileSource",
     0xA301: "SceneType",
-    0xA302: "CFAPattern", # 0x828E in TIFF/EP
+    0xA302: "CFAPattern",  # 0x828E in TIFF/EP
     0xA401: "CustomRendered",
     0xA402: "ExposureMode",
     0xA403: "WhiteBalance",
@@ -148,17 +148,18 @@ GPS_TA0GS = {
     0x1B: "GPSProcessingMethod",
     0x1C: "GPSAreaInformation",
     0x1D: "GPSDateStamp",
-    0x1E: "GPSDifferential"
+    0x1E: "GPSDifferential",
 }

-INTEL_ENDIAN = ord('I')
-MOTOROLA_ENDIAN = ord('M')
+INTEL_ENDIAN = ord("I")
+MOTOROLA_ENDIAN = ord("M")

 # About MAX_COUNT: It's possible to have corrupted exif tags where the entry count is way too high
 # and thus makes us loop, not endlessly, but for a heck of a long time for nothing. Therefore, we put
 # an arbitrary limit on the entry count we'll allow ourselves to read and any IFD reporting more
 # entries than that will be considered corrupt.
-MAX_COUNT = 0xffff
+MAX_COUNT = 0xFFFF


 def s2n_motorola(bytes):
     x = 0
@@ -166,6 +167,7 @@ def s2n_motorola(bytes):
         x = (x << 8) | c
     return x

+
 def s2n_intel(bytes):
     x = 0
     y = 0
@@ -174,13 +176,14 @@ def s2n_intel(bytes):
         y = y + 8
     return x

+
 class Fraction:
     def __init__(self, num, den):
         self.num = num
         self.den = den

     def __repr__(self):
-        return '%d/%d' % (self.num, self.den)
+        return "%d/%d" % (self.num, self.den)


 class TIFF_file:
@@ -190,16 +193,22 @@ class TIFF_file:
         self.s2nfunc = s2n_intel if self.endian == INTEL_ENDIAN else s2n_motorola

     def s2n(self, offset, length, signed=0, debug=False):
-        slice = self.data[offset:offset+length]
+        slice = self.data[offset : offset + length]
         val = self.s2nfunc(slice)
         # Sign extension ?
         if signed:
-            msb = 1 << (8*length - 1)
+            msb = 1 << (8 * length - 1)
             if val & msb:
                 val = val - (msb << 1)
         if debug:
             logging.debug(self.endian)
-            logging.debug("Slice for offset %d length %d: %r and value: %d", offset, length, slice, val)
+            logging.debug(
+                "Slice for offset %d length %d: %r and value: %d",
+                offset,
+                length,
+                slice,
+                val,
+            )
         return val

     def first_IFD(self):
@@ -225,30 +234,31 @@ class TIFF_file:
             return []
         a = []
         for i in range(entries):
-            entry = ifd + 2 + 12*i
+            entry = ifd + 2 + 12 * i
             tag = self.s2n(entry, 2)
-            type = self.s2n(entry+2, 2)
+            type = self.s2n(entry + 2, 2)
             if not 1 <= type <= 10:
-                continue # not handled
-            typelen = [1, 1, 2, 4, 8, 1, 1, 2, 4, 8][type-1]
-            count = self.s2n(entry+4, 4)
+                continue  # not handled
+            typelen = [1, 1, 2, 4, 8, 1, 1, 2, 4, 8][type - 1]
+            count = self.s2n(entry + 4, 4)
             if count > MAX_COUNT:
                 logging.debug("Probably corrupt. Aborting.")
                 return []
-            offset = entry+8
-            if count*typelen > 4:
+            offset = entry + 8
+            if count * typelen > 4:
                 offset = self.s2n(offset, 4)
             if type == 2:
                 # Special case: nul-terminated ASCII string
-                values = str(self.data[offset:offset+count-1], encoding='latin-1')
+                values = str(self.data[offset : offset + count - 1], encoding="latin-1")
             else:
                 values = []
-                signed = (type == 6 or type >= 8)
+                signed = type == 6 or type >= 8
                 for j in range(count):
                     if type in {5, 10}:
                         # The type is either 5 or 10
-                        value_j = Fraction(self.s2n(offset, 4, signed),
-                                           self.s2n(offset+4, 4, signed))
+                        value_j = Fraction(
+                            self.s2n(offset, 4, signed), self.s2n(offset + 4, 4, signed)
+                        )
                     else:
                         # Not a fraction
                         value_j = self.s2n(offset, typelen, signed)
@@ -258,32 +268,37 @@ class TIFF_file:
             a.append((tag, type, values))
         return a

+
 def read_exif_header(fp):
     # If `fp`'s first bytes are not exif, it tries to find it in the next 4kb
     def isexif(data):
-        return data[0:4] == b'\377\330\377\341' and data[6:10] == b'Exif'
+        return data[0:4] == b"\377\330\377\341" and data[6:10] == b"Exif"

     data = fp.read(12)
     if isexif(data):
         return data
     # ok, not exif, try to find it
     large_data = fp.read(4096)
     try:
-        index = large_data.index(b'Exif')
-        data = large_data[index-6:index+6]
+        index = large_data.index(b"Exif")
+        data = large_data[index - 6 : index + 6]
         # large_data omits the first 12 bytes, and the index is at the middle of the header, so we
         # must seek index + 18
-        fp.seek(index+18)
+        fp.seek(index + 18)
         return data
     except ValueError:
         raise ValueError("Not an Exif file")

+
 def get_fields(fp):
     data = read_exif_header(fp)
     length = data[4] * 256 + data[5]
     logging.debug("Exif header length: %d bytes", length)
-    data = fp.read(length-8)
+    data = fp.read(length - 8)
     data_format = data[0]
-    logging.debug("%s format", {INTEL_ENDIAN: 'Intel', MOTOROLA_ENDIAN: 'Motorola'}[data_format])
+    logging.debug(
+        "%s format", {INTEL_ENDIAN: "Intel", MOTOROLA_ENDIAN: "Motorola"}[data_format]
+    )
     T = TIFF_file(data)
     # There may be more than one IFD per file, but we only read the first one because others are
     # most likely thumbnails.
@@ -294,9 +309,9 @@ def get_fields(fp):
         try:
             stag = EXIF_TAGS[tag]
         except KeyError:
-            stag = '0x%04X' % tag
+            stag = "0x%04X" % tag
         if stag in result:
-            return # don't overwrite data
+            return  # don't overwrite data
         result[stag] = values

     logging.debug("IFD at offset %d", main_IFD_offset)
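The two decoders above differ only in byte order, which a two-byte input makes obvious:

print(s2n_motorola(b"\x01\x02"))  # 258, big-endian: 0x0102
print(s2n_intel(b"\x01\x02"))     # 513, little-endian: 0x0201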
@ -1,24 +1,26 @@
# Created By: Virgil Dupras
# Created On: 2014-03-15
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

import plistlib


class IPhotoPlistParser(plistlib._PlistParser):
"""A parser for iPhoto plists.

iPhoto plists tend to be malformed, so we have to subclass the built-in parser to be a bit more
lenient.
"""

def __init__(self):
plistlib._PlistParser.__init__(self, use_builtin_types=True, dict_type=dict)
# For debugging purposes, we remember the last bit of data to be analyzed so that we can
# log it in case of an exception
self.lastdata = ''
self.lastdata = ""

def get_data(self):
self.lastdata = plistlib._PlistParser.get_data(self)
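The class relies on plistlib's private parser, so the usual entry point is the inherited parse() method. A minimal sketch, assuming the import path core.pe.iphoto_plist and an iPhoto AlbumData.xml on disk; since _PlistParser is private API, the exact signature can vary across Python versions:

from core.pe.iphoto_plist import IPhotoPlistParser  # assumed import path

parser = IPhotoPlistParser()
with open("AlbumData.xml", "rb") as fp:  # typical iPhoto library plist
    plist = parser.parse(fp)  # _PlistParser.parse() reads a binary file object
print(type(plist))  # dict, because dict_type=dict was passed to __init__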
@ -48,14 +48,18 @@ except Exception:
logging.warning("Had problems to determine cpu count on launch.")
RESULTS_QUEUE_LIMIT = 8


def get_cache(cache_path, readonly=False):
if cache_path.endswith('shelve'):
if cache_path.endswith("shelve"):
from .cache_shelve import ShelveCache

return ShelveCache(cache_path, readonly=readonly)
else:
from .cache_sqlite import SqliteCache

return SqliteCache(cache_path, readonly=readonly)
def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
# The MemoryError handlers in there use logging without first caring about whether or not
# there is enough memory left to carry on the operation because it is assumed that the
@ -63,7 +67,7 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
# time that MemoryError is raised.
cache = get_cache(cache_path)
cache.purge_outdated()
prepared = [] # only pictures for which there was no error getting blocks
prepared = []  # only pictures for which there was no error getting blocks
try:
for picture in j.iter_with_progress(pictures, tr("Analyzed %d/%d pictures")):
if not picture.path:
@ -77,7 +81,7 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
picture.unicode_path = str(picture.path)
logging.debug("Analyzing picture at %s", picture.unicode_path)
if with_dimensions:
picture.dimensions # pre-read dimensions
picture.dimensions  # pre-read dimensions
try:
if picture.unicode_path not in cache:
blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
@ -86,32 +90,45 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
except (IOError, ValueError) as e:
logging.warning(str(e))
except MemoryError:
logging.warning("Ran out of memory while reading %s of size %d", picture.unicode_path, picture.size)
if picture.size < 10 * 1024 * 1024: # We're really running out of memory
logging.warning(
"Ran out of memory while reading %s of size %d",
picture.unicode_path,
picture.size,
)
if (
picture.size < 10 * 1024 * 1024
): # We're really running out of memory
raise
except MemoryError:
logging.warning('Ran out of memory while preparing pictures')
logging.warning("Ran out of memory while preparing pictures")
cache.close()
return prepared
def get_chunks(pictures):
min_chunk_count = multiprocessing.cpu_count() * 2 # have enough chunks to feed all subprocesses
min_chunk_count = (
multiprocessing.cpu_count() * 2
) # have enough chunks to feed all subprocesses
chunk_count = len(pictures) // DEFAULT_CHUNK_SIZE
chunk_count = max(min_chunk_count, chunk_count)
chunk_size = (len(pictures) // chunk_count) + 1
chunk_size = max(MIN_CHUNK_SIZE, chunk_size)
logging.info(
"Creating %d chunks with a chunk size of %d for %d pictures", chunk_count,
chunk_size, len(pictures)
"Creating %d chunks with a chunk size of %d for %d pictures",
chunk_count,
chunk_size,
len(pictures),
)
chunks = [pictures[i:i+chunk_size] for i in range(0, len(pictures), chunk_size)]
chunks = [pictures[i : i + chunk_size] for i in range(0, len(pictures), chunk_size)]
return chunks
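The arithmetic above is easy to misread in diff form. A self-contained walk-through with assumed values for DEFAULT_CHUNK_SIZE and MIN_CHUNK_SIZE (the real constants are defined elsewhere in this module):

DEFAULT_CHUNK_SIZE = 50  # assumption for the example
MIN_CHUNK_SIZE = 10      # assumption for the example
cpu_count = 4

pictures = list(range(120))                      # 120 pictures
min_chunk_count = cpu_count * 2                  # at least 8 chunks to feed all subprocesses
chunk_count = max(min_chunk_count, len(pictures) // DEFAULT_CHUNK_SIZE)  # max(8, 2) = 8
chunk_size = max(MIN_CHUNK_SIZE, (len(pictures) // chunk_count) + 1)     # max(10, 16) = 16
chunks = [pictures[i : i + chunk_size] for i in range(0, len(pictures), chunk_size)]
print(len(chunks), [len(c) for c in chunks])     # 8 chunks: seven of 16 pictures, one of 8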
def get_match(first, second, percentage):
if percentage < 0:
percentage = 0
return Match(first, second, percentage)


def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
# The list of ids in ref_ids have to be compared to the list of ids in other_ids. other_ids
# can be None. In this case, ref_ids has to be compared with itself
@ -142,6 +159,7 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
cache.close()
return results


def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob):
def get_picinfo(p):
if match_scaled:
@ -160,11 +178,16 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
async_results.remove(result)
comparison_count += 1
# About the NOQA below: I think there's a bug in pyflakes. To investigate...
progress_msg = tr("Performed %d/%d chunk matches") % (comparison_count, len(comparisons_to_do)) # NOQA
progress_msg = tr("Performed %d/%d chunk matches") % (
comparison_count,
len(comparisons_to_do),
) # NOQA
j.set_progress(comparison_count, progress_msg)

j = j.start_subjob([3, 7])
pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j)
pictures = prepare_pictures(
pictures, cache_path, with_dimensions=not match_scaled, j=j
)
j = j.start_subjob([9, 1], tr("Preparing for matching"))
cache = get_cache(cache_path)
id2picture = {}
@ -175,7 +198,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
except ValueError:
pass
cache.close()
pictures = [p for p in pictures if hasattr(p, 'cache_id')]
pictures = [p for p in pictures if hasattr(p, "cache_id")]
pool = multiprocessing.Pool()
async_results = []
matches = []
@ -203,9 +226,17 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
# some wiggle room, log about the incident, and stop matching right here. We then process
# the matches we have. The rest of the process doesn't allocate much and we should be
# alright.
del comparisons_to_do, chunks, pictures # some wiggle room for the next statements
logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches))
del matches[-len(matches)//3:] # some wiggle room to ensure we don't run out of memory again.
del (
comparisons_to_do,
chunks,
pictures,
) # some wiggle room for the next statements
logging.warning(
"Ran out of memory when scanning! We had %d matches.", len(matches)
)
del matches[
-len(matches) // 3 :
] # some wiggle room to ensure we don't run out of memory again.
pool.close()
result = []
myiter = j.iter_with_progress(
@ -220,10 +251,10 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
if percentage == 100 and ref.md5 != other.md5:
percentage = 99
if percentage >= threshold:
ref.dimensions # pre-read dimensions for display in results
ref.dimensions  # pre-read dimensions for display in results
other.dimensions
result.append(get_match(ref, other, percentage))
return result

multiprocessing.freeze_support()

multiprocessing.freeze_support()
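The matching loop in getmatches() follows a common multiprocessing shape: submit one job per chunk pair with apply_async(), then poll the pending results and drain the ones that are ready. A self-contained miniature of that pattern, with square() standing in for async_compare():

import multiprocessing


def square(n):
    return n * n


if __name__ == "__main__":
    pool = multiprocessing.Pool()
    # one pending AsyncResult per submitted job, like comparisons_to_do above
    async_results = [pool.apply_async(square, (n,)) for n in range(10)]
    results = []
    while async_results:
        for result in list(async_results):
            if result.ready():
                results.append(result.get())
                async_results.remove(result)
    pool.close()
    pool.join()
    print(sorted(results))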
@ -13,14 +13,15 @@ from hscommon.trans import tr

from core.engine import Match


def getmatches(files, match_scaled, j):
timestamp2pic = defaultdict(set)
for picture in j.iter_with_progress(files, tr("Read EXIF of %d/%d pictures")):
timestamp = picture.exif_timestamp
if timestamp:
timestamp2pic[timestamp].add(picture)
if '0000:00:00 00:00:00' in timestamp2pic: # very likely false matches
del timestamp2pic['0000:00:00 00:00:00']
if "0000:00:00 00:00:00" in timestamp2pic:  # very likely false matches
del timestamp2pic["0000:00:00 00:00:00"]
matches = []
for pictures in timestamp2pic.values():
for p1, p2 in combinations(pictures, 2):
@ -28,4 +29,3 @@ def getmatches(files, match_scaled, j):
continue
matches.append(Match(p1, p2, 100))
return matches
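The grouping idea above is compact enough to demonstrate standalone: bucket pictures by timestamp, drop the all-zero junk stamp, then pair everything left inside each bucket. A runnable sketch with made-up file names:

from collections import defaultdict
from itertools import combinations

timestamps = {
    "a.jpg": "2015:03:14 09:26:53",
    "b.jpg": "2015:03:14 09:26:53",
    "c.jpg": "0000:00:00 00:00:00",  # junk timestamp, dropped below
    "d.jpg": "0000:00:00 00:00:00",
}
timestamp2pic = defaultdict(set)
for name, stamp in timestamps.items():
    timestamp2pic[stamp].add(name)
timestamp2pic.pop("0000:00:00 00:00:00", None)  # very likely false matches
pairs = [pair for pics in timestamp2pic.values() for pair in combinations(pics, 2)]
print(pairs)  # [('a.jpg', 'b.jpg')] (order within the pair may vary)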
@ -14,23 +14,22 @@ from . import exif
# This global value is set by the platform-specific subclasser of the Photo base class
PLAT_SPECIFIC_PHOTO_CLASS = None


def format_dimensions(dimensions):
return '%d x %d' % (dimensions[0], dimensions[1])
return "%d x %d" % (dimensions[0], dimensions[1])


def get_delta_dimensions(value, ref_value):
return (value[0]-ref_value[0], value[1]-ref_value[1])
return (value[0] - ref_value[0], value[1] - ref_value[1])


class Photo(fs.File):
INITIAL_INFO = fs.File.INITIAL_INFO.copy()
INITIAL_INFO.update({
'dimensions': (0, 0),
'exif_timestamp': '',
})
INITIAL_INFO.update({"dimensions": (0, 0), "exif_timestamp": ""})
__slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())

# These extensions are supported on all platforms
HANDLED_EXTS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'tif'}
HANDLED_EXTS = {"png", "jpg", "jpeg", "gif", "bmp", "tiff", "tif"}

def _plat_get_dimensions(self):
raise NotImplementedError()
@ -39,25 +38,25 @@ class Photo(fs.File):
raise NotImplementedError()

def _get_orientation(self):
if not hasattr(self, '_cached_orientation'):
if not hasattr(self, "_cached_orientation"):
try:
with self.path.open('rb') as fp:
with self.path.open("rb") as fp:
exifdata = exif.get_fields(fp)
# the value is a list (probably one-sized) of ints
orientations = exifdata['Orientation']
orientations = exifdata["Orientation"]
self._cached_orientation = orientations[0]
except Exception: # Couldn't read EXIF data, no transforms
except Exception:  # Couldn't read EXIF data, no transforms
self._cached_orientation = 0
return self._cached_orientation

def _get_exif_timestamp(self):
try:
with self.path.open('rb') as fp:
with self.path.open("rb") as fp:
exifdata = exif.get_fields(fp)
return exifdata['DateTimeOriginal']
return exifdata["DateTimeOriginal"]
except Exception:
logging.info("Couldn't read EXIF of picture: %s", self.path)
return ''
return ""

@classmethod
def can_handle(cls, path):
@ -79,28 +78,27 @@ class Photo(fs.File):
else:
percentage = group.percentage
dupe_count = len(group.dupes)
dupe_folder_path = getattr(self, 'display_folder_path', self.folder_path)
dupe_folder_path = getattr(self, "display_folder_path", self.folder_path)
return {
'name': self.name,
'folder_path': str(dupe_folder_path),
'size': format_size(size, 0, 1, False),
'extension': self.extension,
'dimensions': format_dimensions(dimensions),
'exif_timestamp': self.exif_timestamp,
'mtime': format_timestamp(mtime, delta and m),
'percentage': format_perc(percentage),
'dupe_count': format_dupe_count(dupe_count),
"name": self.name,
"folder_path": str(dupe_folder_path),
"size": format_size(size, 0, 1, False),
"extension": self.extension,
"dimensions": format_dimensions(dimensions),
"exif_timestamp": self.exif_timestamp,
"mtime": format_timestamp(mtime, delta and m),
"percentage": format_perc(percentage),
"dupe_count": format_dupe_count(dupe_count),
}

def _read_info(self, field):
fs.File._read_info(self, field)
if field == 'dimensions':
if field == "dimensions":
self.dimensions = self._plat_get_dimensions()
if self._get_orientation() in {5, 6, 7, 8}:
self.dimensions = (self.dimensions[1], self.dimensions[0])
elif field == 'exif_timestamp':
elif field == "exif_timestamp":
self.exif_timestamp = self._get_exif_timestamp()

def get_blocks(self, block_count_per_side):
return self._plat_get_blocks(block_count_per_side, self._get_orientation())
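A note on the dimension swap in _read_info(): EXIF orientations 5 through 8 encode quarter-turn rotations, so width and height must be exchanged before the dimensions are reported. A standalone sketch of that rule:

def oriented_dimensions(dimensions, orientation):
    # 1-4 are upright/flipped variants; 5-8 involve a 90 or 270 degree turn
    if orientation in {5, 6, 7, 8}:
        return (dimensions[1], dimensions[0])
    return dimensions


print(oriented_dimensions((4000, 3000), 6))  # (3000, 4000)
print(oriented_dimensions((4000, 3000), 1))  # (4000, 3000)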
@ -8,11 +8,16 @@
from hscommon.trans import trget

from core.prioritize import (
KindCategory, FolderCategory, FilenameCategory, NumericalCategory,
SizeCategory, MtimeCategory
KindCategory,
FolderCategory,
FilenameCategory,
NumericalCategory,
SizeCategory,
MtimeCategory,
)

coltr = trget('columns')
coltr = trget("columns")


class DimensionsCategory(NumericalCategory):
NAME = coltr("Dimensions")
@ -24,8 +29,13 @@ class DimensionsCategory(NumericalCategory):
width, height = value
return (-width, -height)


def all_categories():
return [
KindCategory, FolderCategory, FilenameCategory, SizeCategory, DimensionsCategory,
MtimeCategory
KindCategory,
FolderCategory,
FilenameCategory,
SizeCategory,
DimensionsCategory,
MtimeCategory,
]
@ -1,8 +1,8 @@
# Created On: 2011-11-27
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

from hscommon.gui.column import Column
@ -10,19 +10,20 @@ from hscommon.trans import trget

from core.gui.result_table import ResultTable as ResultTableBase

coltr = trget('columns')
coltr = trget("columns")


class ResultTable(ResultTableBase):
COLUMNS = [
Column('marked', ''),
Column('name', coltr("Filename")),
Column('folder_path', coltr("Folder"), optional=True),
Column('size', coltr("Size (KB)"), optional=True),
Column('extension', coltr("Kind"), visible=False, optional=True),
Column('dimensions', coltr("Dimensions"), optional=True),
Column('exif_timestamp', coltr("EXIF Timestamp"), visible=False, optional=True),
Column('mtime', coltr("Modification"), visible=False, optional=True),
Column('percentage', coltr("Match %"), optional=True),
Column('dupe_count', coltr("Dupe Count"), visible=False, optional=True),
Column("marked", ""),
Column("name", coltr("Filename")),
Column("folder_path", coltr("Folder"), optional=True),
Column("size", coltr("Size (KB)"), optional=True),
Column("extension", coltr("Kind"), visible=False, optional=True),
Column("dimensions", coltr("Dimensions"), optional=True),
Column("exif_timestamp", coltr("EXIF Timestamp"), visible=False, optional=True),
Column("mtime", coltr("Modification"), visible=False, optional=True),
Column("percentage", coltr("Match %"), optional=True),
Column("dupe_count", coltr("Dupe Count"), visible=False, optional=True),
]
DELTA_COLUMNS = {'size', 'dimensions', 'mtime'}
DELTA_COLUMNS = {"size", "dimensions", "mtime"}
@ -10,6 +10,7 @@ from core.scanner import Scanner, ScanType, ScanOption

from . import matchblock, matchexif


class ScannerPE(Scanner):
cache_path = None
match_scaled = False
@ -28,10 +29,9 @@ class ScannerPE(Scanner):
cache_path=self.cache_path,
threshold=self.min_match_percentage,
match_scaled=self.match_scaled,
j=j
j=j,
)
elif self.scan_type == ScanType.ExifTimestamp:
return matchexif.getmatches(files, self.match_scaled, j)
else:
raise Exception("Invalid scan type")
@ -1,48 +1,50 @@
# Created By: Virgil Dupras
# Created On: 2011/09/07
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

from hscommon.util import dedupe, flatten, rem_file_ext
from hscommon.trans import trget, tr

coltr = trget('columns')
coltr = trget("columns")


class CriterionCategory:
NAME = "Undefined"

def __init__(self, results):
self.results = results

#--- Virtual

# --- Virtual
def extract_value(self, dupe):
raise NotImplementedError()

def format_criterion_value(self, value):
return value

def sort_key(self, dupe, crit_value):
raise NotImplementedError()

def criteria_list(self):
raise NotImplementedError()


class Criterion:
def __init__(self, category, value):
self.category = category
self.value = value
self.display_value = category.format_criterion_value(value)

def sort_key(self, dupe):
return self.category.sort_key(dupe, self.value)

@property
def display(self):
return "{} ({})".format(self.category.NAME, self.display_value)
class ValueListCategory(CriterionCategory):
def sort_key(self, dupe, crit_value):
@ -52,45 +54,47 @@ class ValueListCategory(CriterionCategory):
return 0
else:
return 1

def criteria_list(self):
dupes = flatten(g[:] for g in self.results.groups)
values = sorted(dedupe(self.extract_value(d) for d in dupes))
return [Criterion(self, value) for value in values]


class KindCategory(ValueListCategory):
NAME = coltr("Kind")

def extract_value(self, dupe):
value = dupe.extension
if not value:
value = tr("None")
return value


class FolderCategory(ValueListCategory):
NAME = coltr("Folder")

def extract_value(self, dupe):
return dupe.folder_path

def format_criterion_value(self, value):
return str(value)

def sort_key(self, dupe, crit_value):
value = self.extract_value(dupe)
if value[:len(crit_value)] == crit_value:
if value[: len(crit_value)] == crit_value:
return 0
else:
return 1
class FilenameCategory(CriterionCategory):
NAME = coltr("Filename")
ENDS_WITH_NUMBER = 0
DOESNT_END_WITH_NUMBER = 1
LONGEST = 2
SHORTEST = 3

def format_criterion_value(self, value):
return {
self.ENDS_WITH_NUMBER: tr("Ends with number"),
@ -98,10 +102,10 @@ class FilenameCategory(CriterionCategory):
self.LONGEST: tr("Longest"),
self.SHORTEST: tr("Shortest"),
}[value]

def extract_value(self, dupe):
return rem_file_ext(dupe.name)

def sort_key(self, dupe, crit_value):
value = self.extract_value(dupe)
if crit_value in {self.ENDS_WITH_NUMBER, self.DOESNT_END_WITH_NUMBER}:
@ -113,50 +117,57 @@ class FilenameCategory(CriterionCategory):
else:
value = len(value)
if crit_value == self.LONGEST:
value *= -1 # We want the biggest values on top
value *= -1  # We want the biggest values on top
return value

def criteria_list(self):
return [Criterion(self, crit_value) for crit_value in [
self.ENDS_WITH_NUMBER,
self.DOESNT_END_WITH_NUMBER,
self.LONGEST,
self.SHORTEST,
]]
return [
Criterion(self, crit_value)
for crit_value in [
self.ENDS_WITH_NUMBER,
self.DOESNT_END_WITH_NUMBER,
self.LONGEST,
self.SHORTEST,
]
]
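A runnable trace of the length branch in FilenameCategory.sort_key(): LONGEST negates the length so that an ascending sort puts the longest names first.

LONGEST, SHORTEST = 2, 3

def length_key(stem, crit_value):
    value = len(stem)
    if crit_value == LONGEST:
        value *= -1  # biggest values on top
    return value

names = ["holiday", "holiday copy", "h"]
print(sorted(names, key=lambda n: length_key(n, LONGEST)))   # longest first
print(sorted(names, key=lambda n: length_key(n, SHORTEST)))  # shortest first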
class NumericalCategory(CriterionCategory):
HIGHEST = 0
LOWEST = 1

def format_criterion_value(self, value):
return tr("Highest") if value == self.HIGHEST else tr("Lowest")

def invert_numerical_value(self, value): # Virtual

def invert_numerical_value(self, value):  # Virtual
return value * -1

def sort_key(self, dupe, crit_value):
value = self.extract_value(dupe)
if crit_value == self.HIGHEST: # we want highest values on top
if crit_value == self.HIGHEST:  # we want highest values on top
value = self.invert_numerical_value(value)
return value

def criteria_list(self):
return [Criterion(self, self.HIGHEST), Criterion(self, self.LOWEST)]


class SizeCategory(NumericalCategory):
NAME = coltr("Size")

def extract_value(self, dupe):
return dupe.size


class MtimeCategory(NumericalCategory):
NAME = coltr("Modification")

def extract_value(self, dupe):
return dupe.mtime

def format_criterion_value(self, value):
return tr("Newest") if value == self.HIGHEST else tr("Oldest")


def all_categories():
return [KindCategory, FolderCategory, FilenameCategory, SizeCategory, MtimeCategory]
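The same inversion trick drives NumericalCategory: sort_key() returns the negated value for HIGHEST so that a plain ascending sort surfaces the biggest values. A standalone check with plain ints standing in for dupes:

sizes = [300, 100, 200]
HIGHEST, LOWEST = 0, 1

def sort_key(value, crit_value):
    return -value if crit_value == HIGHEST else value

print(sorted(sizes, key=lambda s: sort_key(s, HIGHEST)))  # [300, 200, 100]
print(sorted(sizes, key=lambda s: sort_key(s, LOWEST)))   # [100, 200, 300]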
core/results.py
@ -20,6 +20,7 @@ from hscommon.trans import tr
from . import engine
from .markable import Markable


class Results(Markable):
"""Manages a collection of duplicate :class:`~core.engine.Group`.

@ -34,21 +35,22 @@ class Results(Markable):
A list of all duplicates (:class:`~core.fs.File` instances), without ref, contained in the
currently managed :attr:`groups`.
"""
#---Override

# ---Override
def __init__(self, app):
Markable.__init__(self)
self.__groups = []
self.__group_of_duplicate = {}
self.__groups_sort_descriptor = None # This is a tuple (key, asc)
self.__groups_sort_descriptor = None  # This is a tuple (key, asc)
self.__dupes = None
self.__dupes_sort_descriptor = None # This is a tuple (key, asc, delta)
self.__dupes_sort_descriptor = None  # This is a tuple (key, asc, delta)
self.__filters = None
self.__filtered_dupes = None
self.__filtered_groups = None
self.__recalculate_stats()
self.__marked_size = 0
self.app = app
self.problems = [] # (dupe, error_msg)
self.problems = []  # (dupe, error_msg)
self.is_modified = False

def _did_mark(self, dupe):
@ -90,7 +92,7 @@ class Results(Markable):
else:
Markable.mark_none(self)

#---Private
# ---Private
def __get_dupe_list(self):
if self.__dupes is None:
self.__dupes = flatten(group.dupes for group in self.groups)
@ -98,10 +100,13 @@ class Results(Markable):
# This is debug logging to try to figure out #44
logging.warning(
"There is a None value in the Results' dupe list. dupes: %r groups: %r",
self.__dupes, self.groups
self.__dupes,
self.groups,
)
if self.__filtered_dupes:
self.__dupes = [dupe for dupe in self.__dupes if dupe in self.__filtered_dupes]
self.__dupes = [
dupe for dupe in self.__dupes if dupe in self.__filtered_dupes
]
sd = self.__dupes_sort_descriptor
if sd:
self.sort_dupes(sd[0], sd[1], sd[2])
@ -120,10 +125,18 @@
total_count = self.__total_count
total_size = self.__total_size
else:
mark_count = len([dupe for dupe in self.__filtered_dupes if self.is_marked(dupe)])
marked_size = sum(dupe.size for dupe in self.__filtered_dupes if self.is_marked(dupe))
total_count = len([dupe for dupe in self.__filtered_dupes if self.is_markable(dupe)])
total_size = sum(dupe.size for dupe in self.__filtered_dupes if self.is_markable(dupe))
mark_count = len(
[dupe for dupe in self.__filtered_dupes if self.is_marked(dupe)]
)
marked_size = sum(
dupe.size for dupe in self.__filtered_dupes if self.is_marked(dupe)
)
total_count = len(
[dupe for dupe in self.__filtered_dupes if self.is_markable(dupe)]
)
total_size = sum(
dupe.size for dupe in self.__filtered_dupes if self.is_markable(dupe)
)
if self.mark_inverted:
marked_size = self.__total_size - marked_size
result = tr("%d / %d (%s / %s) duplicates marked.") % (
@ -133,7 +146,7 @@
format_size(total_size, 2),
)
if self.__filters:
result += tr(" filter: %s") % ' --> '.join(self.__filters)
result += tr(" filter: %s") % " --> ".join(self.__filters)
return result

def __recalculate_stats(self):
@ -151,7 +164,7 @@
for g in self.__groups:
for dupe in g:
self.__group_of_duplicate[dupe] = g
if not hasattr(dupe, 'is_ref'):
if not hasattr(dupe, "is_ref"):
dupe.is_ref = False
self.is_modified = bool(self.__groups)
old_filters = nonone(self.__filters, [])
@ -159,7 +172,7 @@
for filter_str in old_filters:
self.apply_filter(filter_str)

#---Public
# ---Public
def apply_filter(self, filter_str):
"""Applies a filter ``filter_str`` to :attr:`groups`
@ -182,11 +195,15 @@
try:
filter_re = re.compile(filter_str, re.IGNORECASE)
except re.error:
return # don't apply this filter.
return  # don't apply this filter.
self.__filters.append(filter_str)
if self.__filtered_dupes is None:
self.__filtered_dupes = flatten(g[:] for g in self.groups)
self.__filtered_dupes = set(dupe for dupe in self.__filtered_dupes if filter_re.search(str(dupe.path)))
self.__filtered_dupes = set(
dupe
for dupe in self.__filtered_dupes
if filter_re.search(str(dupe.path))
)
filtered_groups = set()
for dupe in self.__filtered_dupes:
filtered_groups.add(self.get_group_of_duplicate(dupe))
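apply_filter() compiles the string case-insensitively and keeps only dupes whose path matches. A self-contained rerun of that step on plain path strings:

import re

paths = ["/pics/IMG_0001.jpg", "/pics/IMG_0002.jpg", "/docs/report.pdf"]
filter_re = re.compile(r"img_\d+", re.IGNORECASE)
filtered = set(p for p in paths if filter_re.search(p))
print(sorted(filtered))  # ['/pics/IMG_0001.jpg', '/pics/IMG_0002.jpg']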
@ -214,6 +231,7 @@
:param get_file: a function f(path) returning a :class:`~core.fs.File` wrapping the path.
:param j: A :ref:`job progress instance <jobs>`.
"""

def do_match(ref_file, other_files, group):
if not other_files:
return
@ -223,31 +241,31 @@

self.apply_filter(None)
root = ET.parse(infile).getroot()
group_elems = list(root.getiterator('group'))
group_elems = list(root.getiterator("group"))
groups = []
marked = set()
for group_elem in j.iter_with_progress(group_elems, every=100):
group = engine.Group()
dupes = []
for file_elem in group_elem.getiterator('file'):
path = file_elem.get('path')
words = file_elem.get('words', '')
for file_elem in group_elem.getiterator("file"):
path = file_elem.get("path")
words = file_elem.get("words", "")
if not path:
continue
file = get_file(path)
if file is None:
continue
file.words = words.split(',')
file.is_ref = file_elem.get('is_ref') == 'y'
file.words = words.split(",")
file.is_ref = file_elem.get("is_ref") == "y"
dupes.append(file)
if file_elem.get('marked') == 'y':
if file_elem.get("marked") == "y":
marked.add(file)
for match_elem in group_elem.getiterator('match'):
for match_elem in group_elem.getiterator("match"):
try:
attrs = match_elem.attrib
first_file = dupes[int(attrs['first'])]
second_file = dupes[int(attrs['second'])]
percentage = int(attrs['percentage'])
first_file = dupes[int(attrs["first"])]
second_file = dupes[int(attrs["second"])]
percentage = int(attrs["percentage"])
group.add_match(engine.Match(first_file, second_file, percentage))
except (IndexError, KeyError, ValueError):
# Covers missing attr, non-int values and indexes out of bounds
@ -339,9 +357,9 @@
:param outfile: file object or path.
"""
self.apply_filter(None)
root = ET.Element('results')
root = ET.Element("results")
for g in self.groups:
group_elem = ET.SubElement(root, 'group')
group_elem = ET.SubElement(root, "group")
dupe2index = {}
for index, d in enumerate(g):
dupe2index[d] = index
@ -349,24 +367,24 @@
words = engine.unpack_fields(d.words)
except AttributeError:
words = ()
file_elem = ET.SubElement(group_elem, 'file')
file_elem = ET.SubElement(group_elem, "file")
try:
file_elem.set('path', str(d.path))
file_elem.set('words', ','.join(words))
except ValueError: # If there's an invalid character, just skip the file
file_elem.set('path', '')
file_elem.set('is_ref', ('y' if d.is_ref else 'n'))
file_elem.set('marked', ('y' if self.is_marked(d) else 'n'))
file_elem.set("path", str(d.path))
file_elem.set("words", ",".join(words))
except ValueError:  # If there's an invalid character, just skip the file
file_elem.set("path", "")
file_elem.set("is_ref", ("y" if d.is_ref else "n"))
file_elem.set("marked", ("y" if self.is_marked(d) else "n"))
for match in g.matches:
match_elem = ET.SubElement(group_elem, 'match')
match_elem.set('first', str(dupe2index[match.first]))
match_elem.set('second', str(dupe2index[match.second]))
match_elem.set('percentage', str(int(match.percentage)))
match_elem = ET.SubElement(group_elem, "match")
match_elem.set("first", str(dupe2index[match.first]))
match_elem.set("second", str(dupe2index[match.second]))
match_elem.set("percentage", str(int(match.percentage)))
tree = ET.ElementTree(root)

def do_write(outfile):
with FileOrPath(outfile, 'wb') as fp:
tree.write(fp, encoding='utf-8')
with FileOrPath(outfile, "wb") as fp:
tree.write(fp, encoding="utf-8")

try:
do_write(outfile)
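The save path serializes results into a small XML vocabulary: a <results> root contains <group> elements, each holding <file> and <match> children. A self-contained sketch that produces the same shape with made-up data:

import xml.etree.ElementTree as ET

root = ET.Element("results")
group_elem = ET.SubElement(root, "group")
for index, (path, is_ref) in enumerate([("/pics/a.jpg", True), ("/pics/b.jpg", False)]):
    file_elem = ET.SubElement(group_elem, "file")
    file_elem.set("path", path)
    file_elem.set("words", "")
    file_elem.set("is_ref", "y" if is_ref else "n")
    file_elem.set("marked", "n")
match_elem = ET.SubElement(group_elem, "match")
match_elem.set("first", "0")   # indexes into the group's file list
match_elem.set("second", "1")
match_elem.set("percentage", "100")
print(ET.tostring(root).decode())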
@ -392,7 +410,9 @@
"""
if not self.__dupes:
self.__get_dupe_list()
keyfunc = lambda d: self.app._get_dupe_sort_key(d, lambda: self.get_group_of_duplicate(d), key, delta)
keyfunc = lambda d: self.app._get_dupe_sort_key(
d, lambda: self.get_group_of_duplicate(d), key, delta
)
self.__dupes.sort(key=keyfunc, reverse=not asc)
self.__dupes_sort_descriptor = (key, asc, delta)

@ -408,8 +428,7 @@
self.groups.sort(key=keyfunc, reverse=not asc)
self.__groups_sort_descriptor = (key, asc)

#---Properties
# ---Properties
dupes = property(__get_dupe_list)
groups = property(__get_groups, __set_groups)
stat_line = property(__get_stat_line)
@ -19,6 +19,7 @@ from . import engine
# there will be some nasty bugs popping up (ScanType is used in core when in should exclusively be
# used in core_*). One day I'll clean this up.


class ScanType:
Filename = 0
Fields = 1
@ -27,23 +28,26 @@ class ScanType:
Folders = 4
Contents = 5

#PE
# PE
FuzzyBlock = 10
ExifTimestamp = 11

ScanOption = namedtuple('ScanOption', 'scan_type label')

SCANNABLE_TAGS = ['track', 'artist', 'album', 'title', 'genre', 'year']
ScanOption = namedtuple("ScanOption", "scan_type label")

SCANNABLE_TAGS = ["track", "artist", "album", "title", "genre", "year"]

RE_DIGIT_ENDING = re.compile(r"\d+|\(\d+\)|\[\d+\]|{\d+}")

RE_DIGIT_ENDING = re.compile(r'\d+|\(\d+\)|\[\d+\]|{\d+}')

def is_same_with_digit(name, refname):
# Returns True if name is the same as refname, but with digits (with brackets or not) at the end
if not name.startswith(refname):
return False
end = name[len(refname):].strip()
end = name[len(refname) :].strip()
return RE_DIGIT_ENDING.match(end) is not None
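A quick standalone check of RE_DIGIT_ENDING and is_same_with_digit(): a name qualifies when the leftover after the common prefix is digits, optionally wrapped in (), [] or {}.

import re

RE_DIGIT_ENDING = re.compile(r"\d+|\(\d+\)|\[\d+\]|{\d+}")

def is_same_with_digit(name, refname):
    if not name.startswith(refname):
        return False
    end = name[len(refname):].strip()
    return RE_DIGIT_ENDING.match(end) is not None

print(is_same_with_digit("photo (2)", "photo"))   # True
print(is_same_with_digit("photo copy", "photo"))  # False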
def remove_dupe_paths(files):
# Returns files with duplicates-by-path removed. Files with the exact same path are considered
# duplicates and only the first file to have a path is kept. In certain cases, we have files
@ -57,25 +61,29 @@ def remove_dupe_paths(files):
if normalized in path2file:
try:
if op.samefile(normalized, str(path2file[normalized].path)):
continue # same file, it's a dupe
continue  # same file, it's a dupe
else:
pass # We don't treat them as dupes
pass  # We don't treat them as dupes
except OSError:
continue # File doesn't exist? Well, treat them as dupes
continue  # File doesn't exist? Well, treat them as dupes
else:
path2file[normalized] = f
result.append(f)
return result


class Scanner:
def __init__(self):
self.discarded_file_count = 0

def _getmatches(self, files, j):
if self.size_threshold or self.scan_type in {ScanType.Contents, ScanType.Folders}:
if self.size_threshold or self.scan_type in {
ScanType.Contents,
ScanType.Folders,
}:
j = j.start_subjob([2, 8])
for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
f.size # pre-read, makes a smoother progress if read here (especially for bundles)
f.size  # pre-read, makes a smoother progress if read here (especially for bundles)
if self.size_threshold:
files = [f for f in files if f.size >= self.size_threshold]
if self.scan_type in {ScanType.Contents, ScanType.Folders}:
@ -83,12 +91,12 @@ class Scanner:
else:
j = j.start_subjob([2, 8])
kw = {}
kw['match_similar_words'] = self.match_similar_words
kw['weight_words'] = self.word_weighting
kw['min_match_percentage'] = self.min_match_percentage
kw["match_similar_words"] = self.match_similar_words
kw["weight_words"] = self.word_weighting
kw["min_match_percentage"] = self.min_match_percentage
if self.scan_type == ScanType.FieldsNoOrder:
self.scan_type = ScanType.Fields
kw['no_field_order'] = True
kw["no_field_order"] = True
func = {
ScanType.Filename: lambda f: engine.getwords(rem_file_ext(f.name)),
ScanType.Fields: lambda f: engine.getfields(rem_file_ext(f.name)),
@ -111,9 +119,9 @@ class Scanner:
def _tie_breaker(ref, dupe):
refname = rem_file_ext(ref.name).lower()
dupename = rem_file_ext(dupe.name).lower()
if 'copy' in dupename:
if "copy" in dupename:
return False
if 'copy' in refname:
if "copy" in refname:
return True
if is_same_with_digit(dupename, refname):
return False
@ -130,12 +138,12 @@ class Scanner:
raise NotImplementedError()

def get_dupe_groups(self, files, ignore_list=None, j=job.nulljob):
for f in (f for f in files if not hasattr(f, 'is_ref')):
for f in (f for f in files if not hasattr(f, "is_ref")):
f.is_ref = False
files = remove_dupe_paths(files)
logging.info("Getting matches. Scan type: %d", self.scan_type)
matches = self._getmatches(files, j)
logging.info('Found %d matches' % len(matches))
logging.info("Found %d matches" % len(matches))
j.set_progress(100, tr("Almost done! Fiddling with results..."))
# In removing what we call here "false matches", we first want to remove, if we scan by
# folders, we want to remove folder matches for which the parent is also in a match (they're
@ -153,20 +161,38 @@ class Scanner:
toremove.add(p)
else:
last_parent_path = p
matches = [m for m in matches if m.first.path not in toremove or m.second.path not in toremove]
matches = [
m
for m in matches
if m.first.path not in toremove or m.second.path not in toremove
]
if not self.mix_file_kind:
matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
matches = [
m
for m in matches
if get_file_ext(m.first.name) == get_file_ext(m.second.name)
]
matches = [
m for m in matches if m.first.path.exists() and m.second.path.exists()
]
matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
if ignore_list:
matches = [
m for m in matches
m
for m in matches
if not ignore_list.AreIgnored(str(m.first.path), str(m.second.path))
]
logging.info('Grouping matches')
logging.info("Grouping matches")
groups = engine.get_groups(matches)
if self.scan_type in {ScanType.Filename, ScanType.Fields, ScanType.FieldsNoOrder, ScanType.Tag}:
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
if self.scan_type in {
ScanType.Filename,
ScanType.Fields,
ScanType.FieldsNoOrder,
ScanType.Tag,
}:
matched_files = dedupe(
[m.first for m in matches] + [m.second for m in matches]
)
self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
else:
# Ticket #195
@ -181,7 +207,7 @@ class Scanner:
# reporting discarded matches.
self.discarded_file_count = 0
groups = [g for g in groups if any(not f.is_ref for f in g)]
logging.info('Created %d groups' % len(groups))
logging.info("Created %d groups" % len(groups))
for g in groups:
g.prioritize(self._key_func, self._tie_breaker)
return groups
@ -190,7 +216,6 @@ class Scanner:
min_match_percentage = 80
mix_file_kind = True
scan_type = ScanType.Filename
scanned_tags = {'artist', 'title'}
scanned_tags = {"artist", "title"}
size_threshold = 0
word_weighting = False
@ -1 +1 @@
from . import fs, result_table, scanner # noqa
from . import fs, result_table, scanner  # noqa
@ -11,6 +11,7 @@ from hscommon.util import format_size
from core import fs
from core.util import format_timestamp, format_perc, format_words, format_dupe_count


def get_display_info(dupe, group, delta):
size = dupe.size
mtime = dupe.mtime
@ -26,16 +27,17 @@ def get_display_info(dupe, group, delta):
percentage = group.percentage
dupe_count = len(group.dupes)
return {
'name': dupe.name,
'folder_path': str(dupe.folder_path),
'size': format_size(size, 0, 1, False),
'extension': dupe.extension,
'mtime': format_timestamp(mtime, delta and m),
'percentage': format_perc(percentage),
'words': format_words(dupe.words) if hasattr(dupe, 'words') else '',
'dupe_count': format_dupe_count(dupe_count),
"name": dupe.name,
"folder_path": str(dupe.folder_path),
"size": format_size(size, 0, 1, False),
"extension": dupe.extension,
"mtime": format_timestamp(mtime, delta and m),
"percentage": format_perc(percentage),
"words": format_words(dupe.words) if hasattr(dupe, "words") else "",
"dupe_count": format_dupe_count(dupe_count),
}


class File(fs.File):
def get_display_info(self, group, delta):
return get_display_info(self, group, delta)
@ -44,4 +46,3 @@ class File(fs.File):
class Folder(fs.Folder):
def get_display_info(self, group, delta):
return get_display_info(self, group, delta)
@ -1,8 +1,8 @@
# Created On: 2011-11-27
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

from hscommon.gui.column import Column
@ -10,18 +10,19 @@ from hscommon.trans import trget

from core.gui.result_table import ResultTable as ResultTableBase

coltr = trget('columns')
coltr = trget("columns")


class ResultTable(ResultTableBase):
COLUMNS = [
Column('marked', ''),
Column('name', coltr("Filename")),
Column('folder_path', coltr("Folder"), optional=True),
Column('size', coltr("Size (KB)"), optional=True),
Column('extension', coltr("Kind"), visible=False, optional=True),
Column('mtime', coltr("Modification"), visible=False, optional=True),
Column('percentage', coltr("Match %"), optional=True),
Column('words', coltr("Words Used"), visible=False, optional=True),
Column('dupe_count', coltr("Dupe Count"), visible=False, optional=True),
Column("marked", ""),
Column("name", coltr("Filename")),
Column("folder_path", coltr("Folder"), optional=True),
Column("size", coltr("Size (KB)"), optional=True),
Column("extension", coltr("Kind"), visible=False, optional=True),
Column("mtime", coltr("Modification"), visible=False, optional=True),
Column("percentage", coltr("Match %"), optional=True),
Column("words", coltr("Words Used"), visible=False, optional=True),
Column("dupe_count", coltr("Dupe Count"), visible=False, optional=True),
]
DELTA_COLUMNS = {'size', 'mtime'}
DELTA_COLUMNS = {"size", "mtime"}
@ -8,6 +8,7 @@ from hscommon.trans import tr

from core.scanner import Scanner as ScannerBase, ScanOption, ScanType


class ScannerSE(ScannerBase):
@staticmethod
def get_scan_options():
@ -16,4 +17,3 @@ class ScannerSE(ScannerBase):
ScanOption(ScanType.Contents, tr("Contents")),
ScanOption(ScanType.Folders, tr("Folders")),
]
@ -20,93 +20,106 @@ from .results_test import GetTestGroups
from .. import app, fs, engine
from ..scanner import ScanType


def add_fake_files_to_directories(directories, files):
directories.get_files = lambda j=None: iter(files)
directories._dirs.append('this is just so Scan() doesnt return 3')
directories._dirs.append("this is just so Scan() doesnt return 3")


class TestCaseDupeGuru:
def test_apply_filter_calls_results_apply_filter(self, monkeypatch):
dgapp = TestApp().app
monkeypatch.setattr(dgapp.results, 'apply_filter', log_calls(dgapp.results.apply_filter))
dgapp.apply_filter('foo')
monkeypatch.setattr(
dgapp.results, "apply_filter", log_calls(dgapp.results.apply_filter)
)
dgapp.apply_filter("foo")
eq_(2, len(dgapp.results.apply_filter.calls))
call = dgapp.results.apply_filter.calls[0]
assert call['filter_str'] is None
assert call["filter_str"] is None
call = dgapp.results.apply_filter.calls[1]
eq_('foo', call['filter_str'])
eq_("foo", call["filter_str"])

def test_apply_filter_escapes_regexp(self, monkeypatch):
dgapp = TestApp().app
monkeypatch.setattr(dgapp.results, 'apply_filter', log_calls(dgapp.results.apply_filter))
dgapp.apply_filter('()[]\\.|+?^abc')
monkeypatch.setattr(
dgapp.results, "apply_filter", log_calls(dgapp.results.apply_filter)
)
dgapp.apply_filter("()[]\\.|+?^abc")
call = dgapp.results.apply_filter.calls[1]
eq_('\\(\\)\\[\\]\\\\\\.\\|\\+\\?\\^abc', call['filter_str'])
dgapp.apply_filter('(*)') # In "simple mode", we want the * to behave as a wilcard
eq_("\\(\\)\\[\\]\\\\\\.\\|\\+\\?\\^abc", call["filter_str"])
dgapp.apply_filter(
"(*)"
)  # In "simple mode", we want the * to behave as a wilcard
call = dgapp.results.apply_filter.calls[3]
eq_(r'\(.*\)', call['filter_str'])
dgapp.options['escape_filter_regexp'] = False
dgapp.apply_filter('(abc)')
eq_(r"\(.*\)", call["filter_str"])
dgapp.options["escape_filter_regexp"] = False
dgapp.apply_filter("(abc)")
call = dgapp.results.apply_filter.calls[5]
eq_('(abc)', call['filter_str'])
eq_("(abc)", call["filter_str"])

def test_copy_or_move(self, tmpdir, monkeypatch):
# The goal here is just to have a test for a previous blowup I had. I know my test coverage
# for this unit is pathetic. What's done is done. My approach now is to add tests for
# every change I want to make. The blowup was caused by a missing import.
p = Path(str(tmpdir))
p['foo'].open('w').close()
monkeypatch.setattr(hscommon.conflict, 'smart_copy', log_calls(lambda source_path, dest_path: None))
p["foo"].open("w").close()
monkeypatch.setattr(
hscommon.conflict,
"smart_copy",
log_calls(lambda source_path, dest_path: None),
)
# XXX This monkeypatch is temporary. will be fixed in a better monkeypatcher.
monkeypatch.setattr(app, 'smart_copy', hscommon.conflict.smart_copy)
monkeypatch.setattr(os, 'makedirs', lambda path: None) # We don't want the test to create that fake directory
monkeypatch.setattr(app, "smart_copy", hscommon.conflict.smart_copy)
monkeypatch.setattr(
os, "makedirs", lambda path: None
)  # We don't want the test to create that fake directory
dgapp = TestApp().app
dgapp.directories.add_path(p)
[f] = dgapp.directories.get_files()
dgapp.copy_or_move(f, True, 'some_destination', 0)
dgapp.copy_or_move(f, True, "some_destination", 0)
eq_(1, len(hscommon.conflict.smart_copy.calls))
call = hscommon.conflict.smart_copy.calls[0]
eq_(call['dest_path'], op.join('some_destination', 'foo'))
eq_(call['source_path'], f.path)
eq_(call["dest_path"], op.join("some_destination", "foo"))
eq_(call["source_path"], f.path)

def test_copy_or_move_clean_empty_dirs(self, tmpdir, monkeypatch):
tmppath = Path(str(tmpdir))
sourcepath = tmppath['source']
sourcepath = tmppath["source"]
sourcepath.mkdir()
sourcepath['myfile'].open('w')
sourcepath["myfile"].open("w")
app = TestApp().app
app.directories.add_path(tmppath)
[myfile] = app.directories.get_files()
monkeypatch.setattr(app, 'clean_empty_dirs', log_calls(lambda path: None))
app.copy_or_move(myfile, False, tmppath['dest'], 0)
monkeypatch.setattr(app, "clean_empty_dirs", log_calls(lambda path: None))
app.copy_or_move(myfile, False, tmppath["dest"], 0)
calls = app.clean_empty_dirs.calls
eq_(1, len(calls))
eq_(sourcepath, calls[0]['path'])
eq_(sourcepath, calls[0]["path"])

def test_Scan_with_objects_evaluating_to_false(self):
class FakeFile(fs.File):
def __bool__(self):
return False

# At some point, any() was used in a wrong way that made Scan() wrongly return 1
app = TestApp().app
f1, f2 = [FakeFile('foo') for i in range(2)]
f1, f2 = [FakeFile("foo") for i in range(2)]
f1.is_ref, f2.is_ref = (False, False)
assert not (bool(f1) and bool(f2))
add_fake_files_to_directories(app.directories, [f1, f2])
app.start_scanning() # no exception
app.start_scanning()  # no exception

@mark.skipif("not hasattr(os, 'link')")
def test_ignore_hardlink_matches(self, tmpdir):
# If the ignore_hardlink_matches option is set, don't match files hardlinking to the same
# inode.
tmppath = Path(str(tmpdir))
tmppath['myfile'].open('w').write('foo')
os.link(str(tmppath['myfile']), str(tmppath['hardlink']))
tmppath["myfile"].open("w").write("foo")
os.link(str(tmppath["myfile"]), str(tmppath["hardlink"]))
app = TestApp().app
app.directories.add_path(tmppath)
app.options['scan_type'] = ScanType.Contents
app.options['ignore_hardlink_matches'] = True
app.options["scan_type"] = ScanType.Contents
app.options["ignore_hardlink_matches"] = True
app.start_scanning()
eq_(len(app.results.groups), 0)
@ -116,27 +129,32 @@ class TestCaseDupeGuru:
# making the selected row None. Don't crash when it happens.
dgapp = TestApp().app
# selected_row is None because there's no result.
assert not dgapp.result_table.rename_selected('foo') # no crash
assert not dgapp.result_table.rename_selected("foo")  # no crash


class TestCaseDupeGuru_clean_empty_dirs:
def pytest_funcarg__do_setup(self, request):
monkeypatch = request.getfuncargvalue('monkeypatch')
monkeypatch.setattr(hscommon.util, 'delete_if_empty', log_calls(lambda path, files_to_delete=[]: None))
monkeypatch = request.getfuncargvalue("monkeypatch")
monkeypatch.setattr(
hscommon.util,
"delete_if_empty",
log_calls(lambda path, files_to_delete=[]: None),
)
# XXX This monkeypatch is temporary. will be fixed in a better monkeypatcher.
monkeypatch.setattr(app, 'delete_if_empty', hscommon.util.delete_if_empty)
monkeypatch.setattr(app, "delete_if_empty", hscommon.util.delete_if_empty)
self.app = TestApp().app

def test_option_off(self, do_setup):
self.app.clean_empty_dirs(Path('/foo/bar'))
self.app.clean_empty_dirs(Path("/foo/bar"))
eq_(0, len(hscommon.util.delete_if_empty.calls))

def test_option_on(self, do_setup):
self.app.options['clean_empty_dirs'] = True
self.app.clean_empty_dirs(Path('/foo/bar'))
self.app.options["clean_empty_dirs"] = True
self.app.clean_empty_dirs(Path("/foo/bar"))
calls = hscommon.util.delete_if_empty.calls
eq_(1, len(calls))
eq_(Path('/foo/bar'), calls[0]['path'])
eq_(['.DS_Store'], calls[0]['files_to_delete'])
eq_(Path("/foo/bar"), calls[0]["path"])
eq_([".DS_Store"], calls[0]["files_to_delete"])

def test_recurse_up(self, do_setup, monkeypatch):
# delete_if_empty must be recursively called up in the path until it returns False
@ -144,16 +162,16 @@ class TestCaseDupeGuru_clean_empty_dirs:
def mock_delete_if_empty(path, files_to_delete=[]):
return len(path) > 1

monkeypatch.setattr(hscommon.util, 'delete_if_empty', mock_delete_if_empty)
monkeypatch.setattr(hscommon.util, "delete_if_empty", mock_delete_if_empty)
# XXX This monkeypatch is temporary. will be fixed in a better monkeypatcher.
monkeypatch.setattr(app, 'delete_if_empty', mock_delete_if_empty)
self.app.options['clean_empty_dirs'] = True
self.app.clean_empty_dirs(Path('not-empty/empty/empty'))
monkeypatch.setattr(app, "delete_if_empty", mock_delete_if_empty)
self.app.options["clean_empty_dirs"] = True
self.app.clean_empty_dirs(Path("not-empty/empty/empty"))
calls = hscommon.util.delete_if_empty.calls
eq_(3, len(calls))
eq_(Path('not-empty/empty/empty'), calls[0]['path'])
eq_(Path('not-empty/empty'), calls[1]['path'])
eq_(Path('not-empty'), calls[2]['path'])
eq_(Path("not-empty/empty/empty"), calls[0]["path"])
eq_(Path("not-empty/empty"), calls[1]["path"])
eq_(Path("not-empty"), calls[2]["path"])
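These tests lean heavily on log_calls from hscommon's test utilities. A hypothetical minimal reimplementation (the real helper may differ) that records keyword-resolved arguments on a .calls list, which is what assertions like calls[0]["path"] read:

import inspect


def log_calls(func):
    # hypothetical stand-in for hscommon.testutil.log_calls
    def wrapper(*args, **kwargs):
        bound = inspect.signature(func).bind(*args, **kwargs)
        bound.apply_defaults()
        wrapper.calls.append(dict(bound.arguments))
        return func(*args, **kwargs)

    wrapper.calls = []
    return wrapper


@log_calls
def clean_empty_dirs(path):
    return None


clean_empty_dirs("/foo/bar")
print(clean_empty_dirs.calls)  # [{'path': '/foo/bar'}]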
class TestCaseDupeGuruWithResults:
@ -166,10 +184,10 @@ class TestCaseDupeGuruWithResults:
self.dtree = app.dtree
self.rtable = app.rtable
self.rtable.refresh()
tmpdir = request.getfuncargvalue('tmpdir')
tmpdir = request.getfuncargvalue("tmpdir")
tmppath = Path(str(tmpdir))
tmppath['foo'].mkdir()
tmppath['bar'].mkdir()
tmppath["foo"].mkdir()
tmppath["bar"].mkdir()
self.app.directories.add_path(tmppath)

def test_GetObjects(self, do_setup):
@ -187,8 +205,8 @@ class TestCaseDupeGuruWithResults:

def test_GetObjects_after_sort(self, do_setup):
objects = self.objects
groups = self.groups[:] # we need an un-sorted reference
self.rtable.sort('name', False)
groups = self.groups[:]  # we need an un-sorted reference
self.rtable.sort("name", False)
r = self.rtable[1]
assert r._group is groups[1]
assert r._dupe is objects[4]
@ -198,7 +216,7 @@ class TestCaseDupeGuruWithResults:
self.rtable.select([1, 2, 3])
self.app.remove_selected()
# The first 2 dupes have been removed. The 3rd one is a ref. it stays there, in first pos.
eq_(self.rtable.selected_indexes, [1]) # no exception
eq_(self.rtable.selected_indexes, [1])  # no exception

def test_selectResultNodePaths(self, do_setup):
app = self.app
@ -220,9 +238,9 @@ class TestCaseDupeGuruWithResults:
def test_selectResultNodePaths_after_sort(self, do_setup):
app = self.app
objects = self.objects
groups = self.groups[:] #To keep the old order in memory
self.rtable.sort('name', False) #0
#Now, the group order is supposed to be reversed
groups = self.groups[:]  # To keep the old order in memory
self.rtable.sort("name", False)  # 0
# Now, the group order is supposed to be reversed
self.rtable.select([1, 2, 3])
eq_(len(app.selected_dupes), 3)
assert app.selected_dupes[0] is objects[4]
@ -242,13 +260,13 @@ class TestCaseDupeGuruWithResults:
self.rtable.power_marker = True
self.rtable.select([0, 1, 2])
app.remove_selected()
eq_(self.rtable.selected_indexes, []) # no exception
eq_(self.rtable.selected_indexes, [])  # no exception

def test_selectPowerMarkerRows_after_sort(self, do_setup):
app = self.app
objects = self.objects
self.rtable.power_marker = True
self.rtable.sort('name', False)
self.rtable.sort("name", False)
self.rtable.select([0, 1, 2])
eq_(len(app.selected_dupes), 3)
assert app.selected_dupes[0] is objects[4]
@ -285,11 +303,11 @@ class TestCaseDupeGuruWithResults:

def test_refreshDetailsWithSelected(self, do_setup):
self.rtable.select([1, 4])
eq_(self.dpanel.row(0), ('Filename', 'bar bleh', 'foo bar'))
self.dpanel.view.check_gui_calls(['refresh'])
eq_(self.dpanel.row(0), ("Filename", "bar bleh", "foo bar"))
self.dpanel.view.check_gui_calls(["refresh"])
self.rtable.select([])
eq_(self.dpanel.row(0), ('Filename', '---', '---'))
self.dpanel.view.check_gui_calls(['refresh'])
eq_(self.dpanel.row(0), ("Filename", "---", "---"))
self.dpanel.view.check_gui_calls(["refresh"])

def test_makeSelectedReference(self, do_setup):
app = self.app
@ -300,12 +318,14 @@ class TestCaseDupeGuruWithResults:
assert groups[0].ref is objects[1]
assert groups[1].ref is objects[4]

def test_makeSelectedReference_by_selecting_two_dupes_in_the_same_group(self, do_setup):
def test_makeSelectedReference_by_selecting_two_dupes_in_the_same_group(
self, do_setup
):
app = self.app
objects = self.objects
groups = self.groups
self.rtable.select([1, 2, 4])
#Only [0, 0] and [1, 0] must go ref, not [0, 1] because it is a part of the same group
# Only [0, 0] and [1, 0] must go ref, not [0, 1] because it is a part of the same group
app.make_selected_reference()
assert groups[0].ref is objects[1]
assert groups[1].ref is objects[4]
@ -314,7 +334,7 @@ class TestCaseDupeGuruWithResults:
app = self.app
self.rtable.select([1, 4])
app.remove_selected()
eq_(len(app.results.dupes), 1) # the first path is now selected
eq_(len(app.results.dupes), 1)  # the first path is now selected
app.remove_selected()
eq_(len(app.results.dupes), 0)

@ -336,27 +356,27 @@ class TestCaseDupeGuruWithResults:

def test_addDirectory_does_not_exist(self, do_setup):
app = self.app
app.add_directory('/does_not_exist')
app.add_directory("/does_not_exist")
eq_(len(app.view.messages), 1)
assert "exist" in app.view.messages[0]

def test_ignore(self, do_setup):
app = self.app
self.rtable.select([4]) #The dupe of the second, 2 sized group
self.rtable.select([4])  # The dupe of the second, 2 sized group
app.add_selected_to_ignore_list()
eq_(len(app.ignore_list), 1)
self.rtable.select([1]) #first dupe of the 3 dupes group
self.rtable.select([1])  # first dupe of the 3 dupes group
app.add_selected_to_ignore_list()
#BOTH the ref and the other dupe should have been added
# BOTH the ref and the other dupe should have been added
eq_(len(app.ignore_list), 3)

def test_purgeIgnoreList(self, do_setup, tmpdir):
app = self.app
p1 = str(tmpdir.join('file1'))
p2 = str(tmpdir.join('file2'))
open(p1, 'w').close()
open(p2, 'w').close()
dne = '/does_not_exist'
p1 = str(tmpdir.join("file1"))
p2 = str(tmpdir.join("file2"))
open(p1, "w").close()
open(p2, "w").close()
dne = "/does_not_exist"
app.ignore_list.Ignore(dne, p1)
app.ignore_list.Ignore(p2, dne)
app.ignore_list.Ignore(p1, p2)
@ -381,9 +401,11 @@ class TestCaseDupeGuruWithResults:
# When doing a scan with results being present prior to the scan, correctly invalidate the
# results table.
app = self.app
app.JOB = Job(1, lambda *args, **kw: False) # Cancels the task
add_fake_files_to_directories(app.directories, self.objects) # We want the scan to at least start
app.start_scanning() # will be cancelled immediately
app.JOB = Job(1, lambda *args, **kw: False)  # Cancels the task
|
||||
add_fake_files_to_directories(
|
||||
app.directories, self.objects
|
||||
) # We want the scan to at least start
|
||||
app.start_scanning() # will be cancelled immediately
|
||||
eq_(len(app.result_table), 0)
|
||||
|
||||
def test_selected_dupes_after_removal(self, do_setup):
|
||||
@ -401,21 +423,21 @@ class TestCaseDupeGuruWithResults:
|
||||
# Ref #238
|
||||
self.rtable.delta_values = True
|
||||
self.rtable.power_marker = True
|
||||
self.rtable.sort('dupe_count', False)
|
||||
self.rtable.sort("dupe_count", False)
|
||||
# don't crash
|
||||
self.rtable.sort('percentage', False)
|
||||
self.rtable.sort("percentage", False)
|
||||
# don't crash
|
||||
|
||||
|
||||
class TestCaseDupeGuru_renameSelected:
|
||||
def pytest_funcarg__do_setup(self, request):
|
||||
tmpdir = request.getfuncargvalue('tmpdir')
|
||||
tmpdir = request.getfuncargvalue("tmpdir")
|
||||
p = Path(str(tmpdir))
|
||||
fp = open(str(p['foo bar 1']), mode='w')
|
||||
fp = open(str(p["foo bar 1"]), mode="w")
|
||||
fp.close()
|
||||
fp = open(str(p['foo bar 2']), mode='w')
|
||||
fp = open(str(p["foo bar 2"]), mode="w")
|
||||
fp.close()
|
||||
fp = open(str(p['foo bar 3']), mode='w')
|
||||
fp = open(str(p["foo bar 3"]), mode="w")
|
||||
fp.close()
|
||||
files = fs.get_files(p)
|
||||
for f in files:
|
||||
@ -437,46 +459,46 @@ class TestCaseDupeGuru_renameSelected:
|
||||
app = self.app
|
||||
g = self.groups[0]
|
||||
self.rtable.select([1])
|
||||
assert app.rename_selected('renamed')
|
||||
assert app.rename_selected("renamed")
|
||||
names = [p.name for p in self.p.listdir()]
|
||||
assert 'renamed' in names
|
||||
assert 'foo bar 2' not in names
|
||||
eq_(g.dupes[0].name, 'renamed')
|
||||
assert "renamed" in names
|
||||
assert "foo bar 2" not in names
|
||||
eq_(g.dupes[0].name, "renamed")
|
||||
|
||||
def test_none_selected(self, do_setup, monkeypatch):
|
||||
app = self.app
|
||||
g = self.groups[0]
|
||||
self.rtable.select([])
|
||||
monkeypatch.setattr(logging, 'warning', log_calls(lambda msg: None))
|
||||
assert not app.rename_selected('renamed')
|
||||
msg = logging.warning.calls[0]['msg']
|
||||
eq_('dupeGuru Warning: list index out of range', msg)
|
||||
monkeypatch.setattr(logging, "warning", log_calls(lambda msg: None))
|
||||
assert not app.rename_selected("renamed")
|
||||
msg = logging.warning.calls[0]["msg"]
|
||||
eq_("dupeGuru Warning: list index out of range", msg)
|
||||
names = [p.name for p in self.p.listdir()]
|
||||
assert 'renamed' not in names
|
||||
assert 'foo bar 2' in names
|
||||
eq_(g.dupes[0].name, 'foo bar 2')
|
||||
assert "renamed" not in names
|
||||
assert "foo bar 2" in names
|
||||
eq_(g.dupes[0].name, "foo bar 2")
|
||||
|
||||
def test_name_already_exists(self, do_setup, monkeypatch):
|
||||
app = self.app
|
||||
g = self.groups[0]
|
||||
self.rtable.select([1])
|
||||
monkeypatch.setattr(logging, 'warning', log_calls(lambda msg: None))
|
||||
assert not app.rename_selected('foo bar 1')
|
||||
msg = logging.warning.calls[0]['msg']
|
||||
assert msg.startswith('dupeGuru Warning: \'foo bar 1\' already exists in')
|
||||
monkeypatch.setattr(logging, "warning", log_calls(lambda msg: None))
|
||||
assert not app.rename_selected("foo bar 1")
|
||||
msg = logging.warning.calls[0]["msg"]
|
||||
assert msg.startswith("dupeGuru Warning: 'foo bar 1' already exists in")
|
||||
names = [p.name for p in self.p.listdir()]
|
||||
assert 'foo bar 1' in names
|
||||
assert 'foo bar 2' in names
|
||||
eq_(g.dupes[0].name, 'foo bar 2')
|
||||
assert "foo bar 1" in names
|
||||
assert "foo bar 2" in names
|
||||
eq_(g.dupes[0].name, "foo bar 2")
|
||||
|
||||
|
||||
class TestAppWithDirectoriesInTree:
|
||||
def pytest_funcarg__do_setup(self, request):
|
||||
tmpdir = request.getfuncargvalue('tmpdir')
|
||||
tmpdir = request.getfuncargvalue("tmpdir")
|
||||
p = Path(str(tmpdir))
|
||||
p['sub1'].mkdir()
|
||||
p['sub2'].mkdir()
|
||||
p['sub3'].mkdir()
|
||||
p["sub1"].mkdir()
|
||||
p["sub2"].mkdir()
|
||||
p["sub3"].mkdir()
|
||||
app = TestApp()
|
||||
self.app = app.app
|
||||
self.dtree = app.dtree
|
||||
@ -487,12 +509,11 @@ class TestAppWithDirectoriesInTree:
|
||||
# Setting a node state to something also affect subnodes. These subnodes must be correctly
|
||||
# refreshed.
|
||||
node = self.dtree[0]
|
||||
eq_(len(node), 3) # a len() call is required for subnodes to be loaded
|
||||
eq_(len(node), 3) # a len() call is required for subnodes to be loaded
|
||||
subnode = node[0]
|
||||
node.state = 1 # the state property is a state index
|
||||
node.state = 1 # the state property is a state index
|
||||
node = self.dtree[0]
|
||||
eq_(len(node), 3)
|
||||
subnode = node[0]
|
||||
eq_(subnode.state, 1)
|
||||
self.dtree.view.check_gui_calls(['refresh_states'])
|
||||
|
||||
self.dtree.view.check_gui_calls(["refresh_states"])
|
||||
|
@ -4,7 +4,7 @@
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html

from hscommon.testutil import TestApp as TestAppBase, CallLogger, eq_, with_app # noqa
from hscommon.testutil import TestApp as TestAppBase, CallLogger, eq_, with_app # noqa
from hscommon.path import Path
from hscommon.util import get_file_ext, format_size
from hscommon.gui.column import Column
@ -17,6 +17,7 @@ from ..app import DupeGuru as DupeGuruBase
from ..gui.result_table import ResultTable as ResultTableBase
from ..gui.prioritize_dialog import PrioritizeDialog


class DupeGuruView:
JOB = nulljob

@ -39,28 +40,32 @@ class DupeGuruView:
self.messages.append(msg)

def ask_yes_no(self, prompt):
return True # always answer yes
return True # always answer yes

def create_results_window(self):
pass


class ResultTable(ResultTableBase):
COLUMNS = [
Column('marked', ''),
Column('name', 'Filename'),
Column('folder_path', 'Directory'),
Column('size', 'Size (KB)'),
Column('extension', 'Kind'),
Column("marked", ""),
Column("name", "Filename"),
Column("folder_path", "Directory"),
Column("size", "Size (KB)"),
Column("extension", "Kind"),
]
DELTA_COLUMNS = {'size', }
DELTA_COLUMNS = {
"size",
}


class DupeGuru(DupeGuruBase):
NAME = 'dupeGuru'
METADATA_TO_READ = ['size']
NAME = "dupeGuru"
METADATA_TO_READ = ["size"]

def __init__(self):
DupeGuruBase.__init__(self, DupeGuruView())
self.appdata = '/tmp'
self.appdata = "/tmp"
self._recreate_result_table()

def _prioritization_categories(self):
@ -78,7 +83,7 @@ class NamedObject:
def __init__(self, name="foobar", with_words=False, size=1, folder=None):
self.name = name
if folder is None:
folder = 'basepath'
folder = "basepath"
self._folder = Path(folder)
self.size = size
self.md5partial = name
@ -88,7 +93,7 @@ class NamedObject:
self.is_ref = False

def __bool__(self):
return False #Make sure that operations are made correctly when the bool value of files is false.
return False # Make sure that operations are made correctly when the bool value of files is false.

def get_display_info(self, group, delta):
size = self.size
@ -97,10 +102,10 @@ class NamedObject:
r = group.ref
size -= r.size
return {
'name': self.name,
'folder_path': str(self.folder_path),
'size': format_size(size, 0, 1, False),
'extension': self.extension if hasattr(self, 'extension') else '---',
"name": self.name,
"folder_path": str(self.folder_path),
"size": format_size(size, 0, 1, False),
"extension": self.extension if hasattr(self, "extension") else "---",
}

@property
@ -115,6 +120,7 @@ class NamedObject:
def extension(self):
return get_file_ext(self.name)


# Returns a group set that looks like that:
# "foo bar" (1)
# "bar bleh" (1024)
@ -127,21 +133,24 @@ def GetTestGroups():
NamedObject("bar bleh"),
NamedObject("foo bleh"),
NamedObject("ibabtu"),
NamedObject("ibabtu")
NamedObject("ibabtu"),
]
objects[1].size = 1024
matches = engine.getmatches(objects) #we should have 5 matches
groups = engine.get_groups(matches) #We should have 2 groups
matches = engine.getmatches(objects) # we should have 5 matches
groups = engine.get_groups(matches) # We should have 2 groups
for g in groups:
g.prioritize(lambda x: objects.index(x)) #We want the dupes to be in the same order as the list is
groups.sort(key=len, reverse=True) # We want the group with 3 members to be first.
g.prioritize(
lambda x: objects.index(x)
) # We want the dupes to be in the same order as the list is
groups.sort(key=len, reverse=True) # We want the group with 3 members to be first.
return (objects, matches, groups)


class TestApp(TestAppBase):
def __init__(self):
def link_gui(gui):
gui.view = self.make_logger()
if hasattr(gui, 'columns'): # tables
if hasattr(gui, "columns"): # tables
gui.columns.view = self.make_logger()
return gui

@ -166,7 +175,7 @@ class TestApp(TestAppBase):
# rtable is a property because its instance can be replaced during execution
return self.app.result_table

#--- Helpers
# --- Helpers
def select_pri_criterion(self, name):
# Select a main prioritize criterion by name instead of by index. Makes tests more
# maintainable.

@ -13,13 +13,18 @@ try:
except ImportError:
skip("Can't import the block module, probably hasn't been compiled.")

def my_avgdiff(first, second, limit=768, min_iter=3): # this is so I don't have to re-write every call

def my_avgdiff(
first, second, limit=768, min_iter=3
): # this is so I don't have to re-write every call
return avgdiff(first, second, limit, min_iter)


BLACK = (0, 0, 0)
RED = (0xff, 0, 0)
GREEN = (0, 0xff, 0)
BLUE = (0, 0, 0xff)
RED = (0xFF, 0, 0)
GREEN = (0, 0xFF, 0)
BLUE = (0, 0, 0xFF)


class FakeImage:
def __init__(self, size, data):
@ -37,16 +42,20 @@ class FakeImage:
pixels.append(pixel)
return FakeImage((box[2] - box[0], box[3] - box[1]), pixels)


def empty():
return FakeImage((0, 0), [])

def single_pixel(): #one red pixel
return FakeImage((1, 1), [(0xff, 0, 0)])

def single_pixel(): # one red pixel
return FakeImage((1, 1), [(0xFF, 0, 0)])


def four_pixels():
pixels = [RED, (0, 0x80, 0xff), (0x80, 0, 0), (0, 0x40, 0x80)]
pixels = [RED, (0, 0x80, 0xFF), (0x80, 0, 0), (0, 0x40, 0x80)]
return FakeImage((2, 2), pixels)


class TestCasegetblock:
def test_single_pixel(self):
im = single_pixel()
@ -60,9 +69,9 @@ class TestCasegetblock:
def test_four_pixels(self):
im = four_pixels()
[b] = getblocks2(im, 1)
meanred = (0xff + 0x80) // 4
meanred = (0xFF + 0x80) // 4
meangreen = (0x80 + 0x40) // 4
meanblue = (0xff + 0x80) // 4
meanblue = (0xFF + 0x80) // 4
eq_((meanred, meangreen, meanblue), b)


@ -158,6 +167,7 @@ class TestCasegetblock:
# eq_(BLACK, blocks[3])
#


class TestCasegetblocks2:
def test_empty_image(self):
im = empty()
@ -169,9 +179,9 @@ class TestCasegetblocks2:
blocks = getblocks2(im, 1)
eq_(1, len(blocks))
block = blocks[0]
meanred = (0xff + 0x80) // 4
meanred = (0xFF + 0x80) // 4
meangreen = (0x80 + 0x40) // 4
meanblue = (0xff + 0x80) // 4
meanblue = (0xFF + 0x80) // 4
eq_((meanred, meangreen, meanblue), block)

def test_four_blocks_all_black(self):
@ -225,25 +235,25 @@ class TestCaseavgdiff:
my_avgdiff([b, b], [b])

def test_first_arg_is_empty_but_not_second(self):
#Don't return 0 (as when the 2 lists are empty), raise!
# Don't return 0 (as when the 2 lists are empty), raise!
b = (0, 0, 0)
with raises(DifferentBlockCountError):
my_avgdiff([], [b])

def test_limit(self):
ref = (0, 0, 0)
b1 = (10, 10, 10) #avg 30
b2 = (20, 20, 20) #avg 45
b3 = (30, 30, 30) #avg 60
b1 = (10, 10, 10) # avg 30
b2 = (20, 20, 20) # avg 45
b3 = (30, 30, 30) # avg 60
blocks1 = [ref, ref, ref]
blocks2 = [b1, b2, b3]
eq_(45, my_avgdiff(blocks1, blocks2, 44))

def test_min_iterations(self):
ref = (0, 0, 0)
b1 = (10, 10, 10) #avg 30
b2 = (20, 20, 20) #avg 45
b3 = (10, 10, 10) #avg 40
b1 = (10, 10, 10) # avg 30
b2 = (20, 20, 20) # avg 45
b3 = (10, 10, 10) # avg 40
blocks1 = [ref, ref, ref]
blocks2 = [b1, b2, b3]
eq_(40, my_avgdiff(blocks1, blocks2, 45 - 1, 3))

@ -16,34 +16,35 @@ try:
except ImportError:
skip("Can't import the cache module, probably hasn't been compiled.")


class TestCasecolors_to_string:
def test_no_color(self):
eq_('', colors_to_string([]))
eq_("", colors_to_string([]))

def test_single_color(self):
eq_('000000', colors_to_string([(0, 0, 0)]))
eq_('010101', colors_to_string([(1, 1, 1)]))
eq_('0a141e', colors_to_string([(10, 20, 30)]))
eq_("000000", colors_to_string([(0, 0, 0)]))
eq_("010101", colors_to_string([(1, 1, 1)]))
eq_("0a141e", colors_to_string([(10, 20, 30)]))

def test_two_colors(self):
eq_('000102030405', colors_to_string([(0, 1, 2), (3, 4, 5)]))
eq_("000102030405", colors_to_string([(0, 1, 2), (3, 4, 5)]))


class TestCasestring_to_colors:
def test_empty(self):
eq_([], string_to_colors(''))
eq_([], string_to_colors(""))

def test_single_color(self):
eq_([(0, 0, 0)], string_to_colors('000000'))
eq_([(2, 3, 4)], string_to_colors('020304'))
eq_([(10, 20, 30)], string_to_colors('0a141e'))
eq_([(0, 0, 0)], string_to_colors("000000"))
eq_([(2, 3, 4)], string_to_colors("020304"))
eq_([(10, 20, 30)], string_to_colors("0a141e"))

def test_two_colors(self):
eq_([(10, 20, 30), (40, 50, 60)], string_to_colors('0a141e28323c'))
eq_([(10, 20, 30), (40, 50, 60)], string_to_colors("0a141e28323c"))

def test_incomplete_color(self):
# don't return anything if it's not a complete color
eq_([], string_to_colors('102'))
eq_([], string_to_colors("102"))


class BaseTestCaseCache:
@ -54,58 +55,58 @@ class BaseTestCaseCache:
c = self.get_cache()
eq_(0, len(c))
with raises(KeyError):
c['foo']
c["foo"]

def test_set_then_retrieve_blocks(self):
c = self.get_cache()
b = [(0, 0, 0), (1, 2, 3)]
c['foo'] = b
eq_(b, c['foo'])
c["foo"] = b
eq_(b, c["foo"])

def test_delitem(self):
c = self.get_cache()
c['foo'] = ''
del c['foo']
assert 'foo' not in c
c["foo"] = ""
del c["foo"]
assert "foo" not in c
with raises(KeyError):
del c['foo']
del c["foo"]

def test_persistance(self, tmpdir):
DBNAME = tmpdir.join('hstest.db')
DBNAME = tmpdir.join("hstest.db")
c = self.get_cache(str(DBNAME))
c['foo'] = [(1, 2, 3)]
c["foo"] = [(1, 2, 3)]
del c
c = self.get_cache(str(DBNAME))
eq_([(1, 2, 3)], c['foo'])
eq_([(1, 2, 3)], c["foo"])

def test_filter(self):
c = self.get_cache()
c['foo'] = ''
c['bar'] = ''
c['baz'] = ''
c.filter(lambda p: p != 'bar') #only 'bar' is removed
c["foo"] = ""
c["bar"] = ""
c["baz"] = ""
c.filter(lambda p: p != "bar") # only 'bar' is removed
eq_(2, len(c))
assert 'foo' in c
assert 'baz' in c
assert 'bar' not in c
assert "foo" in c
assert "baz" in c
assert "bar" not in c

def test_clear(self):
c = self.get_cache()
c['foo'] = ''
c['bar'] = ''
c['baz'] = ''
c["foo"] = ""
c["bar"] = ""
c["baz"] = ""
c.clear()
eq_(0, len(c))
assert 'foo' not in c
assert 'baz' not in c
assert 'bar' not in c
assert "foo" not in c
assert "baz" not in c
assert "bar" not in c

def test_by_id(self):
# it's possible to use the cache by referring to the files by their row_id
c = self.get_cache()
b = [(0, 0, 0), (1, 2, 3)]
c['foo'] = b
foo_id = c.get_id('foo')
c["foo"] = b
foo_id = c.get_id("foo")
eq_(c[foo_id], b)


@ -120,16 +121,16 @@ class TestCaseSqliteCache(BaseTestCaseCache):
# If we don't do this monkeypatching, we get a weird exception about trying to flush a
# closed file. I've tried setting logging level and stuff, but nothing worked. So, there we
# go, a dirty monkeypatch.
monkeypatch.setattr(logging, 'warning', lambda *args, **kw: None)
dbname = str(tmpdir.join('foo.db'))
fp = open(dbname, 'w')
fp.write('invalid sqlite content')
monkeypatch.setattr(logging, "warning", lambda *args, **kw: None)
dbname = str(tmpdir.join("foo.db"))
fp = open(dbname, "w")
fp.write("invalid sqlite content")
fp.close()
c = self.get_cache(dbname) # should not raise a DatabaseError
c['foo'] = [(1, 2, 3)]
c = self.get_cache(dbname) # should not raise a DatabaseError
c["foo"] = [(1, 2, 3)]
del c
c = self.get_cache(dbname)
eq_(c['foo'], [(1, 2, 3)])
eq_(c["foo"], [(1, 2, 3)])


class TestCaseShelveCache(BaseTestCaseCache):
@ -161,4 +162,3 @@ class TestCaseCacheSQLEscape:
del c["foo'bar"]
except KeyError:
assert False


@ -1 +1 @@
from hscommon.testutil import pytest_funcarg__app # noqa
from hscommon.testutil import pytest_funcarg__app # noqa

@ -14,91 +14,105 @@ from hscommon.path import Path
from hscommon.testutil import eq_

from ..fs import File
from ..directories import Directories, DirectoryState, AlreadyThereError, InvalidPathError
from ..directories import (
Directories,
DirectoryState,
AlreadyThereError,
InvalidPathError,
)


def create_fake_fs(rootpath):
# We have it as a separate function because other units are using it.
rootpath = rootpath['fs']
rootpath = rootpath["fs"]
rootpath.mkdir()
rootpath['dir1'].mkdir()
rootpath['dir2'].mkdir()
rootpath['dir3'].mkdir()
fp = rootpath['file1.test'].open('w')
fp.write('1')
rootpath["dir1"].mkdir()
rootpath["dir2"].mkdir()
rootpath["dir3"].mkdir()
fp = rootpath["file1.test"].open("w")
fp.write("1")
fp.close()
fp = rootpath['file2.test'].open('w')
fp.write('12')
fp = rootpath["file2.test"].open("w")
fp.write("12")
fp.close()
fp = rootpath['file3.test'].open('w')
fp.write('123')
fp = rootpath["file3.test"].open("w")
fp.write("123")
fp.close()
fp = rootpath['dir1']['file1.test'].open('w')
fp.write('1')
fp = rootpath["dir1"]["file1.test"].open("w")
fp.write("1")
fp.close()
fp = rootpath['dir2']['file2.test'].open('w')
fp.write('12')
fp = rootpath["dir2"]["file2.test"].open("w")
fp.write("12")
fp.close()
fp = rootpath['dir3']['file3.test'].open('w')
fp.write('123')
fp = rootpath["dir3"]["file3.test"].open("w")
fp.write("123")
fp.close()
return rootpath


testpath = None


def setup_module(module):
# In this unit, we have tests depending on two directory structure. One with only one file in it
# and another with a more complex structure.
testpath = Path(tempfile.mkdtemp())
module.testpath = testpath
rootpath = testpath['onefile']
rootpath = testpath["onefile"]
rootpath.mkdir()
fp = rootpath['test.txt'].open('w')
fp.write('test_data')
fp = rootpath["test.txt"].open("w")
fp.write("test_data")
fp.close()
create_fake_fs(testpath)


def teardown_module(module):
shutil.rmtree(str(module.testpath))


def test_empty():
d = Directories()
eq_(len(d), 0)
assert 'foobar' not in d
assert "foobar" not in d


def test_add_path():
d = Directories()
p = testpath['onefile']
p = testpath["onefile"]
d.add_path(p)
eq_(1, len(d))
assert p in d
assert (p['foobar']) in d
assert (p["foobar"]) in d
assert p.parent() not in d
p = testpath['fs']
p = testpath["fs"]
d.add_path(p)
eq_(2, len(d))
assert p in d


def test_AddPath_when_path_is_already_there():
d = Directories()
p = testpath['onefile']
p = testpath["onefile"]
d.add_path(p)
with raises(AlreadyThereError):
d.add_path(p)
with raises(AlreadyThereError):
d.add_path(p['foobar'])
d.add_path(p["foobar"])
eq_(1, len(d))


def test_add_path_containing_paths_already_there():
d = Directories()
d.add_path(testpath['onefile'])
d.add_path(testpath["onefile"])
eq_(1, len(d))
d.add_path(testpath)
eq_(len(d), 1)
eq_(d[0], testpath)


def test_AddPath_non_latin(tmpdir):
p = Path(str(tmpdir))
to_add = p['unicode\u201a']
to_add = p["unicode\u201a"]
os.mkdir(str(to_add))
d = Directories()
try:
@ -106,63 +120,69 @@ def test_AddPath_non_latin(tmpdir):
except UnicodeDecodeError:
assert False


def test_del():
d = Directories()
d.add_path(testpath['onefile'])
d.add_path(testpath["onefile"])
try:
del d[1]
assert False
except IndexError:
pass
d.add_path(testpath['fs'])
d.add_path(testpath["fs"])
del d[1]
eq_(1, len(d))


def test_states():
d = Directories()
p = testpath['onefile']
p = testpath["onefile"]
d.add_path(p)
eq_(DirectoryState.Normal, d.get_state(p))
d.set_state(p, DirectoryState.Reference)
eq_(DirectoryState.Reference, d.get_state(p))
eq_(DirectoryState.Reference, d.get_state(p['dir1']))
eq_(DirectoryState.Reference, d.get_state(p["dir1"]))
eq_(1, len(d.states))
eq_(p, list(d.states.keys())[0])
eq_(DirectoryState.Reference, d.states[p])


def test_get_state_with_path_not_there():
# When the path's not there, just return DirectoryState.Normal
d = Directories()
d.add_path(testpath['onefile'])
d.add_path(testpath["onefile"])
eq_(d.get_state(testpath), DirectoryState.Normal)


def test_states_overwritten_when_larger_directory_eat_smaller_ones():
# ref #248
# When setting the state of a folder, we overwrite previously set states for subfolders.
d = Directories()
p = testpath['onefile']
p = testpath["onefile"]
d.add_path(p)
d.set_state(p, DirectoryState.Excluded)
d.add_path(testpath)
d.set_state(testpath, DirectoryState.Reference)
eq_(d.get_state(p), DirectoryState.Reference)
eq_(d.get_state(p['dir1']), DirectoryState.Reference)
eq_(d.get_state(p["dir1"]), DirectoryState.Reference)
eq_(d.get_state(testpath), DirectoryState.Reference)


def test_get_files():
d = Directories()
p = testpath['fs']
p = testpath["fs"]
d.add_path(p)
d.set_state(p['dir1'], DirectoryState.Reference)
d.set_state(p['dir2'], DirectoryState.Excluded)
d.set_state(p["dir1"], DirectoryState.Reference)
d.set_state(p["dir2"], DirectoryState.Excluded)
files = list(d.get_files())
eq_(5, len(files))
for f in files:
if f.path.parent() == p['dir1']:
if f.path.parent() == p["dir1"]:
assert f.is_ref
else:
assert not f.is_ref


def test_get_files_with_folders():
# When fileclasses handle folders, return them and stop recursing!
class FakeFile(File):
@ -171,106 +191,115 @@ def test_get_files_with_folders():
return True

d = Directories()
p = testpath['fs']
p = testpath["fs"]
d.add_path(p)
files = list(d.get_files(fileclasses=[FakeFile]))
# We have the 3 root files and the 3 root dirs
eq_(6, len(files))


def test_get_folders():
d = Directories()
p = testpath['fs']
p = testpath["fs"]
d.add_path(p)
d.set_state(p['dir1'], DirectoryState.Reference)
d.set_state(p['dir2'], DirectoryState.Excluded)
d.set_state(p["dir1"], DirectoryState.Reference)
d.set_state(p["dir2"], DirectoryState.Excluded)
folders = list(d.get_folders())
eq_(len(folders), 3)
ref = [f for f in folders if f.is_ref]
not_ref = [f for f in folders if not f.is_ref]
eq_(len(ref), 1)
eq_(ref[0].path, p['dir1'])
eq_(ref[0].path, p["dir1"])
eq_(len(not_ref), 2)
eq_(ref[0].size, 1)


def test_get_files_with_inherited_exclusion():
d = Directories()
p = testpath['onefile']
p = testpath["onefile"]
d.add_path(p)
d.set_state(p, DirectoryState.Excluded)
eq_([], list(d.get_files()))


def test_save_and_load(tmpdir):
d1 = Directories()
d2 = Directories()
p1 = Path(str(tmpdir.join('p1')))
p1 = Path(str(tmpdir.join("p1")))
p1.mkdir()
p2 = Path(str(tmpdir.join('p2')))
p2 = Path(str(tmpdir.join("p2")))
p2.mkdir()
d1.add_path(p1)
d1.add_path(p2)
d1.set_state(p1, DirectoryState.Reference)
d1.set_state(p1['dir1'], DirectoryState.Excluded)
tmpxml = str(tmpdir.join('directories_testunit.xml'))
d1.set_state(p1["dir1"], DirectoryState.Excluded)
tmpxml = str(tmpdir.join("directories_testunit.xml"))
d1.save_to_file(tmpxml)
d2.load_from_file(tmpxml)
eq_(2, len(d2))
eq_(DirectoryState.Reference, d2.get_state(p1))
eq_(DirectoryState.Excluded, d2.get_state(p1['dir1']))
eq_(DirectoryState.Excluded, d2.get_state(p1["dir1"]))


def test_invalid_path():
d = Directories()
p = Path('does_not_exist')
p = Path("does_not_exist")
with raises(InvalidPathError):
d.add_path(p)
eq_(0, len(d))


def test_set_state_on_invalid_path():
d = Directories()
try:
d.set_state(Path('foobar',), DirectoryState.Normal)
d.set_state(Path("foobar",), DirectoryState.Normal)
except LookupError:
assert False


def test_load_from_file_with_invalid_path(tmpdir):
#This test simulates a load from file resulting in a
#InvalidPath raise. Other directories must be loaded.
# This test simulates a load from file resulting in a
# InvalidPath raise. Other directories must be loaded.
d1 = Directories()
d1.add_path(testpath['onefile'])
#Will raise InvalidPath upon loading
p = Path(str(tmpdir.join('toremove')))
d1.add_path(testpath["onefile"])
# Will raise InvalidPath upon loading
p = Path(str(tmpdir.join("toremove")))
p.mkdir()
d1.add_path(p)
p.rmdir()
tmpxml = str(tmpdir.join('directories_testunit.xml'))
tmpxml = str(tmpdir.join("directories_testunit.xml"))
d1.save_to_file(tmpxml)
d2 = Directories()
d2.load_from_file(tmpxml)
eq_(1, len(d2))


def test_unicode_save(tmpdir):
d = Directories()
p1 = Path(str(tmpdir))['hello\xe9']
p1 = Path(str(tmpdir))["hello\xe9"]
p1.mkdir()
p1['foo\xe9'].mkdir()
p1["foo\xe9"].mkdir()
d.add_path(p1)
d.set_state(p1['foo\xe9'], DirectoryState.Excluded)
tmpxml = str(tmpdir.join('directories_testunit.xml'))
d.set_state(p1["foo\xe9"], DirectoryState.Excluded)
tmpxml = str(tmpdir.join("directories_testunit.xml"))
try:
d.save_to_file(tmpxml)
except UnicodeDecodeError:
assert False


def test_get_files_refreshes_its_directories():
d = Directories()
p = testpath['fs']
p = testpath["fs"]
d.add_path(p)
files = d.get_files()
eq_(6, len(list(files)))
time.sleep(1)
os.remove(str(p['dir1']['file1.test']))
os.remove(str(p["dir1"]["file1.test"]))
files = d.get_files()
eq_(5, len(list(files)))


def test_get_files_does_not_choke_on_non_existing_directories(tmpdir):
d = Directories()
p = Path(str(tmpdir))
@ -278,36 +307,37 @@ def test_get_files_does_not_choke_on_non_existing_directories(tmpdir):
p.rmtree()
eq_([], list(d.get_files()))


def test_get_state_returns_excluded_by_default_for_hidden_directories(tmpdir):
d = Directories()
p = Path(str(tmpdir))
hidden_dir_path = p['.foo']
p['.foo'].mkdir()
hidden_dir_path = p[".foo"]
p[".foo"].mkdir()
d.add_path(p)
eq_(d.get_state(hidden_dir_path), DirectoryState.Excluded)
# But it can be overriden
d.set_state(hidden_dir_path, DirectoryState.Normal)
eq_(d.get_state(hidden_dir_path), DirectoryState.Normal)


def test_default_path_state_override(tmpdir):
# It's possible for a subclass to override the default state of a path
class MyDirectories(Directories):
def _default_state_for_path(self, path):
if 'foobar' in path:
if "foobar" in path:
return DirectoryState.Excluded

d = MyDirectories()
p1 = Path(str(tmpdir))
p1['foobar'].mkdir()
p1['foobar/somefile'].open('w').close()
p1['foobaz'].mkdir()
p1['foobaz/somefile'].open('w').close()
p1["foobar"].mkdir()
p1["foobar/somefile"].open("w").close()
p1["foobaz"].mkdir()
p1["foobaz/somefile"].open("w").close()
d.add_path(p1)
eq_(d.get_state(p1['foobaz']), DirectoryState.Normal)
eq_(d.get_state(p1['foobar']), DirectoryState.Excluded)
eq_(len(list(d.get_files())), 1) # only the 'foobaz' file is there
eq_(d.get_state(p1["foobaz"]), DirectoryState.Normal)
eq_(d.get_state(p1["foobar"]), DirectoryState.Excluded)
eq_(len(list(d.get_files())), 1) # only the 'foobaz' file is there
# However, the default state can be changed
d.set_state(p1['foobar'], DirectoryState.Normal)
eq_(d.get_state(p1['foobar']), DirectoryState.Normal)
d.set_state(p1["foobar"], DirectoryState.Normal)
eq_(d.get_state(p1["foobar"]), DirectoryState.Normal)
eq_(len(list(d.get_files())), 2)

@ -13,13 +13,28 @@ from hscommon.testutil import eq_, log_calls
|
||||
from .base import NamedObject
|
||||
from .. import engine
|
||||
from ..engine import (
|
||||
get_match, getwords, Group, getfields, unpack_fields, compare_fields, compare, WEIGHT_WORDS,
|
||||
MATCH_SIMILAR_WORDS, NO_FIELD_ORDER, build_word_dict, get_groups, getmatches, Match,
|
||||
getmatches_by_contents, merge_similar_words, reduce_common_words
|
||||
get_match,
|
||||
getwords,
|
||||
Group,
|
||||
getfields,
|
||||
unpack_fields,
|
||||
compare_fields,
|
||||
compare,
|
||||
WEIGHT_WORDS,
|
||||
MATCH_SIMILAR_WORDS,
|
||||
NO_FIELD_ORDER,
|
||||
build_word_dict,
|
||||
get_groups,
|
||||
getmatches,
|
||||
Match,
|
||||
getmatches_by_contents,
|
||||
merge_similar_words,
|
||||
reduce_common_words,
|
||||
)
|
||||
|
||||
no = NamedObject
|
||||
|
||||
|
||||
def get_match_triangle():
|
||||
o1 = NamedObject(with_words=True)
|
||||
o2 = NamedObject(with_words=True)
|
||||
@ -29,6 +44,7 @@ def get_match_triangle():
|
||||
m3 = get_match(o2, o3)
|
||||
return [m1, m2, m3]
|
||||
|
||||
|
||||
def get_test_group():
|
||||
m1, m2, m3 = get_match_triangle()
|
||||
result = Group()
|
||||
@ -37,6 +53,7 @@ def get_test_group():
|
||||
result.add_match(m3)
|
||||
return result
|
||||
|
||||
|
||||
def assert_match(m, name1, name2):
|
||||
# When testing matches, whether objects are in first or second position very often doesn't
|
||||
# matter. This function makes this test more convenient.
|
||||
@ -46,53 +63,54 @@ def assert_match(m, name1, name2):
|
||||
eq_(m.first.name, name2)
|
||||
eq_(m.second.name, name1)
|
||||
|
||||
|
||||
class TestCasegetwords:
|
||||
def test_spaces(self):
|
||||
eq_(['a', 'b', 'c', 'd'], getwords("a b c d"))
|
||||
eq_(['a', 'b', 'c', 'd'], getwords(" a b c d "))
|
||||
eq_(["a", "b", "c", "d"], getwords("a b c d"))
|
||||
eq_(["a", "b", "c", "d"], getwords(" a b c d "))
|
||||
|
||||
def test_splitter_chars(self):
|
||||
eq_(
|
||||
[chr(i) for i in range(ord('a'), ord('z')+1)],
|
||||
getwords("a-b_c&d+e(f)g;h\\i[j]k{l}m:n.o,p<q>r/s?t~u!v@w#x$y*z")
|
||||
[chr(i) for i in range(ord("a"), ord("z") + 1)],
|
||||
getwords("a-b_c&d+e(f)g;h\\i[j]k{l}m:n.o,p<q>r/s?t~u!v@w#x$y*z"),
|
||||
)
|
||||
|
||||
def test_joiner_chars(self):
|
||||
eq_(["aec"], getwords("a'e\u0301c"))
|
||||
|
||||
def test_empty(self):
|
||||
eq_([], getwords(''))
|
||||
eq_([], getwords(""))
|
||||
|
||||
def test_returns_lowercase(self):
|
||||
eq_(['foo', 'bar'], getwords('FOO BAR'))
|
||||
eq_(["foo", "bar"], getwords("FOO BAR"))
|
||||
|
||||
def test_decompose_unicode(self):
|
||||
eq_(getwords('foo\xe9bar'), ['fooebar'])
|
||||
eq_(getwords("foo\xe9bar"), ["fooebar"])
|
||||
|
||||
|
||||
class TestCasegetfields:
|
||||
def test_simple(self):
|
||||
eq_([['a', 'b'], ['c', 'd', 'e']], getfields('a b - c d e'))
|
||||
eq_([["a", "b"], ["c", "d", "e"]], getfields("a b - c d e"))
|
||||
|
||||
def test_empty(self):
|
||||
eq_([], getfields(''))
|
||||
eq_([], getfields(""))
|
||||
|
||||
def test_cleans_empty_fields(self):
|
||||
expected = [['a', 'bc', 'def']]
|
||||
actual = getfields(' - a bc def')
|
||||
expected = [["a", "bc", "def"]]
|
||||
actual = getfields(" - a bc def")
|
||||
eq_(expected, actual)
|
||||
expected = [['bc', 'def']]
|
||||
expected = [["bc", "def"]]
|
||||
|
||||
|
||||
class TestCaseunpack_fields:
|
||||
def test_with_fields(self):
|
||||
expected = ['a', 'b', 'c', 'd', 'e', 'f']
|
||||
actual = unpack_fields([['a'], ['b', 'c'], ['d', 'e', 'f']])
|
||||
expected = ["a", "b", "c", "d", "e", "f"]
|
||||
actual = unpack_fields([["a"], ["b", "c"], ["d", "e", "f"]])
|
||||
eq_(expected, actual)
|
||||
|
||||
def test_without_fields(self):
|
||||
expected = ['a', 'b', 'c', 'd', 'e', 'f']
|
||||
actual = unpack_fields(['a', 'b', 'c', 'd', 'e', 'f'])
|
||||
expected = ["a", "b", "c", "d", "e", "f"]
|
||||
actual = unpack_fields(["a", "b", "c", "d", "e", "f"])
|
||||
eq_(expected, actual)
|
||||
|
||||
def test_empty(self):
|
||||
@ -101,134 +119,151 @@ class TestCaseunpack_fields:
|
||||
|
||||
class TestCaseWordCompare:
|
||||
def test_list(self):
|
||||
eq_(100, compare(['a', 'b', 'c', 'd'], ['a', 'b', 'c', 'd']))
|
||||
eq_(86, compare(['a', 'b', 'c', 'd'], ['a', 'b', 'c']))
|
||||
eq_(100, compare(["a", "b", "c", "d"], ["a", "b", "c", "d"]))
|
||||
eq_(86, compare(["a", "b", "c", "d"], ["a", "b", "c"]))
|
||||
|
||||
def test_unordered(self):
|
||||
#Sometimes, users don't want fuzzy matching too much When they set the slider
|
||||
#to 100, they don't expect a filename with the same words, but not the same order, to match.
|
||||
#Thus, we want to return 99 in that case.
|
||||
eq_(99, compare(['a', 'b', 'c', 'd'], ['d', 'b', 'c', 'a']))
|
||||
# Sometimes, users don't want fuzzy matching too much When they set the slider
|
||||
# to 100, they don't expect a filename with the same words, but not the same order, to match.
|
||||
# Thus, we want to return 99 in that case.
|
||||
eq_(99, compare(["a", "b", "c", "d"], ["d", "b", "c", "a"]))
|
||||
|
||||
def test_word_occurs_twice(self):
|
||||
#if a word occurs twice in first, but once in second, we want the word to be only counted once
|
||||
eq_(89, compare(['a', 'b', 'c', 'd', 'a'], ['d', 'b', 'c', 'a']))
|
||||
# if a word occurs twice in first, but once in second, we want the word to be only counted once
|
||||
eq_(89, compare(["a", "b", "c", "d", "a"], ["d", "b", "c", "a"]))
|
||||
|
||||
def test_uses_copy_of_lists(self):
|
||||
first = ['foo', 'bar']
|
||||
second = ['bar', 'bleh']
|
||||
first = ["foo", "bar"]
|
||||
second = ["bar", "bleh"]
|
||||
compare(first, second)
|
||||
eq_(['foo', 'bar'], first)
|
||||
eq_(['bar', 'bleh'], second)
|
||||
eq_(["foo", "bar"], first)
|
||||
eq_(["bar", "bleh"], second)
|
||||
|
||||
def test_word_weight(self):
|
||||
eq_(int((6.0 / 13.0) * 100), compare(['foo', 'bar'], ['bar', 'bleh'], (WEIGHT_WORDS, )))
|
||||
eq_(
|
||||
int((6.0 / 13.0) * 100),
|
||||
compare(["foo", "bar"], ["bar", "bleh"], (WEIGHT_WORDS,)),
|
||||
)
|
||||
|
||||
def test_similar_words(self):
|
||||
eq_(100, compare(['the', 'white', 'stripes'], ['the', 'whites', 'stripe'], (MATCH_SIMILAR_WORDS, )))
|
||||
eq_(
|
||||
100,
|
||||
compare(
|
||||
["the", "white", "stripes"],
|
||||
["the", "whites", "stripe"],
|
||||
(MATCH_SIMILAR_WORDS,),
|
||||
),
|
||||
)
|
||||
|
||||
def test_empty(self):
|
||||
eq_(0, compare([], []))
|
||||
|
||||
def test_with_fields(self):
|
||||
eq_(67, compare([['a', 'b'], ['c', 'd', 'e']], [['a', 'b'], ['c', 'd', 'f']]))
|
||||
eq_(67, compare([["a", "b"], ["c", "d", "e"]], [["a", "b"], ["c", "d", "f"]]))
|
||||
|
||||
def test_propagate_flags_with_fields(self, monkeypatch):
|
||||
def mock_compare(first, second, flags):
|
||||
eq_((0, 1, 2, 3, 5), flags)
|
||||
|
||||
monkeypatch.setattr(engine, 'compare_fields', mock_compare)
|
||||
compare([['a']], [['a']], (0, 1, 2, 3, 5))
|
||||
monkeypatch.setattr(engine, "compare_fields", mock_compare)
|
||||
compare([["a"]], [["a"]], (0, 1, 2, 3, 5))
|
||||
|
||||
|
||||
class TestCaseWordCompareWithFields:
|
||||
def test_simple(self):
|
||||
eq_(67, compare_fields([['a', 'b'], ['c', 'd', 'e']], [['a', 'b'], ['c', 'd', 'f']]))
|
||||
eq_(
|
||||
67,
|
||||
compare_fields(
|
||||
[["a", "b"], ["c", "d", "e"]], [["a", "b"], ["c", "d", "f"]]
|
||||
),
|
||||
)
|
||||
|
||||
def test_empty(self):
|
||||
eq_(0, compare_fields([], []))
|
||||
|
||||
def test_different_length(self):
|
||||
eq_(0, compare_fields([['a'], ['b']], [['a'], ['b'], ['c']]))
|
||||
eq_(0, compare_fields([["a"], ["b"]], [["a"], ["b"], ["c"]]))
|
||||
|
||||
def test_propagates_flags(self, monkeypatch):
|
||||
def mock_compare(first, second, flags):
|
||||
eq_((0, 1, 2, 3, 5), flags)
|
||||
|
||||
monkeypatch.setattr(engine, 'compare_fields', mock_compare)
|
||||
compare_fields([['a']], [['a']], (0, 1, 2, 3, 5))
|
||||
monkeypatch.setattr(engine, "compare_fields", mock_compare)
|
||||
compare_fields([["a"]], [["a"]], (0, 1, 2, 3, 5))
|
||||
|
||||
def test_order(self):
|
||||
first = [['a', 'b'], ['c', 'd', 'e']]
|
||||
second = [['c', 'd', 'f'], ['a', 'b']]
|
||||
first = [["a", "b"], ["c", "d", "e"]]
|
||||
second = [["c", "d", "f"], ["a", "b"]]
|
||||
eq_(0, compare_fields(first, second))
|
||||
|
||||
def test_no_order(self):
|
||||
first = [['a', 'b'], ['c', 'd', 'e']]
|
||||
second = [['c', 'd', 'f'], ['a', 'b']]
|
||||
eq_(67, compare_fields(first, second, (NO_FIELD_ORDER, )))
|
||||
first = [['a', 'b'], ['a', 'b']] #a field can only be matched once.
|
||||
second = [['c', 'd', 'f'], ['a', 'b']]
|
||||
eq_(0, compare_fields(first, second, (NO_FIELD_ORDER, )))
|
||||
first = [['a', 'b'], ['a', 'b', 'c']]
|
||||
second = [['c', 'd', 'f'], ['a', 'b']]
|
||||
eq_(33, compare_fields(first, second, (NO_FIELD_ORDER, )))
|
||||
first = [["a", "b"], ["c", "d", "e"]]
|
||||
second = [["c", "d", "f"], ["a", "b"]]
|
||||
eq_(67, compare_fields(first, second, (NO_FIELD_ORDER,)))
|
||||
first = [["a", "b"], ["a", "b"]] # a field can only be matched once.
|
||||
second = [["c", "d", "f"], ["a", "b"]]
|
||||
eq_(0, compare_fields(first, second, (NO_FIELD_ORDER,)))
|
||||
first = [["a", "b"], ["a", "b", "c"]]
|
||||
second = [["c", "d", "f"], ["a", "b"]]
|
||||
eq_(33, compare_fields(first, second, (NO_FIELD_ORDER,)))
|
||||
|
||||
def test_compare_fields_without_order_doesnt_alter_fields(self):
|
||||
#The NO_ORDER comp type altered the fields!
|
||||
first = [['a', 'b'], ['c', 'd', 'e']]
|
||||
second = [['c', 'd', 'f'], ['a', 'b']]
|
||||
eq_(67, compare_fields(first, second, (NO_FIELD_ORDER, )))
|
||||
eq_([['a', 'b'], ['c', 'd', 'e']], first)
|
||||
eq_([['c', 'd', 'f'], ['a', 'b']], second)
|
||||
# The NO_ORDER comp type altered the fields!
|
||||
first = [["a", "b"], ["c", "d", "e"]]
|
||||
second = [["c", "d", "f"], ["a", "b"]]
|
||||
eq_(67, compare_fields(first, second, (NO_FIELD_ORDER,)))
|
||||
eq_([["a", "b"], ["c", "d", "e"]], first)
|
||||
eq_([["c", "d", "f"], ["a", "b"]], second)
|
||||
|
||||
|
||||
class TestCasebuild_word_dict:
|
||||
def test_with_standard_words(self):
|
||||
l = [NamedObject('foo bar', True)]
|
||||
l.append(NamedObject('bar baz', True))
|
||||
l.append(NamedObject('baz bleh foo', True))
|
||||
d = build_word_dict(l)
|
||||
itemList = [NamedObject("foo bar", True)]
|
||||
itemList.append(NamedObject("bar baz", True))
|
||||
itemList.append(NamedObject("baz bleh foo", True))
|
||||
d = build_word_dict(itemList)
|
||||
eq_(4, len(d))
|
||||
eq_(2, len(d['foo']))
|
||||
assert l[0] in d['foo']
|
||||
assert l[2] in d['foo']
|
||||
eq_(2, len(d['bar']))
|
||||
assert l[0] in d['bar']
|
||||
assert l[1] in d['bar']
|
||||
eq_(2, len(d['baz']))
|
||||
assert l[1] in d['baz']
|
||||
assert l[2] in d['baz']
|
||||
eq_(1, len(d['bleh']))
|
||||
assert l[2] in d['bleh']
|
||||
eq_(2, len(d["foo"]))
|
||||
assert itemList[0] in d["foo"]
|
||||
assert itemList[2] in d["foo"]
|
||||
eq_(2, len(d["bar"]))
|
||||
assert itemList[0] in d["bar"]
|
||||
assert itemList[1] in d["bar"]
|
||||
eq_(2, len(d["baz"]))
|
||||
assert itemList[1] in d["baz"]
|
||||
assert itemList[2] in d["baz"]
|
||||
eq_(1, len(d["bleh"]))
|
||||
assert itemList[2] in d["bleh"]
|
||||
|
||||
def test_unpack_fields(self):
|
||||
o = NamedObject('')
|
||||
o.words = [['foo', 'bar'], ['baz']]
|
||||
o = NamedObject("")
|
||||
o.words = [["foo", "bar"], ["baz"]]
|
||||
d = build_word_dict([o])
|
||||
eq_(3, len(d))
|
||||
eq_(1, len(d['foo']))
|
||||
eq_(1, len(d["foo"]))
|
||||
|
||||
def test_words_are_unaltered(self):
|
||||
o = NamedObject('')
|
||||
o.words = [['foo', 'bar'], ['baz']]
|
||||
o = NamedObject("")
|
||||
o.words = [["foo", "bar"], ["baz"]]
|
||||
build_word_dict([o])
|
||||
eq_([['foo', 'bar'], ['baz']], o.words)
|
||||
eq_([["foo", "bar"], ["baz"]], o.words)
|
||||
|
||||
def test_object_instances_can_only_be_once_in_words_object_list(self):
|
||||
o = NamedObject('foo foo', True)
|
||||
o = NamedObject("foo foo", True)
|
||||
d = build_word_dict([o])
|
||||
eq_(1, len(d['foo']))
|
||||
eq_(1, len(d["foo"]))
|
||||
|
||||
def test_job(self):
|
||||
def do_progress(p, d=''):
|
||||
def do_progress(p, d=""):
|
||||
self.log.append(p)
|
||||
return True
|
||||
|
||||
j = job.Job(1, do_progress)
|
||||
self.log = []
|
||||
s = "foo bar"
|
||||
build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j)
|
||||
build_word_dict(
|
||||
[NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j
|
||||
)
|
||||
# We don't have intermediate log because iter_with_progress is called with every > 1
|
||||
eq_(0, self.log[0])
|
||||
eq_(100, self.log[1])
|
||||
@ -237,51 +272,56 @@ class TestCasebuild_word_dict:
|
||||
class TestCasemerge_similar_words:
|
||||
def test_some_similar_words(self):
|
||||
d = {
|
||||
'foobar': set([1]),
|
||||
'foobar1': set([2]),
|
||||
'foobar2': set([3]),
|
||||
"foobar": set([1]),
|
||||
"foobar1": set([2]),
|
||||
"foobar2": set([3]),
|
||||
}
|
||||
merge_similar_words(d)
|
||||
eq_(1, len(d))
|
||||
eq_(3, len(d['foobar']))
|
||||
|
||||
eq_(3, len(d["foobar"]))
|
||||
|
||||
|
||||
class TestCasereduce_common_words:
|
||||
def test_typical(self):
|
||||
d = {
|
||||
'foo': set([NamedObject('foo bar', True) for i in range(50)]),
|
||||
'bar': set([NamedObject('foo bar', True) for i in range(49)])
|
||||
"foo": set([NamedObject("foo bar", True) for i in range(50)]),
|
||||
"bar": set([NamedObject("foo bar", True) for i in range(49)]),
|
||||
}
|
||||
reduce_common_words(d, 50)
|
||||
assert 'foo' not in d
|
||||
eq_(49, len(d['bar']))
|
||||
assert "foo" not in d
|
||||
eq_(49, len(d["bar"]))
|
||||
|
||||
def test_dont_remove_objects_with_only_common_words(self):
|
||||
d = {
|
||||
'common': set([NamedObject("common uncommon", True) for i in range(50)] + [NamedObject("common", True)]),
|
||||
'uncommon': set([NamedObject("common uncommon", True)])
|
||||
"common": set(
|
||||
[NamedObject("common uncommon", True) for i in range(50)]
|
||||
+ [NamedObject("common", True)]
|
||||
),
|
||||
"uncommon": set([NamedObject("common uncommon", True)]),
|
||||
}
|
||||
reduce_common_words(d, 50)
|
||||
eq_(1, len(d['common']))
|
||||
eq_(1, len(d['uncommon']))
|
||||
eq_(1, len(d["common"]))
|
||||
eq_(1, len(d["uncommon"]))
|
||||
|
||||
def test_values_still_are_set_instances(self):
|
||||
d = {
|
||||
'common': set([NamedObject("common uncommon", True) for i in range(50)] + [NamedObject("common", True)]),
|
||||
'uncommon': set([NamedObject("common uncommon", True)])
|
||||
"common": set(
|
||||
[NamedObject("common uncommon", True) for i in range(50)]
|
||||
+ [NamedObject("common", True)]
|
||||
),
|
||||
"uncommon": set([NamedObject("common uncommon", True)]),
|
||||
}
|
||||
reduce_common_words(d, 50)
|
||||
assert isinstance(d['common'], set)
|
||||
assert isinstance(d['uncommon'], set)
|
||||
assert isinstance(d["common"], set)
|
||||
assert isinstance(d["uncommon"], set)
|
||||
|
||||
def test_dont_raise_KeyError_when_a_word_has_been_removed(self):
|
||||
#If a word has been removed by the reduce, an object in a subsequent common word that
|
||||
#contains the word that has been removed would cause a KeyError.
|
||||
# If a word has been removed by the reduce, an object in a subsequent common word that
|
||||
# contains the word that has been removed would cause a KeyError.
|
||||
d = {
|
||||
'foo': set([NamedObject('foo bar baz', True) for i in range(50)]),
|
||||
'bar': set([NamedObject('foo bar baz', True) for i in range(50)]),
|
||||
'baz': set([NamedObject('foo bar baz', True) for i in range(49)])
|
||||
"foo": set([NamedObject("foo bar baz", True) for i in range(50)]),
|
||||
"bar": set([NamedObject("foo bar baz", True) for i in range(50)]),
|
||||
"baz": set([NamedObject("foo bar baz", True) for i in range(49)]),
|
||||
}
|
||||
try:
|
||||
reduce_common_words(d, 50)
|
||||
@ -289,35 +329,37 @@ class TestCasereduce_common_words:
|
||||
self.fail()
|
||||
|
||||
def test_unpack_fields(self):
|
||||
#object.words may be fields.
|
||||
# object.words may be fields.
|
||||
def create_it():
|
||||
o = NamedObject('')
|
||||
o.words = [['foo', 'bar'], ['baz']]
|
||||
o = NamedObject("")
|
||||
o.words = [["foo", "bar"], ["baz"]]
|
||||
return o
|
||||
|
||||
d = {
|
||||
'foo': set([create_it() for i in range(50)])
|
||||
}
|
||||
d = {"foo": set([create_it() for i in range(50)])}
|
||||
try:
|
||||
reduce_common_words(d, 50)
|
||||
except TypeError:
|
||||
self.fail("must support fields.")
|
||||
|
||||
def test_consider_a_reduced_common_word_common_even_after_reduction(self):
|
||||
#There was a bug in the code that causeda word that has already been reduced not to
|
||||
#be counted as a common word for subsequent words. For example, if 'foo' is processed
|
||||
#as a common word, keeping a "foo bar" file in it, and the 'bar' is processed, "foo bar"
|
||||
#would not stay in 'bar' because 'foo' is not a common word anymore.
|
||||
only_common = NamedObject('foo bar', True)
|
||||
# There was a bug in the code that causeda word that has already been reduced not to
|
||||
# be counted as a common word for subsequent words. For example, if 'foo' is processed
|
||||
# as a common word, keeping a "foo bar" file in it, and the 'bar' is processed, "foo bar"
|
||||
# would not stay in 'bar' because 'foo' is not a common word anymore.
|
||||
only_common = NamedObject("foo bar", True)
|
||||
d = {
|
||||
'foo': set([NamedObject('foo bar baz', True) for i in range(49)] + [only_common]),
|
||||
'bar': set([NamedObject('foo bar baz', True) for i in range(49)] + [only_common]),
|
||||
'baz': set([NamedObject('foo bar baz', True) for i in range(49)])
|
||||
"foo": set(
|
||||
[NamedObject("foo bar baz", True) for i in range(49)] + [only_common]
|
||||
),
|
||||
"bar": set(
|
||||
[NamedObject("foo bar baz", True) for i in range(49)] + [only_common]
|
||||
),
|
||||
"baz": set([NamedObject("foo bar baz", True) for i in range(49)]),
|
||||
}
|
||||
reduce_common_words(d, 50)
|
||||
eq_(1, len(d['foo']))
|
||||
eq_(1, len(d['bar']))
|
||||
eq_(49, len(d['baz']))
|
||||
eq_(1, len(d["foo"]))
|
||||
eq_(1, len(d["bar"]))
|
||||
eq_(49, len(d["baz"]))
|
||||
|
||||
|
||||
class TestCaseget_match:
|
||||
@ -326,8 +368,8 @@ class TestCaseget_match:
|
||||
o2 = NamedObject("bar bleh", True)
|
||||
m = get_match(o1, o2)
|
||||
eq_(50, m.percentage)
|
||||
eq_(['foo', 'bar'], m.first.words)
|
||||
eq_(['bar', 'bleh'], m.second.words)
|
||||
eq_(["foo", "bar"], m.first.words)
|
||||
eq_(["bar", "bleh"], m.second.words)
|
||||
assert m.first is o1
|
||||
assert m.second is o2
|
||||
|
||||
@ -340,7 +382,9 @@ class TestCaseget_match:
|
||||
assert object() not in m
|
||||
|
||||
def test_word_weight(self):
|
||||
m = get_match(NamedObject("foo bar", True), NamedObject("bar bleh", True), (WEIGHT_WORDS, ))
|
||||
m = get_match(
|
||||
NamedObject("foo bar", True), NamedObject("bar bleh", True), (WEIGHT_WORDS,)
|
||||
)
|
||||
eq_(m.percentage, int((6.0 / 13.0) * 100))
|
||||
|
||||
|
||||
@ -349,54 +393,59 @@ class TestCaseGetMatches:
|
||||
eq_(getmatches([]), [])
|
||||
|
||||
def test_simple(self):
|
||||
l = [NamedObject("foo bar"), NamedObject("bar bleh"), NamedObject("a b c foo")]
|
||||
r = getmatches(l)
|
||||
itemList = [NamedObject("foo bar"), NamedObject("bar bleh"), NamedObject("a b c foo")]
|
||||
r = getmatches(itemList)
|
||||
eq_(2, len(r))
|
||||
m = first(m for m in r if m.percentage == 50) #"foo bar" and "bar bleh"
|
||||
assert_match(m, 'foo bar', 'bar bleh')
|
||||
m = first(m for m in r if m.percentage == 33) #"foo bar" and "a b c foo"
|
||||
assert_match(m, 'foo bar', 'a b c foo')
|
||||
m = first(m for m in r if m.percentage == 50) # "foo bar" and "bar bleh"
|
||||
assert_match(m, "foo bar", "bar bleh")
|
||||
m = first(m for m in r if m.percentage == 33) # "foo bar" and "a b c foo"
|
||||
assert_match(m, "foo bar", "a b c foo")
|
||||
|
||||
def test_null_and_unrelated_objects(self):
|
||||
l = [NamedObject("foo bar"), NamedObject("bar bleh"), NamedObject(""), NamedObject("unrelated object")]
|
||||
r = getmatches(l)
|
||||
itemList = [
|
||||
NamedObject("foo bar"),
|
||||
NamedObject("bar bleh"),
|
||||
NamedObject(""),
|
||||
NamedObject("unrelated object"),
|
||||
]
|
||||
r = getmatches(itemList)
|
||||
eq_(len(r), 1)
|
||||
m = r[0]
|
||||
eq_(m.percentage, 50)
|
||||
assert_match(m, 'foo bar', 'bar bleh')
|
||||
assert_match(m, "foo bar", "bar bleh")
|
||||

    def test_twice_the_same_word(self):
        l = [NamedObject("foo foo bar"), NamedObject("bar bleh")]
        r = getmatches(l)
        itemList = [NamedObject("foo foo bar"), NamedObject("bar bleh")]
        r = getmatches(itemList)
        eq_(1, len(r))

    def test_twice_the_same_word_when_preworded(self):
        l = [NamedObject("foo foo bar", True), NamedObject("bar bleh", True)]
        r = getmatches(l)
        itemList = [NamedObject("foo foo bar", True), NamedObject("bar bleh", True)]
        r = getmatches(itemList)
        eq_(1, len(r))

    def test_two_words_match(self):
        l = [NamedObject("foo bar"), NamedObject("foo bar bleh")]
        r = getmatches(l)
        itemList = [NamedObject("foo bar"), NamedObject("foo bar bleh")]
        r = getmatches(itemList)
        eq_(1, len(r))

    def test_match_files_with_only_common_words(self):
        #If a word occurs more than 50 times, it is excluded from the matching process
        #The problem with the common_word_threshold is that the files containing only common
        #words will never be matched together. We *should* match them.
        # If a word occurs more than 50 times, it is excluded from the matching process
        # The problem with the common_word_threshold is that the files containing only common
        # words will never be matched together. We *should* match them.
        # This test assumes that the common word threshold const is 50
l = [NamedObject("foo") for i in range(50)]
|
||||
r = getmatches(l)
|
||||
itemList = [NamedObject("foo") for i in range(50)]
|
||||
r = getmatches(itemList)
|
||||
eq_(1225, len(r))
|
||||
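        # 50 names that all match one another pairwise: C(50, 2) = 50 * 49 / 2 = 1225.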

    def test_use_words_already_there_if_there(self):
        o1 = NamedObject('foo')
        o2 = NamedObject('bar')
        o2.words = ['foo']
        o1 = NamedObject("foo")
        o2 = NamedObject("bar")
        o2.words = ["foo"]
        eq_(1, len(getmatches([o1, o2])))
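        # A pre-assigned .words list appears to take precedence over the name,
        # so both objects end up matching on "foo".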

    def test_job(self):
        def do_progress(p, d=''):
        def do_progress(p, d=""):
            self.log.append(p)
            return True

@@ -409,28 +458,28 @@ class TestCaseGetMatches:
        eq_(100, self.log[-1])

    def test_weight_words(self):
        l = [NamedObject("foo bar"), NamedObject("bar bleh")]
        m = getmatches(l, weight_words=True)[0]
        itemList = [NamedObject("foo bar"), NamedObject("bar bleh")]
        m = getmatches(itemList, weight_words=True)[0]
        eq_(int((6.0 / 13.0) * 100), m.percentage)

    def test_similar_word(self):
        l = [NamedObject("foobar"), NamedObject("foobars")]
        eq_(len(getmatches(l, match_similar_words=True)), 1)
        eq_(getmatches(l, match_similar_words=True)[0].percentage, 100)
        l = [NamedObject("foobar"), NamedObject("foo")]
        eq_(len(getmatches(l, match_similar_words=True)), 0) #too far
        l = [NamedObject("bizkit"), NamedObject("bizket")]
        eq_(len(getmatches(l, match_similar_words=True)), 1)
        l = [NamedObject("foobar"), NamedObject("foosbar")]
        eq_(len(getmatches(l, match_similar_words=True)), 1)
        itemList = [NamedObject("foobar"), NamedObject("foobars")]
        eq_(len(getmatches(itemList, match_similar_words=True)), 1)
        eq_(getmatches(itemList, match_similar_words=True)[0].percentage, 100)
        itemList = [NamedObject("foobar"), NamedObject("foo")]
        eq_(len(getmatches(itemList, match_similar_words=True)), 0)  # too far
        itemList = [NamedObject("bizkit"), NamedObject("bizket")]
        eq_(len(getmatches(itemList, match_similar_words=True)), 1)
        itemList = [NamedObject("foobar"), NamedObject("foosbar")]
        eq_(len(getmatches(itemList, match_similar_words=True)), 1)
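        # match_similar_words appears to tolerate single-letter edits (foobars,
        # bizket, foosbar) but not a truncation as large as foobar -> foo.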

    def test_single_object_with_similar_words(self):
        l = [NamedObject("foo foos")]
        eq_(len(getmatches(l, match_similar_words=True)), 0)
        itemList = [NamedObject("foo foos")]
        eq_(len(getmatches(itemList, match_similar_words=True)), 0)

    def test_double_words_get_counted_only_once(self):
        l = [NamedObject("foo bar foo bleh"), NamedObject("foo bar bleh bar")]
        m = getmatches(l)[0]
        itemList = [NamedObject("foo bar foo bleh"), NamedObject("foo bar bleh bar")]
        m = getmatches(itemList)[0]
        eq_(75, m.percentage)
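        # Three distinct words (foo, bar, bleh) are shared between two
        # four-word names: 2 * 3 / (4 + 4) = 75.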

    def test_with_fields(self):
@@ -450,13 +499,13 @@ class TestCaseGetMatches:
        eq_(m.percentage, 50)

    def test_only_match_similar_when_the_option_is_set(self):
        l = [NamedObject("foobar"), NamedObject("foobars")]
        eq_(len(getmatches(l, match_similar_words=False)), 0)
        itemList = [NamedObject("foobar"), NamedObject("foobars")]
        eq_(len(getmatches(itemList, match_similar_words=False)), 0)

    def test_dont_recurse_do_match(self):
        # with nosetests, the stack is increased. The number has to be high enough not to be failing falsely
        sys.setrecursionlimit(200)
        files = [NamedObject('foo bar') for i in range(201)]
        files = [NamedObject("foo bar") for i in range(201)]
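        # 201 identical names produce C(201, 2) = 20100 matches; the point is
        # that collecting them must not blow the 200-frame limit set above.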
        try:
            getmatches(files)
        except RuntimeError:
@@ -465,9 +514,9 @@ class TestCaseGetMatches:
        sys.setrecursionlimit(1000)