mirror of
https://github.com/arsenetar/dupeguru.git
synced 2026-01-22 14:41:39 +00:00
Format files with black
- Format all files with black - Update tox.ini flake8 arguments to be compatible - Add black to requirements-extra.txt - Reduce ignored flake8 rules and fix a few violations
This commit is contained in:
@@ -1,3 +1,2 @@
|
||||
__version__ = '4.0.4'
|
||||
__appname__ = 'dupeGuru'
|
||||
|
||||
__version__ = "4.0.4"
|
||||
__appname__ = "dupeGuru"
|
||||
|
||||
227
core/app.py
227
core/app.py
@@ -34,8 +34,8 @@ from .gui.ignore_list_dialog import IgnoreListDialog
|
||||
from .gui.problem_dialog import ProblemDialog
|
||||
from .gui.stats_label import StatsLabel
|
||||
|
||||
HAD_FIRST_LAUNCH_PREFERENCE = 'HadFirstLaunch'
|
||||
DEBUG_MODE_PREFERENCE = 'DebugMode'
|
||||
HAD_FIRST_LAUNCH_PREFERENCE = "HadFirstLaunch"
|
||||
DEBUG_MODE_PREFERENCE = "DebugMode"
|
||||
|
||||
MSG_NO_MARKED_DUPES = tr("There are no marked duplicates. Nothing has been done.")
|
||||
MSG_NO_SELECTED_DUPES = tr("There are no selected duplicates. Nothing has been done.")
|
||||
@@ -44,23 +44,27 @@ MSG_MANY_FILES_TO_OPEN = tr(
|
||||
"files are opened with, doing so can create quite a mess. Continue?"
|
||||
)
|
||||
|
||||
|
||||
class DestType:
|
||||
Direct = 0
|
||||
Relative = 1
|
||||
Absolute = 2
|
||||
|
||||
|
||||
class JobType:
|
||||
Scan = 'job_scan'
|
||||
Load = 'job_load'
|
||||
Move = 'job_move'
|
||||
Copy = 'job_copy'
|
||||
Delete = 'job_delete'
|
||||
Scan = "job_scan"
|
||||
Load = "job_load"
|
||||
Move = "job_move"
|
||||
Copy = "job_copy"
|
||||
Delete = "job_delete"
|
||||
|
||||
|
||||
class AppMode:
|
||||
Standard = 0
|
||||
Music = 1
|
||||
Picture = 2
|
||||
|
||||
|
||||
JOBID2TITLE = {
|
||||
JobType.Scan: tr("Scanning for duplicates"),
|
||||
JobType.Load: tr("Loading"),
|
||||
@@ -69,6 +73,7 @@ JOBID2TITLE = {
|
||||
JobType.Delete: tr("Sending to Trash"),
|
||||
}
|
||||
|
||||
|
||||
class DupeGuru(Broadcaster):
|
||||
"""Holds everything together.
|
||||
|
||||
@@ -100,7 +105,8 @@ class DupeGuru(Broadcaster):
|
||||
|
||||
Instance of :mod:`meta-gui <core.gui>` table listing the results from :attr:`results`
|
||||
"""
|
||||
#--- View interface
|
||||
|
||||
# --- View interface
|
||||
# get_default(key_name)
|
||||
# set_default(key_name, value)
|
||||
# show_message(msg)
|
||||
@@ -116,7 +122,7 @@ class DupeGuru(Broadcaster):
|
||||
|
||||
NAME = PROMPT_NAME = "dupeGuru"
|
||||
|
||||
PICTURE_CACHE_TYPE = 'sqlite' # set to 'shelve' for a ShelveCache
|
||||
PICTURE_CACHE_TYPE = "sqlite" # set to 'shelve' for a ShelveCache
|
||||
|
||||
def __init__(self, view):
|
||||
if view.get_default(DEBUG_MODE_PREFERENCE):
|
||||
@@ -124,7 +130,9 @@ class DupeGuru(Broadcaster):
|
||||
logging.debug("Debug mode enabled")
|
||||
Broadcaster.__init__(self)
|
||||
self.view = view
|
||||
self.appdata = desktop.special_folder_path(desktop.SpecialFolder.AppData, appname=self.NAME)
|
||||
self.appdata = desktop.special_folder_path(
|
||||
desktop.SpecialFolder.AppData, appname=self.NAME
|
||||
)
|
||||
if not op.exists(self.appdata):
|
||||
os.makedirs(self.appdata)
|
||||
self.app_mode = AppMode.Standard
|
||||
@@ -136,11 +144,11 @@ class DupeGuru(Broadcaster):
|
||||
# sent to the scanner. They don't have default values because those defaults values are
|
||||
# defined in the scanner class.
|
||||
self.options = {
|
||||
'escape_filter_regexp': True,
|
||||
'clean_empty_dirs': False,
|
||||
'ignore_hardlink_matches': False,
|
||||
'copymove_dest_type': DestType.Relative,
|
||||
'picture_cache_type': self.PICTURE_CACHE_TYPE
|
||||
"escape_filter_regexp": True,
|
||||
"clean_empty_dirs": False,
|
||||
"ignore_hardlink_matches": False,
|
||||
"copymove_dest_type": DestType.Relative,
|
||||
"picture_cache_type": self.PICTURE_CACHE_TYPE,
|
||||
}
|
||||
self.selected_dupes = []
|
||||
self.details_panel = DetailsPanel(self)
|
||||
@@ -155,7 +163,7 @@ class DupeGuru(Broadcaster):
|
||||
for child in children:
|
||||
child.connect()
|
||||
|
||||
#--- Private
|
||||
# --- Private
|
||||
def _recreate_result_table(self):
|
||||
if self.result_table is not None:
|
||||
self.result_table.disconnect()
|
||||
@@ -169,26 +177,30 @@ class DupeGuru(Broadcaster):
|
||||
self.view.create_results_window()
|
||||
|
||||
def _get_picture_cache_path(self):
|
||||
cache_type = self.options['picture_cache_type']
|
||||
cache_name = 'cached_pictures.shelve' if cache_type == 'shelve' else 'cached_pictures.db'
|
||||
cache_type = self.options["picture_cache_type"]
|
||||
cache_name = (
|
||||
"cached_pictures.shelve" if cache_type == "shelve" else "cached_pictures.db"
|
||||
)
|
||||
return op.join(self.appdata, cache_name)
|
||||
|
||||
def _get_dupe_sort_key(self, dupe, get_group, key, delta):
|
||||
if self.app_mode in (AppMode.Music, AppMode.Picture):
|
||||
if key == 'folder_path':
|
||||
dupe_folder_path = getattr(dupe, 'display_folder_path', dupe.folder_path)
|
||||
if key == "folder_path":
|
||||
dupe_folder_path = getattr(
|
||||
dupe, "display_folder_path", dupe.folder_path
|
||||
)
|
||||
return str(dupe_folder_path).lower()
|
||||
if self.app_mode == AppMode.Picture:
|
||||
if delta and key == 'dimensions':
|
||||
if delta and key == "dimensions":
|
||||
r = cmp_value(dupe, key)
|
||||
ref_value = cmp_value(get_group().ref, key)
|
||||
return get_delta_dimensions(r, ref_value)
|
||||
if key == 'marked':
|
||||
if key == "marked":
|
||||
return self.results.is_marked(dupe)
|
||||
if key == 'percentage':
|
||||
if key == "percentage":
|
||||
m = get_group().get_match_of(dupe)
|
||||
return m.percentage
|
||||
elif key == 'dupe_count':
|
||||
elif key == "dupe_count":
|
||||
return 0
|
||||
else:
|
||||
result = cmp_value(dupe, key)
|
||||
@@ -203,21 +215,25 @@ class DupeGuru(Broadcaster):
|
||||
|
||||
def _get_group_sort_key(self, group, key):
|
||||
if self.app_mode in (AppMode.Music, AppMode.Picture):
|
||||
if key == 'folder_path':
|
||||
dupe_folder_path = getattr(group.ref, 'display_folder_path', group.ref.folder_path)
|
||||
if key == "folder_path":
|
||||
dupe_folder_path = getattr(
|
||||
group.ref, "display_folder_path", group.ref.folder_path
|
||||
)
|
||||
return str(dupe_folder_path).lower()
|
||||
if key == 'percentage':
|
||||
if key == "percentage":
|
||||
return group.percentage
|
||||
if key == 'dupe_count':
|
||||
if key == "dupe_count":
|
||||
return len(group)
|
||||
if key == 'marked':
|
||||
if key == "marked":
|
||||
return len([dupe for dupe in group.dupes if self.results.is_marked(dupe)])
|
||||
return cmp_value(group.ref, key)
|
||||
|
||||
def _do_delete(self, j, link_deleted, use_hardlinks, direct_deletion):
|
||||
def op(dupe):
|
||||
j.add_progress()
|
||||
return self._do_delete_dupe(dupe, link_deleted, use_hardlinks, direct_deletion)
|
||||
return self._do_delete_dupe(
|
||||
dupe, link_deleted, use_hardlinks, direct_deletion
|
||||
)
|
||||
|
||||
j.start_job(self.results.mark_count)
|
||||
self.results.perform_on_marked(op, True)
|
||||
@@ -233,7 +249,7 @@ class DupeGuru(Broadcaster):
|
||||
else:
|
||||
os.remove(str_path)
|
||||
else:
|
||||
send2trash(str_path) # Raises OSError when there's a problem
|
||||
send2trash(str_path) # Raises OSError when there's a problem
|
||||
if link_deleted:
|
||||
group = self.results.get_group_of_duplicate(dupe)
|
||||
ref = group.ref
|
||||
@@ -258,8 +274,9 @@ class DupeGuru(Broadcaster):
|
||||
|
||||
def _get_export_data(self):
|
||||
columns = [
|
||||
col for col in self.result_table.columns.ordered_columns
|
||||
if col.visible and col.name != 'marked'
|
||||
col
|
||||
for col in self.result_table.columns.ordered_columns
|
||||
if col.visible and col.name != "marked"
|
||||
]
|
||||
colnames = [col.display for col in columns]
|
||||
rows = []
|
||||
@@ -273,10 +290,11 @@ class DupeGuru(Broadcaster):
|
||||
|
||||
def _results_changed(self):
|
||||
self.selected_dupes = [
|
||||
d for d in self.selected_dupes
|
||||
d
|
||||
for d in self.selected_dupes
|
||||
if self.results.get_group_of_duplicate(d) is not None
|
||||
]
|
||||
self.notify('results_changed')
|
||||
self.notify("results_changed")
|
||||
|
||||
def _start_job(self, jobid, func, args=()):
|
||||
title = JOBID2TITLE[jobid]
|
||||
@@ -310,7 +328,9 @@ class DupeGuru(Broadcaster):
|
||||
msg = {
|
||||
JobType.Copy: tr("All marked files were copied successfully."),
|
||||
JobType.Move: tr("All marked files were moved successfully."),
|
||||
JobType.Delete: tr("All marked files were successfully sent to Trash."),
|
||||
JobType.Delete: tr(
|
||||
"All marked files were successfully sent to Trash."
|
||||
),
|
||||
}[jobid]
|
||||
self.view.show_message(msg)
|
||||
|
||||
@@ -341,9 +361,9 @@ class DupeGuru(Broadcaster):
|
||||
if dupes == self.selected_dupes:
|
||||
return
|
||||
self.selected_dupes = dupes
|
||||
self.notify('dupes_selected')
|
||||
self.notify("dupes_selected")
|
||||
|
||||
#--- Protected
|
||||
# --- Protected
|
||||
def _get_fileclasses(self):
|
||||
if self.app_mode == AppMode.Picture:
|
||||
return [pe.photo.PLAT_SPECIFIC_PHOTO_CLASS]
|
||||
@@ -360,7 +380,7 @@ class DupeGuru(Broadcaster):
|
||||
else:
|
||||
return prioritize.all_categories()
|
||||
|
||||
#--- Public
|
||||
# --- Public
|
||||
def add_directory(self, d):
|
||||
"""Adds folder ``d`` to :attr:`directories`.
|
||||
|
||||
@@ -370,7 +390,7 @@ class DupeGuru(Broadcaster):
|
||||
"""
|
||||
try:
|
||||
self.directories.add_path(Path(d))
|
||||
self.notify('directories_changed')
|
||||
self.notify("directories_changed")
|
||||
except directories.AlreadyThereError:
|
||||
self.view.show_message(tr("'{}' already is in the list.").format(d))
|
||||
except directories.InvalidPathError:
|
||||
@@ -383,7 +403,9 @@ class DupeGuru(Broadcaster):
|
||||
if not dupes:
|
||||
self.view.show_message(MSG_NO_SELECTED_DUPES)
|
||||
return
|
||||
msg = tr("All selected %d matches are going to be ignored in all subsequent scans. Continue?")
|
||||
msg = tr(
|
||||
"All selected %d matches are going to be ignored in all subsequent scans. Continue?"
|
||||
)
|
||||
if not self.view.ask_yes_no(msg % len(dupes)):
|
||||
return
|
||||
for dupe in dupes:
|
||||
@@ -400,22 +422,22 @@ class DupeGuru(Broadcaster):
|
||||
:param str filter: filter to apply
|
||||
"""
|
||||
self.results.apply_filter(None)
|
||||
if self.options['escape_filter_regexp']:
|
||||
filter = escape(filter, set('()[]\\.|+?^'))
|
||||
filter = escape(filter, '*', '.')
|
||||
if self.options["escape_filter_regexp"]:
|
||||
filter = escape(filter, set("()[]\\.|+?^"))
|
||||
filter = escape(filter, "*", ".")
|
||||
self.results.apply_filter(filter)
|
||||
self._results_changed()
|
||||
|
||||
def clean_empty_dirs(self, path):
|
||||
if self.options['clean_empty_dirs']:
|
||||
while delete_if_empty(path, ['.DS_Store']):
|
||||
if self.options["clean_empty_dirs"]:
|
||||
while delete_if_empty(path, [".DS_Store"]):
|
||||
path = path.parent()
|
||||
|
||||
def clear_picture_cache(self):
|
||||
try:
|
||||
os.remove(self._get_picture_cache_path())
|
||||
except FileNotFoundError:
|
||||
pass # we don't care
|
||||
pass # we don't care
|
||||
|
||||
def copy_or_move(self, dupe, copy: bool, destination: str, dest_type: DestType):
|
||||
source_path = dupe.path
|
||||
@@ -444,6 +466,7 @@ class DupeGuru(Broadcaster):
|
||||
|
||||
:param bool copy: If True, duplicates will be copied instead of moved
|
||||
"""
|
||||
|
||||
def do(j):
|
||||
def op(dupe):
|
||||
j.add_progress()
|
||||
@@ -459,7 +482,7 @@ class DupeGuru(Broadcaster):
|
||||
prompt = tr("Select a directory to {} marked files to").format(opname)
|
||||
destination = self.view.select_dest_folder(prompt)
|
||||
if destination:
|
||||
desttype = self.options['copymove_dest_type']
|
||||
desttype = self.options["copymove_dest_type"]
|
||||
jobid = JobType.Copy if copy else JobType.Move
|
||||
self._start_job(jobid, do)
|
||||
|
||||
@@ -472,8 +495,9 @@ class DupeGuru(Broadcaster):
|
||||
if not self.deletion_options.show(self.results.mark_count):
|
||||
return
|
||||
args = [
|
||||
self.deletion_options.link_deleted, self.deletion_options.use_hardlinks,
|
||||
self.deletion_options.direct
|
||||
self.deletion_options.link_deleted,
|
||||
self.deletion_options.use_hardlinks,
|
||||
self.deletion_options.direct,
|
||||
]
|
||||
logging.debug("Starting deletion job with args %r", args)
|
||||
self._start_job(JobType.Delete, self._do_delete, args=args)
|
||||
@@ -495,7 +519,9 @@ class DupeGuru(Broadcaster):
|
||||
The columns and their order in the resulting CSV file is determined in the same way as in
|
||||
:meth:`export_to_xhtml`.
|
||||
"""
|
||||
dest_file = self.view.select_dest_file(tr("Select a destination for your exported CSV"), 'csv')
|
||||
dest_file = self.view.select_dest_file(
|
||||
tr("Select a destination for your exported CSV"), "csv"
|
||||
)
|
||||
if dest_file:
|
||||
colnames, rows = self._get_export_data()
|
||||
try:
|
||||
@@ -505,13 +531,16 @@ class DupeGuru(Broadcaster):
|
||||
|
||||
def get_display_info(self, dupe, group, delta=False):
|
||||
def empty_data():
|
||||
return {c.name: '---' for c in self.result_table.COLUMNS[1:]}
|
||||
return {c.name: "---" for c in self.result_table.COLUMNS[1:]}
|
||||
|
||||
if (dupe is None) or (group is None):
|
||||
return empty_data()
|
||||
try:
|
||||
return dupe.get_display_info(group, delta)
|
||||
except Exception as e:
|
||||
logging.warning("Exception on GetDisplayInfo for %s: %s", str(dupe.path), str(e))
|
||||
logging.warning(
|
||||
"Exception on GetDisplayInfo for %s: %s", str(dupe.path), str(e)
|
||||
)
|
||||
return empty_data()
|
||||
|
||||
def invoke_custom_command(self):
|
||||
@@ -521,9 +550,11 @@ class DupeGuru(Broadcaster):
|
||||
is replaced with that dupe's ref file. If there's no selection, the command is not invoked.
|
||||
If the dupe is a ref, ``%d`` and ``%r`` will be the same.
|
||||
"""
|
||||
cmd = self.view.get_default('CustomCommand')
|
||||
cmd = self.view.get_default("CustomCommand")
|
||||
if not cmd:
|
||||
msg = tr("You have no custom command set up. Set it up in your preferences.")
|
||||
msg = tr(
|
||||
"You have no custom command set up. Set it up in your preferences."
|
||||
)
|
||||
self.view.show_message(msg)
|
||||
return
|
||||
if not self.selected_dupes:
|
||||
@@ -531,8 +562,8 @@ class DupeGuru(Broadcaster):
|
||||
dupe = self.selected_dupes[0]
|
||||
group = self.results.get_group_of_duplicate(dupe)
|
||||
ref = group.ref
|
||||
cmd = cmd.replace('%d', str(dupe.path))
|
||||
cmd = cmd.replace('%r', str(ref.path))
|
||||
cmd = cmd.replace("%d", str(dupe.path))
|
||||
cmd = cmd.replace("%r", str(ref.path))
|
||||
match = re.match(r'"([^"]+)"(.*)', cmd)
|
||||
if match is not None:
|
||||
# This code here is because subprocess. Popen doesn't seem to accept, under Windows,
|
||||
@@ -551,9 +582,9 @@ class DupeGuru(Broadcaster):
|
||||
is persistent data, is the same as when the last session was closed (when :meth:`save` was
|
||||
called).
|
||||
"""
|
||||
self.directories.load_from_file(op.join(self.appdata, 'last_directories.xml'))
|
||||
self.notify('directories_changed')
|
||||
p = op.join(self.appdata, 'ignore_list.xml')
|
||||
self.directories.load_from_file(op.join(self.appdata, "last_directories.xml"))
|
||||
self.notify("directories_changed")
|
||||
p = op.join(self.appdata, "ignore_list.xml")
|
||||
self.ignore_list.load_from_xml(p)
|
||||
self.ignore_list_dialog.refresh()
|
||||
|
||||
@@ -562,8 +593,10 @@ class DupeGuru(Broadcaster):
|
||||
|
||||
:param str filename: path of the XML file (created with :meth:`save_as`) to load
|
||||
"""
|
||||
|
||||
def do(j):
|
||||
self.results.load_from_xml(filename, self._get_file, j)
|
||||
|
||||
self._start_job(JobType.Load, do)
|
||||
|
||||
def make_selected_reference(self):
|
||||
@@ -588,35 +621,36 @@ class DupeGuru(Broadcaster):
|
||||
if not self.result_table.power_marker:
|
||||
if changed_groups:
|
||||
self.selected_dupes = [
|
||||
d for d in self.selected_dupes
|
||||
d
|
||||
for d in self.selected_dupes
|
||||
if self.results.get_group_of_duplicate(d).ref is d
|
||||
]
|
||||
self.notify('results_changed')
|
||||
self.notify("results_changed")
|
||||
else:
|
||||
# If we're in "Dupes Only" mode (previously called Power Marker), things are a bit
|
||||
# different. The refs are not shown in the table, and if our operation is successful,
|
||||
# this means that there's no way to follow our dupe selection. Then, the best thing to
|
||||
# do is to keep our selection index-wise (different dupe selection, but same index
|
||||
# selection).
|
||||
self.notify('results_changed_but_keep_selection')
|
||||
self.notify("results_changed_but_keep_selection")
|
||||
|
||||
def mark_all(self):
|
||||
"""Set all dupes in the results as marked.
|
||||
"""
|
||||
self.results.mark_all()
|
||||
self.notify('marking_changed')
|
||||
self.notify("marking_changed")
|
||||
|
||||
def mark_none(self):
|
||||
"""Set all dupes in the results as unmarked.
|
||||
"""
|
||||
self.results.mark_none()
|
||||
self.notify('marking_changed')
|
||||
self.notify("marking_changed")
|
||||
|
||||
def mark_invert(self):
|
||||
"""Invert the marked state of all dupes in the results.
|
||||
"""
|
||||
self.results.mark_invert()
|
||||
self.notify('marking_changed')
|
||||
self.notify("marking_changed")
|
||||
|
||||
def mark_dupe(self, dupe, marked):
|
||||
"""Change marked status of ``dupe``.
|
||||
@@ -629,7 +663,7 @@ class DupeGuru(Broadcaster):
|
||||
self.results.mark(dupe)
|
||||
else:
|
||||
self.results.unmark(dupe)
|
||||
self.notify('marking_changed')
|
||||
self.notify("marking_changed")
|
||||
|
||||
def open_selected(self):
|
||||
"""Open :attr:`selected_dupes` with their associated application.
|
||||
@@ -656,7 +690,7 @@ class DupeGuru(Broadcaster):
|
||||
indexes = sorted(indexes, reverse=True)
|
||||
for index in indexes:
|
||||
del self.directories[index]
|
||||
self.notify('directories_changed')
|
||||
self.notify("directories_changed")
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
@@ -669,7 +703,7 @@ class DupeGuru(Broadcaster):
|
||||
:type duplicates: list of :class:`~core.fs.File`
|
||||
"""
|
||||
self.results.remove_duplicates(self.without_ref(duplicates))
|
||||
self.notify('results_changed_but_keep_selection')
|
||||
self.notify("results_changed_but_keep_selection")
|
||||
|
||||
def remove_marked(self):
|
||||
"""Removed marked duplicates from the results (without touching the files themselves).
|
||||
@@ -724,7 +758,9 @@ class DupeGuru(Broadcaster):
|
||||
if group.prioritize(key_func=sort_key):
|
||||
count += 1
|
||||
self._results_changed()
|
||||
msg = tr("{} duplicate groups were changed by the re-prioritization.").format(count)
|
||||
msg = tr("{} duplicate groups were changed by the re-prioritization.").format(
|
||||
count
|
||||
)
|
||||
self.view.show_message(msg)
|
||||
|
||||
def reveal_selected(self):
|
||||
@@ -734,10 +770,10 @@ class DupeGuru(Broadcaster):
|
||||
def save(self):
|
||||
if not op.exists(self.appdata):
|
||||
os.makedirs(self.appdata)
|
||||
self.directories.save_to_file(op.join(self.appdata, 'last_directories.xml'))
|
||||
p = op.join(self.appdata, 'ignore_list.xml')
|
||||
self.directories.save_to_file(op.join(self.appdata, "last_directories.xml"))
|
||||
p = op.join(self.appdata, "ignore_list.xml")
|
||||
self.ignore_list.save_to_xml(p)
|
||||
self.notify('save_session')
|
||||
self.notify("save_session")
|
||||
|
||||
def save_as(self, filename):
|
||||
"""Save results in ``filename``.
|
||||
@@ -756,7 +792,9 @@ class DupeGuru(Broadcaster):
|
||||
"""
|
||||
scanner = self.SCANNER_CLASS()
|
||||
if not self.directories.has_any_file():
|
||||
self.view.show_message(tr("The selected directories contain no scannable file."))
|
||||
self.view.show_message(
|
||||
tr("The selected directories contain no scannable file.")
|
||||
)
|
||||
return
|
||||
# Send relevant options down to the scanner instance
|
||||
for k, v in self.options.items():
|
||||
@@ -771,12 +809,16 @@ class DupeGuru(Broadcaster):
|
||||
def do(j):
|
||||
j.set_progress(0, tr("Collecting files to scan"))
|
||||
if scanner.scan_type == ScanType.Folders:
|
||||
files = list(self.directories.get_folders(folderclass=se.fs.Folder, j=j))
|
||||
files = list(
|
||||
self.directories.get_folders(folderclass=se.fs.Folder, j=j)
|
||||
)
|
||||
else:
|
||||
files = list(self.directories.get_files(fileclasses=self.fileclasses, j=j))
|
||||
if self.options['ignore_hardlink_matches']:
|
||||
files = list(
|
||||
self.directories.get_files(fileclasses=self.fileclasses, j=j)
|
||||
)
|
||||
if self.options["ignore_hardlink_matches"]:
|
||||
files = self._remove_hardlink_dupes(files)
|
||||
logging.info('Scanning %d files' % len(files))
|
||||
logging.info("Scanning %d files" % len(files))
|
||||
self.results.groups = scanner.get_dupe_groups(files, self.ignore_list, j)
|
||||
self.discarded_file_count = scanner.discarded_file_count
|
||||
|
||||
@@ -792,12 +834,16 @@ class DupeGuru(Broadcaster):
|
||||
markfunc = self.results.mark
|
||||
for dupe in selected:
|
||||
markfunc(dupe)
|
||||
self.notify('marking_changed')
|
||||
self.notify("marking_changed")
|
||||
|
||||
def without_ref(self, dupes):
|
||||
"""Returns ``dupes`` with all reference elements removed.
|
||||
"""
|
||||
return [dupe for dupe in dupes if self.results.get_group_of_duplicate(dupe).ref is not dupe]
|
||||
return [
|
||||
dupe
|
||||
for dupe in dupes
|
||||
if self.results.get_group_of_duplicate(dupe).ref is not dupe
|
||||
]
|
||||
|
||||
def get_default(self, key, fallback_value=None):
|
||||
result = nonone(self.view.get_default(key), fallback_value)
|
||||
@@ -812,7 +858,7 @@ class DupeGuru(Broadcaster):
|
||||
def set_default(self, key, value):
|
||||
self.view.set_default(key, value)
|
||||
|
||||
#--- Properties
|
||||
# --- Properties
|
||||
@property
|
||||
def stat_line(self):
|
||||
result = self.results.stat_line
|
||||
@@ -836,12 +882,21 @@ class DupeGuru(Broadcaster):
|
||||
@property
|
||||
def METADATA_TO_READ(self):
|
||||
if self.app_mode == AppMode.Picture:
|
||||
return ['size', 'mtime', 'dimensions', 'exif_timestamp']
|
||||
return ["size", "mtime", "dimensions", "exif_timestamp"]
|
||||
elif self.app_mode == AppMode.Music:
|
||||
return [
|
||||
'size', 'mtime', 'duration', 'bitrate', 'samplerate', 'title', 'artist',
|
||||
'album', 'genre', 'year', 'track', 'comment'
|
||||
"size",
|
||||
"mtime",
|
||||
"duration",
|
||||
"bitrate",
|
||||
"samplerate",
|
||||
"title",
|
||||
"artist",
|
||||
"album",
|
||||
"genre",
|
||||
"year",
|
||||
"track",
|
||||
"comment",
|
||||
]
|
||||
else:
|
||||
return ['size', 'mtime']
|
||||
|
||||
return ["size", "mtime"]
|
||||
|
||||
@@ -15,12 +15,13 @@ from hscommon.util import FileOrPath
|
||||
from . import fs
|
||||
|
||||
__all__ = [
|
||||
'Directories',
|
||||
'DirectoryState',
|
||||
'AlreadyThereError',
|
||||
'InvalidPathError',
|
||||
"Directories",
|
||||
"DirectoryState",
|
||||
"AlreadyThereError",
|
||||
"InvalidPathError",
|
||||
]
|
||||
|
||||
|
||||
class DirectoryState:
|
||||
"""Enum describing how a folder should be considered.
|
||||
|
||||
@@ -28,16 +29,20 @@ class DirectoryState:
|
||||
* DirectoryState.Reference: Scan files, but make sure never to delete any of them
|
||||
* DirectoryState.Excluded: Don't scan this folder
|
||||
"""
|
||||
|
||||
Normal = 0
|
||||
Reference = 1
|
||||
Excluded = 2
|
||||
|
||||
|
||||
class AlreadyThereError(Exception):
|
||||
"""The path being added is already in the directory list"""
|
||||
|
||||
|
||||
class InvalidPathError(Exception):
|
||||
"""The path being added is invalid"""
|
||||
|
||||
|
||||
class Directories:
|
||||
"""Holds user folder selection.
|
||||
|
||||
@@ -47,7 +52,8 @@ class Directories:
|
||||
Then, when the user starts the scan, :meth:`get_files` is called to retrieve all files (wrapped
|
||||
in :mod:`core.fs`) that have to be scanned according to the chosen folders/states.
|
||||
"""
|
||||
#---Override
|
||||
|
||||
# ---Override
|
||||
def __init__(self):
|
||||
self._dirs = []
|
||||
# {path: state}
|
||||
@@ -68,10 +74,10 @@ class Directories:
|
||||
def __len__(self):
|
||||
return len(self._dirs)
|
||||
|
||||
#---Private
|
||||
# ---Private
|
||||
def _default_state_for_path(self, path):
|
||||
# Override this in subclasses to specify the state of some special folders.
|
||||
if path.name.startswith('.'): # hidden
|
||||
if path.name.startswith("."): # hidden
|
||||
return DirectoryState.Excluded
|
||||
|
||||
def _get_files(self, from_path, fileclasses, j):
|
||||
@@ -83,11 +89,13 @@ class Directories:
|
||||
# Recursively get files from folders with lots of subfolder is expensive. However, there
|
||||
# might be a subfolder in this path that is not excluded. What we want to do is to skim
|
||||
# through self.states and see if we must continue, or we can stop right here to save time
|
||||
if not any(p[:len(root)] == root for p in self.states):
|
||||
if not any(p[: len(root)] == root for p in self.states):
|
||||
del dirs[:]
|
||||
try:
|
||||
if state != DirectoryState.Excluded:
|
||||
found_files = [fs.get_file(root + f, fileclasses=fileclasses) for f in files]
|
||||
found_files = [
|
||||
fs.get_file(root + f, fileclasses=fileclasses) for f in files
|
||||
]
|
||||
found_files = [f for f in found_files if f is not None]
|
||||
# In some cases, directories can be considered as files by dupeGuru, which is
|
||||
# why we have this line below. In fact, there only one case: Bundle files under
|
||||
@@ -97,7 +105,11 @@ class Directories:
|
||||
if f is not None:
|
||||
found_files.append(f)
|
||||
dirs.remove(d)
|
||||
logging.debug("Collected %d files in folder %s", len(found_files), str(from_path))
|
||||
logging.debug(
|
||||
"Collected %d files in folder %s",
|
||||
len(found_files),
|
||||
str(from_path),
|
||||
)
|
||||
for file in found_files:
|
||||
file.is_ref = state == DirectoryState.Reference
|
||||
yield file
|
||||
@@ -118,7 +130,7 @@ class Directories:
|
||||
except (EnvironmentError, fs.InvalidPath):
|
||||
pass
|
||||
|
||||
#---Public
|
||||
# ---Public
|
||||
def add_path(self, path):
|
||||
"""Adds ``path`` to self, if not already there.
|
||||
|
||||
@@ -212,21 +224,21 @@ class Directories:
|
||||
root = ET.parse(infile).getroot()
|
||||
except Exception:
|
||||
return
|
||||
for rdn in root.getiterator('root_directory'):
|
||||
for rdn in root.getiterator("root_directory"):
|
||||
attrib = rdn.attrib
|
||||
if 'path' not in attrib:
|
||||
if "path" not in attrib:
|
||||
continue
|
||||
path = attrib['path']
|
||||
path = attrib["path"]
|
||||
try:
|
||||
self.add_path(Path(path))
|
||||
except (AlreadyThereError, InvalidPathError):
|
||||
pass
|
||||
for sn in root.getiterator('state'):
|
||||
for sn in root.getiterator("state"):
|
||||
attrib = sn.attrib
|
||||
if not ('path' in attrib and 'value' in attrib):
|
||||
if not ("path" in attrib and "value" in attrib):
|
||||
continue
|
||||
path = attrib['path']
|
||||
state = attrib['value']
|
||||
path = attrib["path"]
|
||||
state = attrib["value"]
|
||||
self.states[Path(path)] = int(state)
|
||||
|
||||
def save_to_file(self, outfile):
|
||||
@@ -234,17 +246,17 @@ class Directories:
|
||||
|
||||
:param file outfile: path or file pointer to XML file to save to.
|
||||
"""
|
||||
with FileOrPath(outfile, 'wb') as fp:
|
||||
root = ET.Element('directories')
|
||||
with FileOrPath(outfile, "wb") as fp:
|
||||
root = ET.Element("directories")
|
||||
for root_path in self:
|
||||
root_path_node = ET.SubElement(root, 'root_directory')
|
||||
root_path_node.set('path', str(root_path))
|
||||
root_path_node = ET.SubElement(root, "root_directory")
|
||||
root_path_node.set("path", str(root_path))
|
||||
for path, state in self.states.items():
|
||||
state_node = ET.SubElement(root, 'state')
|
||||
state_node.set('path', str(path))
|
||||
state_node.set('value', str(state))
|
||||
state_node = ET.SubElement(root, "state")
|
||||
state_node.set("path", str(path))
|
||||
state_node.set("value", str(state))
|
||||
tree = ET.ElementTree(root)
|
||||
tree.write(fp, encoding='utf-8')
|
||||
tree.write(fp, encoding="utf-8")
|
||||
|
||||
def set_state(self, path, state):
|
||||
"""Set the state of folder at ``path``.
|
||||
@@ -259,4 +271,3 @@ class Directories:
|
||||
if path.is_parent_of(iter_path):
|
||||
del self.states[iter_path]
|
||||
self.states[path] = state
|
||||
|
||||
|
||||
112
core/engine.py
112
core/engine.py
@@ -17,25 +17,26 @@ from hscommon.util import flatten, multi_replace
|
||||
from hscommon.trans import tr
|
||||
from hscommon.jobprogress import job
|
||||
|
||||
(
|
||||
WEIGHT_WORDS,
|
||||
MATCH_SIMILAR_WORDS,
|
||||
NO_FIELD_ORDER,
|
||||
) = range(3)
|
||||
(WEIGHT_WORDS, MATCH_SIMILAR_WORDS, NO_FIELD_ORDER,) = range(3)
|
||||
|
||||
JOB_REFRESH_RATE = 100
|
||||
|
||||
|
||||
def getwords(s):
|
||||
# We decompose the string so that ascii letters with accents can be part of the word.
|
||||
s = normalize('NFD', s)
|
||||
s = multi_replace(s, "-_&+():;\\[]{}.,<>/?~!@#$*", ' ').lower()
|
||||
s = ''.join(c for c in s if c in string.ascii_letters + string.digits + string.whitespace)
|
||||
return [_f for _f in s.split(' ') if _f] # remove empty elements
|
||||
s = normalize("NFD", s)
|
||||
s = multi_replace(s, "-_&+():;\\[]{}.,<>/?~!@#$*", " ").lower()
|
||||
s = "".join(
|
||||
c for c in s if c in string.ascii_letters + string.digits + string.whitespace
|
||||
)
|
||||
return [_f for _f in s.split(" ") if _f] # remove empty elements
|
||||
|
||||
|
||||
def getfields(s):
|
||||
fields = [getwords(field) for field in s.split(' - ')]
|
||||
fields = [getwords(field) for field in s.split(" - ")]
|
||||
return [_f for _f in fields if _f]
|
||||
|
||||
|
||||
def unpack_fields(fields):
|
||||
result = []
|
||||
for field in fields:
|
||||
@@ -45,6 +46,7 @@ def unpack_fields(fields):
|
||||
result.append(field)
|
||||
return result
|
||||
|
||||
|
||||
def compare(first, second, flags=()):
|
||||
"""Returns the % of words that match between ``first`` and ``second``
|
||||
|
||||
@@ -55,11 +57,11 @@ def compare(first, second, flags=()):
|
||||
return 0
|
||||
if any(isinstance(element, list) for element in first):
|
||||
return compare_fields(first, second, flags)
|
||||
second = second[:] #We must use a copy of second because we remove items from it
|
||||
second = second[:] # We must use a copy of second because we remove items from it
|
||||
match_similar = MATCH_SIMILAR_WORDS in flags
|
||||
weight_words = WEIGHT_WORDS in flags
|
||||
joined = first + second
|
||||
total_count = (sum(len(word) for word in joined) if weight_words else len(joined))
|
||||
total_count = sum(len(word) for word in joined) if weight_words else len(joined)
|
||||
match_count = 0
|
||||
in_order = True
|
||||
for word in first:
|
||||
@@ -71,12 +73,13 @@ def compare(first, second, flags=()):
|
||||
if second[0] != word:
|
||||
in_order = False
|
||||
second.remove(word)
|
||||
match_count += (len(word) if weight_words else 1)
|
||||
match_count += len(word) if weight_words else 1
|
||||
result = round(((match_count * 2) / total_count) * 100)
|
||||
if (result == 100) and (not in_order):
|
||||
result = 99 # We cannot consider a match exact unless the ordering is the same
|
||||
result = 99 # We cannot consider a match exact unless the ordering is the same
|
||||
return result
|
||||
|
||||
|
||||
def compare_fields(first, second, flags=()):
|
||||
"""Returns the score for the lowest matching :ref:`fields`.
|
||||
|
||||
@@ -87,7 +90,7 @@ def compare_fields(first, second, flags=()):
|
||||
return 0
|
||||
if NO_FIELD_ORDER in flags:
|
||||
results = []
|
||||
#We don't want to remove field directly in the list. We must work on a copy.
|
||||
# We don't want to remove field directly in the list. We must work on a copy.
|
||||
second = second[:]
|
||||
for field1 in first:
|
||||
max = 0
|
||||
@@ -101,9 +104,12 @@ def compare_fields(first, second, flags=()):
|
||||
if matched_field:
|
||||
second.remove(matched_field)
|
||||
else:
|
||||
results = [compare(field1, field2, flags) for field1, field2 in zip(first, second)]
|
||||
results = [
|
||||
compare(field1, field2, flags) for field1, field2 in zip(first, second)
|
||||
]
|
||||
return min(results) if results else 0
|
||||
|
||||
|
||||
def build_word_dict(objects, j=job.nulljob):
|
||||
"""Returns a dict of objects mapped by their words.
|
||||
|
||||
@@ -113,11 +119,14 @@ def build_word_dict(objects, j=job.nulljob):
|
||||
The result will be a dict with words as keys, lists of objects as values.
|
||||
"""
|
||||
result = defaultdict(set)
|
||||
for object in j.iter_with_progress(objects, 'Prepared %d/%d files', JOB_REFRESH_RATE):
|
||||
for object in j.iter_with_progress(
|
||||
objects, "Prepared %d/%d files", JOB_REFRESH_RATE
|
||||
):
|
||||
for word in unpack_fields(object.words):
|
||||
result[word].add(object)
|
||||
return result
|
||||
|
||||
|
||||
def merge_similar_words(word_dict):
|
||||
"""Take all keys in ``word_dict`` that are similar, and merge them together.
|
||||
|
||||
@@ -126,7 +135,7 @@ def merge_similar_words(word_dict):
|
||||
a word equal to the other.
|
||||
"""
|
||||
keys = list(word_dict.keys())
|
||||
keys.sort(key=len)# we want the shortest word to stay
|
||||
keys.sort(key=len) # we want the shortest word to stay
|
||||
while keys:
|
||||
key = keys.pop(0)
|
||||
similars = difflib.get_close_matches(key, keys, 100, 0.8)
|
||||
@@ -138,6 +147,7 @@ def merge_similar_words(word_dict):
|
||||
del word_dict[similar]
|
||||
keys.remove(similar)
|
||||
|
||||
|
||||
def reduce_common_words(word_dict, threshold):
|
||||
"""Remove all objects from ``word_dict`` values where the object count >= ``threshold``
|
||||
|
||||
@@ -146,7 +156,9 @@ def reduce_common_words(word_dict, threshold):
|
||||
The exception to this removal are the objects where all the words of the object are common.
|
||||
Because if we remove them, we will miss some duplicates!
|
||||
"""
|
||||
uncommon_words = set(word for word, objects in word_dict.items() if len(objects) < threshold)
|
||||
uncommon_words = set(
|
||||
word for word, objects in word_dict.items() if len(objects) < threshold
|
||||
)
|
||||
for word, objects in list(word_dict.items()):
|
||||
if len(objects) < threshold:
|
||||
continue
|
||||
@@ -159,11 +171,13 @@ def reduce_common_words(word_dict, threshold):
|
||||
else:
|
||||
del word_dict[word]
|
||||
|
||||
|
||||
# Writing docstrings in a namedtuple is tricky. From Python 3.3, it's possible to set __doc__, but
|
||||
# some research allowed me to find a more elegant solution, which is what is done here. See
|
||||
# http://stackoverflow.com/questions/1606436/adding-docstrings-to-namedtuples-in-python
|
||||
|
||||
class Match(namedtuple('Match', 'first second percentage')):
|
||||
|
||||
class Match(namedtuple("Match", "first second percentage")):
|
||||
"""Represents a match between two :class:`~core.fs.File`.
|
||||
|
||||
Regardless of the matching method, when two files are determined to match, a Match pair is created,
|
||||
@@ -182,16 +196,24 @@ class Match(namedtuple('Match', 'first second percentage')):
|
||||
their match level according to the scan method which found the match. int from 1 to 100. For
|
||||
exact scan methods, such as Contents scans, this will always be 100.
|
||||
"""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
|
||||
def get_match(first, second, flags=()):
|
||||
#it is assumed here that first and second both have a "words" attribute
|
||||
# it is assumed here that first and second both have a "words" attribute
|
||||
percentage = compare(first.words, second.words, flags)
|
||||
return Match(first, second, percentage)
|
||||
|
||||
|
||||
def getmatches(
|
||||
objects, min_match_percentage=0, match_similar_words=False, weight_words=False,
|
||||
no_field_order=False, j=job.nulljob):
|
||||
objects,
|
||||
min_match_percentage=0,
|
||||
match_similar_words=False,
|
||||
weight_words=False,
|
||||
no_field_order=False,
|
||||
j=job.nulljob,
|
||||
):
|
||||
"""Returns a list of :class:`Match` within ``objects`` after fuzzily matching their words.
|
||||
|
||||
:param objects: List of :class:`~core.fs.File` to match.
|
||||
@@ -206,7 +228,7 @@ def getmatches(
|
||||
j = j.start_subjob(2)
|
||||
sj = j.start_subjob(2)
|
||||
for o in objects:
|
||||
if not hasattr(o, 'words'):
|
||||
if not hasattr(o, "words"):
|
||||
o.words = getwords(o.name)
|
||||
word_dict = build_word_dict(objects, sj)
|
||||
reduce_common_words(word_dict, COMMON_WORD_THRESHOLD)
|
||||
@@ -241,11 +263,15 @@ def getmatches(
|
||||
except MemoryError:
|
||||
# This is the place where the memory usage is at its peak during the scan.
|
||||
# Just continue the process with an incomplete list of matches.
|
||||
del compared # This should give us enough room to call logging.
|
||||
logging.warning('Memory Overflow. Matches: %d. Word dict: %d' % (len(result), len(word_dict)))
|
||||
del compared # This should give us enough room to call logging.
|
||||
logging.warning(
|
||||
"Memory Overflow. Matches: %d. Word dict: %d"
|
||||
% (len(result), len(word_dict))
|
||||
)
|
||||
return result
|
||||
return result
|
||||
|
||||
|
||||
def getmatches_by_contents(files, j=job.nulljob):
|
||||
"""Returns a list of :class:`Match` within ``files`` if their contents are the same.
|
||||
|
||||
@@ -263,13 +289,14 @@ def getmatches_by_contents(files, j=job.nulljob):
|
||||
for group in possible_matches:
|
||||
for first, second in itertools.combinations(group, 2):
|
||||
if first.is_ref and second.is_ref:
|
||||
continue # Don't spend time comparing two ref pics together.
|
||||
continue # Don't spend time comparing two ref pics together.
|
||||
if first.md5partial == second.md5partial:
|
||||
if first.md5 == second.md5:
|
||||
result.append(Match(first, second, 100))
|
||||
j.add_progress(desc=tr("%d matches found") % len(result))
|
||||
return result
|
||||
|
||||
|
||||
class Group:
|
||||
"""A group of :class:`~core.fs.File` that match together.
|
||||
|
||||
@@ -297,7 +324,8 @@ class Group:
|
||||
|
||||
Average match percentage of match pairs containing :attr:`ref`.
|
||||
"""
|
||||
#---Override
|
||||
|
||||
# ---Override
|
||||
def __init__(self):
|
||||
self._clear()
|
||||
|
||||
@@ -313,7 +341,7 @@ class Group:
|
||||
def __len__(self):
|
||||
return len(self.ordered)
|
||||
|
||||
#---Private
|
||||
# ---Private
|
||||
def _clear(self):
|
||||
self._percentage = None
|
||||
self._matches_for_ref = None
|
||||
@@ -328,7 +356,7 @@ class Group:
|
||||
self._matches_for_ref = [match for match in self.matches if ref in match]
|
||||
return self._matches_for_ref
|
||||
|
||||
#---Public
|
||||
# ---Public
|
||||
def add_match(self, match):
|
||||
"""Adds ``match`` to internal match list and possibly add duplicates to the group.
|
||||
|
||||
@@ -339,6 +367,7 @@ class Group:
|
||||
|
||||
:param tuple match: pair of :class:`~core.fs.File` to add
|
||||
"""
|
||||
|
||||
def add_candidate(item, match):
|
||||
matches = self.candidates[item]
|
||||
matches.add(match)
|
||||
@@ -362,7 +391,11 @@ class Group:
|
||||
|
||||
You can call this after the duplicate scanning process to free a bit of memory.
|
||||
"""
|
||||
discarded = set(m for m in self.matches if not all(obj in self.unordered for obj in [m.first, m.second]))
|
||||
discarded = set(
|
||||
m
|
||||
for m in self.matches
|
||||
if not all(obj in self.unordered for obj in [m.first, m.second])
|
||||
)
|
||||
self.matches -= discarded
|
||||
self.candidates = defaultdict(set)
|
||||
return discarded
|
||||
@@ -409,7 +442,9 @@ class Group:
|
||||
self.unordered.remove(item)
|
||||
self._percentage = None
|
||||
self._matches_for_ref = None
|
||||
if (len(self) > 1) and any(not getattr(item, 'is_ref', False) for item in self):
|
||||
if (len(self) > 1) and any(
|
||||
not getattr(item, "is_ref", False) for item in self
|
||||
):
|
||||
if discard_matches:
|
||||
self.matches = set(m for m in self.matches if item not in m)
|
||||
else:
|
||||
@@ -438,7 +473,9 @@ class Group:
|
||||
if self._percentage is None:
|
||||
if self.dupes:
|
||||
matches = self._get_matches_for_ref()
|
||||
self._percentage = sum(match.percentage for match in matches) // len(matches)
|
||||
self._percentage = sum(match.percentage for match in matches) // len(
|
||||
matches
|
||||
)
|
||||
else:
|
||||
self._percentage = 0
|
||||
return self._percentage
|
||||
@@ -485,7 +522,7 @@ def get_groups(matches):
|
||||
del dupe2group
|
||||
del matches
|
||||
# should free enough memory to continue
|
||||
logging.warning('Memory Overflow. Groups: {0}'.format(len(groups)))
|
||||
logging.warning("Memory Overflow. Groups: {0}".format(len(groups)))
|
||||
# Now that we have a group, we have to discard groups' matches and see if there're any "orphan"
|
||||
# matches, that is, matches that were candidate in a group but that none of their 2 files were
|
||||
# accepted in the group. With these orphan groups, it's safe to build additional groups
|
||||
@@ -493,9 +530,12 @@ def get_groups(matches):
|
||||
orphan_matches = []
|
||||
for group in groups:
|
||||
orphan_matches += {
|
||||
m for m in group.discard_matches()
|
||||
m
|
||||
for m in group.discard_matches()
|
||||
if not any(obj in matched_files for obj in [m.first, m.second])
|
||||
}
|
||||
if groups and orphan_matches:
|
||||
groups += get_groups(orphan_matches) # no job, as it isn't supposed to take a long time
|
||||
groups += get_groups(
|
||||
orphan_matches
|
||||
) # no job, as it isn't supposed to take a long time
|
||||
return groups
|
||||
|
||||
@@ -114,36 +114,42 @@ ROW_TEMPLATE = """
|
||||
|
||||
CELL_TEMPLATE = """<td>{value}</td>"""
|
||||
|
||||
|
||||
def export_to_xhtml(colnames, rows):
|
||||
# a row is a list of values with the first value being a flag indicating if the row should be indented
|
||||
if rows:
|
||||
assert len(rows[0]) == len(colnames) + 1 # + 1 is for the "indented" flag
|
||||
colheaders = ''.join(COLHEADERS_TEMPLATE.format(name=name) for name in colnames)
|
||||
assert len(rows[0]) == len(colnames) + 1 # + 1 is for the "indented" flag
|
||||
colheaders = "".join(COLHEADERS_TEMPLATE.format(name=name) for name in colnames)
|
||||
rendered_rows = []
|
||||
previous_group_id = None
|
||||
for row in rows:
|
||||
# [2:] is to remove the indented flag + filename
|
||||
if row[0] != previous_group_id:
|
||||
# We've just changed dupe group, which means that this dupe is a ref. We don't indent it.
|
||||
indented = ''
|
||||
indented = ""
|
||||
else:
|
||||
indented = 'indented'
|
||||
indented = "indented"
|
||||
filename = row[1]
|
||||
cells = ''.join(CELL_TEMPLATE.format(value=value) for value in row[2:])
|
||||
rendered_rows.append(ROW_TEMPLATE.format(indented=indented, filename=filename, cells=cells))
|
||||
cells = "".join(CELL_TEMPLATE.format(value=value) for value in row[2:])
|
||||
rendered_rows.append(
|
||||
ROW_TEMPLATE.format(indented=indented, filename=filename, cells=cells)
|
||||
)
|
||||
previous_group_id = row[0]
|
||||
rendered_rows = ''.join(rendered_rows)
|
||||
rendered_rows = "".join(rendered_rows)
|
||||
# The main template can't use format because the css code uses {}
|
||||
content = MAIN_TEMPLATE.replace('$colheaders', colheaders).replace('$rows', rendered_rows)
|
||||
content = MAIN_TEMPLATE.replace("$colheaders", colheaders).replace(
|
||||
"$rows", rendered_rows
|
||||
)
|
||||
folder = mkdtemp()
|
||||
destpath = op.join(folder, 'export.htm')
|
||||
fp = open(destpath, 'wt', encoding='utf-8')
|
||||
destpath = op.join(folder, "export.htm")
|
||||
fp = open(destpath, "wt", encoding="utf-8")
|
||||
fp.write(content)
|
||||
fp.close()
|
||||
return destpath
|
||||
|
||||
|
||||
def export_to_csv(dest, colnames, rows):
|
||||
writer = csv.writer(open(dest, 'wt', encoding='utf-8'))
|
||||
writer = csv.writer(open(dest, "wt", encoding="utf-8"))
|
||||
writer.writerow(["Group ID"] + colnames)
|
||||
for row in rows:
|
||||
writer.writerow(row)
|
||||
|
||||
80
core/fs.py
80
core/fs.py
@@ -17,19 +17,20 @@ import logging
|
||||
from hscommon.util import nonone, get_file_ext
|
||||
|
||||
__all__ = [
|
||||
'File',
|
||||
'Folder',
|
||||
'get_file',
|
||||
'get_files',
|
||||
'FSError',
|
||||
'AlreadyExistsError',
|
||||
'InvalidPath',
|
||||
'InvalidDestinationError',
|
||||
'OperationError',
|
||||
"File",
|
||||
"Folder",
|
||||
"get_file",
|
||||
"get_files",
|
||||
"FSError",
|
||||
"AlreadyExistsError",
|
||||
"InvalidPath",
|
||||
"InvalidDestinationError",
|
||||
"OperationError",
|
||||
]
|
||||
|
||||
NOT_SET = object()
|
||||
|
||||
|
||||
class FSError(Exception):
|
||||
cls_message = "An error has occured on '{name}' in '{parent}'"
|
||||
|
||||
@@ -40,8 +41,8 @@ class FSError(Exception):
|
||||
elif isinstance(fsobject, File):
|
||||
name = fsobject.name
|
||||
else:
|
||||
name = ''
|
||||
parentname = str(parent) if parent is not None else ''
|
||||
name = ""
|
||||
parentname = str(parent) if parent is not None else ""
|
||||
Exception.__init__(self, message.format(name=name, parent=parentname))
|
||||
|
||||
|
||||
@@ -49,32 +50,39 @@ class AlreadyExistsError(FSError):
|
||||
"The directory or file name we're trying to add already exists"
|
||||
cls_message = "'{name}' already exists in '{parent}'"
|
||||
|
||||
|
||||
class InvalidPath(FSError):
|
||||
"The path of self is invalid, and cannot be worked with."
|
||||
cls_message = "'{name}' is invalid."
|
||||
|
||||
|
||||
class InvalidDestinationError(FSError):
|
||||
"""A copy/move operation has been called, but the destination is invalid."""
|
||||
|
||||
cls_message = "'{name}' is an invalid destination for this operation."
|
||||
|
||||
|
||||
class OperationError(FSError):
|
||||
"""A copy/move/delete operation has been called, but the checkup after the
|
||||
operation shows that it didn't work."""
|
||||
|
||||
cls_message = "Operation on '{name}' failed."
|
||||
|
||||
|
||||
class File:
|
||||
"""Represents a file and holds metadata to be used for scanning.
|
||||
"""
|
||||
|
||||
INITIAL_INFO = {
|
||||
'size': 0,
|
||||
'mtime': 0,
|
||||
'md5': '',
|
||||
'md5partial': '',
|
||||
"size": 0,
|
||||
"mtime": 0,
|
||||
"md5": "",
|
||||
"md5partial": "",
|
||||
}
|
||||
# Slots for File make us save quite a bit of memory. In a memory test I've made with a lot of
|
||||
# files, I saved 35% memory usage with "unread" files (no _read_info() call) and gains become
|
||||
# even greater when we take into account read attributes (70%!). Yeah, it's worth it.
|
||||
__slots__ = ('path', 'is_ref', 'words') + tuple(INITIAL_INFO.keys())
|
||||
__slots__ = ("path", "is_ref", "words") + tuple(INITIAL_INFO.keys())
|
||||
|
||||
def __init__(self, path):
|
||||
self.path = path
|
||||
@@ -90,25 +98,27 @@ class File:
|
||||
try:
|
||||
self._read_info(attrname)
|
||||
except Exception as e:
|
||||
logging.warning("An error '%s' was raised while decoding '%s'", e, repr(self.path))
|
||||
logging.warning(
|
||||
"An error '%s' was raised while decoding '%s'", e, repr(self.path)
|
||||
)
|
||||
result = object.__getattribute__(self, attrname)
|
||||
if result is NOT_SET:
|
||||
result = self.INITIAL_INFO[attrname]
|
||||
return result
|
||||
|
||||
#This offset is where we should start reading the file to get a partial md5
|
||||
#For audio file, it should be where audio data starts
|
||||
# This offset is where we should start reading the file to get a partial md5
|
||||
# For audio file, it should be where audio data starts
|
||||
def _get_md5partial_offset_and_size(self):
|
||||
return (0x4000, 0x4000) #16Kb
|
||||
return (0x4000, 0x4000) # 16Kb
|
||||
|
||||
def _read_info(self, field):
|
||||
if field in ('size', 'mtime'):
|
||||
if field in ("size", "mtime"):
|
||||
stats = self.path.stat()
|
||||
self.size = nonone(stats.st_size, 0)
|
||||
self.mtime = nonone(stats.st_mtime, 0)
|
||||
elif field == 'md5partial':
|
||||
elif field == "md5partial":
|
||||
try:
|
||||
fp = self.path.open('rb')
|
||||
fp = self.path.open("rb")
|
||||
offset, size = self._get_md5partial_offset_and_size()
|
||||
fp.seek(offset)
|
||||
partialdata = fp.read(size)
|
||||
@@ -117,14 +127,14 @@ class File:
|
||||
fp.close()
|
||||
except Exception:
|
||||
pass
|
||||
elif field == 'md5':
|
||||
elif field == "md5":
|
||||
try:
|
||||
fp = self.path.open('rb')
|
||||
fp = self.path.open("rb")
|
||||
md5 = hashlib.md5()
|
||||
# The goal here is to not run out of memory on really big files. However, the chunk
|
||||
# size has to be large enough so that the python loop isn't too costly in terms of
|
||||
# CPU.
|
||||
CHUNK_SIZE = 1024 * 1024 # 1 mb
|
||||
CHUNK_SIZE = 1024 * 1024 # 1 mb
|
||||
filedata = fp.read(CHUNK_SIZE)
|
||||
while filedata:
|
||||
md5.update(filedata)
|
||||
@@ -144,7 +154,7 @@ class File:
|
||||
for attrname in attrnames:
|
||||
getattr(self, attrname)
|
||||
|
||||
#--- Public
|
||||
# --- Public
|
||||
@classmethod
|
||||
def can_handle(cls, path):
|
||||
"""Returns whether this file wrapper class can handle ``path``.
|
||||
@@ -170,7 +180,7 @@ class File:
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
#--- Properties
|
||||
# --- Properties
|
||||
@property
|
||||
def extension(self):
|
||||
return get_file_ext(self.name)
|
||||
@@ -189,7 +199,8 @@ class Folder(File):
|
||||
|
||||
It has the size/md5 info of a File, but its values are the sum of its subitems.
|
||||
"""
|
||||
__slots__ = File.__slots__ + ('_subfolders', )
|
||||
|
||||
__slots__ = File.__slots__ + ("_subfolders",)
|
||||
|
||||
def __init__(self, path):
|
||||
File.__init__(self, path)
|
||||
@@ -201,12 +212,12 @@ class Folder(File):
|
||||
return folders + files
|
||||
|
||||
def _read_info(self, field):
|
||||
if field in {'size', 'mtime'}:
|
||||
if field in {"size", "mtime"}:
|
||||
size = sum((f.size for f in self._all_items()), 0)
|
||||
self.size = size
|
||||
stats = self.path.stat()
|
||||
self.mtime = nonone(stats.st_mtime, 0)
|
||||
elif field in {'md5', 'md5partial'}:
|
||||
elif field in {"md5", "md5partial"}:
|
||||
# What's sensitive here is that we must make sure that subfiles'
|
||||
# md5 are always added up in the same order, but we also want a
|
||||
# different md5 if a file gets moved in a different subdirectory.
|
||||
@@ -214,7 +225,7 @@ class Folder(File):
|
||||
items = self._all_items()
|
||||
items.sort(key=lambda f: f.path)
|
||||
md5s = [getattr(f, field) for f in items]
|
||||
return b''.join(md5s)
|
||||
return b"".join(md5s)
|
||||
|
||||
md5 = hashlib.md5(get_dir_md5_concat())
|
||||
digest = md5.digest()
|
||||
@@ -223,7 +234,9 @@ class Folder(File):
|
||||
@property
|
||||
def subfolders(self):
|
||||
if self._subfolders is None:
|
||||
subfolders = [p for p in self.path.listdir() if not p.islink() and p.isdir()]
|
||||
subfolders = [
|
||||
p for p in self.path.listdir() if not p.islink() and p.isdir()
|
||||
]
|
||||
self._subfolders = [self.__class__(p) for p in subfolders]
|
||||
return self._subfolders
|
||||
|
||||
@@ -244,6 +257,7 @@ def get_file(path, fileclasses=[File]):
|
||||
if fileclass.can_handle(path):
|
||||
return fileclass(path)
|
||||
|
||||
|
||||
def get_files(path, fileclasses=[File]):
|
||||
"""Returns a list of :class:`File` for each file contained in ``path``.
|
||||
|
||||
|
||||
@@ -13,4 +13,3 @@ blue, which is supposed to be orange, does the sorting logic, holds selection, e
|
||||
|
||||
.. _cross-toolkit: http://www.hardcoded.net/articles/cross-toolkit-software
|
||||
"""
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
from hscommon.notify import Listener
|
||||
|
||||
|
||||
class DupeGuruGUIObject(Listener):
|
||||
def __init__(self, app):
|
||||
Listener.__init__(self, app)
|
||||
@@ -27,4 +28,3 @@ class DupeGuruGUIObject(Listener):
|
||||
|
||||
def results_changed_but_keep_selection(self):
|
||||
pass
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Created On: 2012-05-30
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
import os
|
||||
@@ -10,42 +10,46 @@ import os
|
||||
from hscommon.gui.base import GUIObject
|
||||
from hscommon.trans import tr
|
||||
|
||||
|
||||
class DeletionOptionsView:
|
||||
"""Expected interface for :class:`DeletionOptions`'s view.
|
||||
|
||||
|
||||
*Not actually used in the code. For documentation purposes only.*
|
||||
|
||||
|
||||
Our view presents the user with an appropriate way (probably a mix of checkboxes and radio
|
||||
buttons) to set the different flags in :class:`DeletionOptions`. Note that
|
||||
:attr:`DeletionOptions.use_hardlinks` is only relevant if :attr:`DeletionOptions.link_deleted`
|
||||
is true. This is why we toggle the "enabled" state of that flag.
|
||||
|
||||
|
||||
We expect the view to set :attr:`DeletionOptions.link_deleted` immediately as the user changes
|
||||
its value because it will toggle :meth:`set_hardlink_option_enabled`
|
||||
|
||||
|
||||
Other than the flags, there's also a prompt message which has a dynamic content, defined by
|
||||
:meth:`update_msg`.
|
||||
"""
|
||||
|
||||
def update_msg(self, msg: str):
|
||||
"""Update the dialog's prompt with ``msg``.
|
||||
"""
|
||||
|
||||
|
||||
def show(self):
|
||||
"""Show the dialog in a modal fashion.
|
||||
|
||||
|
||||
Returns whether the dialog was "accepted" (the user pressed OK).
|
||||
"""
|
||||
|
||||
|
||||
def set_hardlink_option_enabled(self, is_enabled: bool):
|
||||
"""Enable or disable the widget controlling :attr:`DeletionOptions.use_hardlinks`.
|
||||
"""
|
||||
|
||||
|
||||
class DeletionOptions(GUIObject):
|
||||
"""Present the user with deletion options before proceeding.
|
||||
|
||||
|
||||
When the user activates "Send to trash", we present him with a couple of options that change
|
||||
the behavior of that deletion operation.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
GUIObject.__init__(self)
|
||||
#: Whether symlinks or hardlinks are used when doing :attr:`link_deleted`.
|
||||
@@ -54,10 +58,10 @@ class DeletionOptions(GUIObject):
|
||||
#: Delete dupes directly and don't send to trash.
|
||||
#: *bool*. *get/set*
|
||||
self.direct = False
|
||||
|
||||
|
||||
def show(self, mark_count):
|
||||
"""Prompt the user with a modal dialog offering our deletion options.
|
||||
|
||||
|
||||
:param int mark_count: Number of dupes marked for deletion.
|
||||
:rtype: bool
|
||||
:returns: Whether the user accepted the dialog (we cancel deletion if false).
|
||||
@@ -69,7 +73,7 @@ class DeletionOptions(GUIObject):
|
||||
msg = tr("You are sending {} file(s) to the Trash.").format(mark_count)
|
||||
self.view.update_msg(msg)
|
||||
return self.view.show()
|
||||
|
||||
|
||||
def supports_links(self):
|
||||
"""Returns whether our platform supports symlinks.
|
||||
"""
|
||||
@@ -87,21 +91,19 @@ class DeletionOptions(GUIObject):
|
||||
except TypeError:
|
||||
# wrong number of arguments
|
||||
return True
|
||||
|
||||
|
||||
@property
|
||||
def link_deleted(self):
|
||||
"""Replace deleted dupes with symlinks (or hardlinks) to the dupe group reference.
|
||||
|
||||
|
||||
*bool*. *get/set*
|
||||
|
||||
|
||||
Whether the link is a symlink or hardlink is decided by :attr:`use_hardlinks`.
|
||||
"""
|
||||
return self._link_deleted
|
||||
|
||||
|
||||
@link_deleted.setter
|
||||
def link_deleted(self, value):
|
||||
self._link_deleted = value
|
||||
hardlinks_enabled = value and self.supports_links()
|
||||
self.view.set_hardlink_option_enabled(hardlinks_enabled)
|
||||
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
from hscommon.gui.base import GUIObject
|
||||
from .base import DupeGuruGUIObject
|
||||
|
||||
|
||||
class DetailsPanel(GUIObject, DupeGuruGUIObject):
|
||||
def __init__(self, app):
|
||||
GUIObject.__init__(self, multibind=True)
|
||||
@@ -19,7 +20,7 @@ class DetailsPanel(GUIObject, DupeGuruGUIObject):
|
||||
self._refresh()
|
||||
self.view.refresh()
|
||||
|
||||
#--- Private
|
||||
# --- Private
|
||||
def _refresh(self):
|
||||
if self.app.selected_dupes:
|
||||
dupe = self.app.selected_dupes[0]
|
||||
@@ -31,18 +32,19 @@ class DetailsPanel(GUIObject, DupeGuruGUIObject):
|
||||
# we don't want the two sides of the table to display the stats for the same file
|
||||
ref = group.ref if group is not None and group.ref is not dupe else None
|
||||
data2 = self.app.get_display_info(ref, group, False)
|
||||
columns = self.app.result_table.COLUMNS[1:] # first column is the 'marked' column
|
||||
columns = self.app.result_table.COLUMNS[
|
||||
1:
|
||||
] # first column is the 'marked' column
|
||||
self._table = [(c.display, data1[c.name], data2[c.name]) for c in columns]
|
||||
|
||||
#--- Public
|
||||
# --- Public
|
||||
def row_count(self):
|
||||
return len(self._table)
|
||||
|
||||
def row(self, row_index):
|
||||
return self._table[row_index]
|
||||
|
||||
#--- Event Handlers
|
||||
# --- Event Handlers
|
||||
def dupes_selected(self):
|
||||
self._refresh()
|
||||
self.view.refresh()
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2010-02-06
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
from hscommon.gui.tree import Tree, Node
|
||||
@@ -13,6 +13,7 @@ from .base import DupeGuruGUIObject
|
||||
|
||||
STATE_ORDER = [DirectoryState.Normal, DirectoryState.Reference, DirectoryState.Excluded]
|
||||
|
||||
|
||||
# Lazily loads children
|
||||
class DirectoryNode(Node):
|
||||
def __init__(self, tree, path, name):
|
||||
@@ -21,29 +22,31 @@ class DirectoryNode(Node):
|
||||
self._directory_path = path
|
||||
self._loaded = False
|
||||
self._state = STATE_ORDER.index(self._tree.app.directories.get_state(path))
|
||||
|
||||
|
||||
def __len__(self):
|
||||
if not self._loaded:
|
||||
self._load()
|
||||
return Node.__len__(self)
|
||||
|
||||
|
||||
def _load(self):
|
||||
self.clear()
|
||||
subpaths = self._tree.app.directories.get_subfolders(self._directory_path)
|
||||
for path in subpaths:
|
||||
self.append(DirectoryNode(self._tree, path, path.name))
|
||||
self._loaded = True
|
||||
|
||||
|
||||
def update_all_states(self):
|
||||
self._state = STATE_ORDER.index(self._tree.app.directories.get_state(self._directory_path))
|
||||
self._state = STATE_ORDER.index(
|
||||
self._tree.app.directories.get_state(self._directory_path)
|
||||
)
|
||||
for node in self:
|
||||
node.update_all_states()
|
||||
|
||||
|
||||
# The state property is an index to the combobox
|
||||
@property
|
||||
def state(self):
|
||||
return self._state
|
||||
|
||||
|
||||
@state.setter
|
||||
def state(self, value):
|
||||
if value == self._state:
|
||||
@@ -52,29 +55,29 @@ class DirectoryNode(Node):
|
||||
state = STATE_ORDER[value]
|
||||
self._tree.app.directories.set_state(self._directory_path, state)
|
||||
self._tree.update_all_states()
|
||||
|
||||
|
||||
|
||||
class DirectoryTree(Tree, DupeGuruGUIObject):
|
||||
#--- model -> view calls:
|
||||
# --- model -> view calls:
|
||||
# refresh()
|
||||
# refresh_states() # when only states label need to be refreshed
|
||||
#
|
||||
def __init__(self, app):
|
||||
Tree.__init__(self)
|
||||
DupeGuruGUIObject.__init__(self, app)
|
||||
|
||||
|
||||
def _view_updated(self):
|
||||
self._refresh()
|
||||
self.view.refresh()
|
||||
|
||||
|
||||
def _refresh(self):
|
||||
self.clear()
|
||||
for path in self.app.directories:
|
||||
self.append(DirectoryNode(self, path, str(path)))
|
||||
|
||||
|
||||
def add_directory(self, path):
|
||||
self.app.add_directory(path)
|
||||
|
||||
|
||||
def remove_selected(self):
|
||||
selected_paths = self.selected_paths
|
||||
if not selected_paths:
|
||||
@@ -90,18 +93,17 @@ class DirectoryTree(Tree, DupeGuruGUIObject):
|
||||
newstate = DirectoryState.Normal
|
||||
for node in nodes:
|
||||
node.state = newstate
|
||||
|
||||
|
||||
def select_all(self):
|
||||
self.selected_nodes = list(self)
|
||||
self.view.refresh()
|
||||
|
||||
|
||||
def update_all_states(self):
|
||||
for node in self:
|
||||
node.update_all_states()
|
||||
self.view.refresh_states()
|
||||
|
||||
#--- Event Handlers
|
||||
|
||||
# --- Event Handlers
|
||||
def directories_changed(self):
|
||||
self._refresh()
|
||||
self.view.refresh()
|
||||
|
||||
|
||||
@@ -8,8 +8,9 @@
|
||||
from hscommon.trans import tr
|
||||
from .ignore_list_table import IgnoreListTable
|
||||
|
||||
|
||||
class IgnoreListDialog:
|
||||
#--- View interface
|
||||
# --- View interface
|
||||
# show()
|
||||
#
|
||||
|
||||
@@ -21,7 +22,9 @@ class IgnoreListDialog:
|
||||
def clear(self):
|
||||
if not self.ignore_list:
|
||||
return
|
||||
msg = tr("Do you really want to remove all %d items from the ignore list?") % len(self.ignore_list)
|
||||
msg = tr(
|
||||
"Do you really want to remove all %d items from the ignore list?"
|
||||
) % len(self.ignore_list)
|
||||
if self.app.view.ask_yes_no(msg):
|
||||
self.ignore_list.Clear()
|
||||
self.refresh()
|
||||
@@ -36,4 +39,3 @@ class IgnoreListDialog:
|
||||
|
||||
def show(self):
|
||||
self.view.show()
|
||||
|
||||
|
||||
@@ -1,35 +1,36 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2012-03-13
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
from hscommon.gui.table import GUITable, Row
|
||||
from hscommon.gui.column import Column, Columns
|
||||
from hscommon.trans import trget
|
||||
|
||||
coltr = trget('columns')
|
||||
coltr = trget("columns")
|
||||
|
||||
|
||||
class IgnoreListTable(GUITable):
|
||||
COLUMNS = [
|
||||
# the str concat below saves us needless localization.
|
||||
Column('path1', coltr("File Path") + " 1"),
|
||||
Column('path2', coltr("File Path") + " 2"),
|
||||
Column("path1", coltr("File Path") + " 1"),
|
||||
Column("path2", coltr("File Path") + " 2"),
|
||||
]
|
||||
|
||||
|
||||
def __init__(self, ignore_list_dialog):
|
||||
GUITable.__init__(self)
|
||||
self.columns = Columns(self)
|
||||
self.view = None
|
||||
self.dialog = ignore_list_dialog
|
||||
|
||||
#--- Override
|
||||
|
||||
# --- Override
|
||||
def _fill(self):
|
||||
for path1, path2 in self.dialog.ignore_list:
|
||||
self.append(IgnoreListRow(self, path1, path2))
|
||||
|
||||
|
||||
|
||||
class IgnoreListRow(Row):
|
||||
def __init__(self, table, path1, path2):
|
||||
@@ -38,4 +39,3 @@ class IgnoreListRow(Row):
|
||||
self.path2_original = path2
|
||||
self.path1 = str(path1)
|
||||
self.path2 = str(path2)
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
from hscommon.gui.base import GUIObject
|
||||
from hscommon.gui.selectable_list import GUISelectableList
|
||||
|
||||
|
||||
class CriterionCategoryList(GUISelectableList):
|
||||
def __init__(self, dialog):
|
||||
self.dialog = dialog
|
||||
@@ -18,6 +19,7 @@ class CriterionCategoryList(GUISelectableList):
|
||||
self.dialog.select_category(self.dialog.categories[self.selected_index])
|
||||
GUISelectableList._update_selection(self)
|
||||
|
||||
|
||||
class PrioritizationList(GUISelectableList):
|
||||
def __init__(self, dialog):
|
||||
self.dialog = dialog
|
||||
@@ -41,6 +43,7 @@ class PrioritizationList(GUISelectableList):
|
||||
del prilist[i]
|
||||
self._refresh_contents()
|
||||
|
||||
|
||||
class PrioritizeDialog(GUIObject):
|
||||
def __init__(self, app):
|
||||
GUIObject.__init__(self)
|
||||
@@ -52,15 +55,15 @@ class PrioritizeDialog(GUIObject):
|
||||
self.prioritizations = []
|
||||
self.prioritization_list = PrioritizationList(self)
|
||||
|
||||
#--- Override
|
||||
# --- Override
|
||||
def _view_updated(self):
|
||||
self.category_list.select(0)
|
||||
|
||||
#--- Private
|
||||
# --- Private
|
||||
def _sort_key(self, dupe):
|
||||
return tuple(crit.sort_key(dupe) for crit in self.prioritizations)
|
||||
|
||||
#--- Public
|
||||
# --- Public
|
||||
def select_category(self, category):
|
||||
self.criteria = category.criteria_list()
|
||||
self.criteria_list[:] = [c.display_value for c in self.criteria]
|
||||
|
||||
@@ -1,29 +1,29 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2010-04-12
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
from hscommon import desktop
|
||||
|
||||
from .problem_table import ProblemTable
|
||||
|
||||
|
||||
class ProblemDialog:
    """Dialog model that owns a ``ProblemTable`` and remembers the dupe selected in it."""

    def __init__(self, app):
        self.app = app
        self._selected_dupe = None
        self.problem_table = ProblemTable(self)

    def refresh(self):
        """Forget the current selection and refresh the problem table."""
        self._selected_dupe = None
        self.problem_table.refresh()

    def reveal_selected_dupe(self):
        """Pass the selected dupe's path to ``desktop.reveal_path``; no-op if nothing is selected."""
        if self._selected_dupe is not None:
            desktop.reveal_path(self._selected_dupe.path)

    def select_dupe(self, dupe):
        """Remember ``dupe`` (which may be ``None``) as the current selection."""
        self._selected_dupe = dupe
|
||||
|
||||
|
||||
@@ -1,39 +1,40 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2010-04-12
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
from hscommon.gui.table import GUITable, Row
|
||||
from hscommon.gui.column import Column, Columns
|
||||
from hscommon.trans import trget
|
||||
|
||||
coltr = trget('columns')
|
||||
coltr = trget("columns")
|
||||
|
||||
|
||||
class ProblemTable(GUITable):
    """Table listing ``(dupe, message)`` problems taken from the app's results."""

    COLUMNS = [
        Column("path", coltr("File Path")),
        Column("msg", coltr("Error Message")),
    ]

    def __init__(self, problem_dialog):
        GUITable.__init__(self)
        self.columns = Columns(self)
        self.dialog = problem_dialog

    # --- Override
    def _update_selection(self):
        """Tell the dialog which dupe is selected (``None`` when no row is selected)."""
        row = self.selected_row
        dupe = row.dupe if row is not None else None
        self.dialog.select_dupe(dupe)

    def _fill(self):
        """Append one ``ProblemRow`` per entry of ``dialog.app.results.problems``."""
        problems = self.dialog.app.results.problems
        for dupe, msg in problems:
            self.append(ProblemRow(self, dupe, msg))
|
||||
|
||||
|
||||
|
||||
class ProblemRow(Row):
|
||||
def __init__(self, table, dupe, msg):
|
||||
@@ -41,4 +42,3 @@ class ProblemRow(Row):
|
||||
self.dupe = dupe
|
||||
self.msg = msg
|
||||
self.path = str(dupe.path)
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2010-02-11
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
from operator import attrgetter
|
||||
@@ -13,6 +13,7 @@ from hscommon.gui.column import Columns
|
||||
|
||||
from .base import DupeGuruGUIObject
|
||||
|
||||
|
||||
class DupeRow(Row):
|
||||
def __init__(self, table, group, dupe):
|
||||
Row.__init__(self, table)
|
||||
@@ -22,14 +23,14 @@ class DupeRow(Row):
|
||||
self._data = None
|
||||
self._data_delta = None
|
||||
self._delta_columns = None
|
||||
|
||||
|
||||
def is_cell_delta(self, column_name):
|
||||
"""Returns whether a cell is in delta mode (orange color).
|
||||
|
||||
|
||||
If the result table is in delta mode, returns True if the column is one of the "delta
|
||||
columns", that is, one of the columns that display a differential value rather than an
|
||||
absolute value.
|
||||
|
||||
|
||||
If not, returns True if the dupe's value is different from its ref value.
|
||||
"""
|
||||
if not self.table.delta_values:
|
||||
@@ -42,62 +43,64 @@ class DupeRow(Row):
|
||||
dupe_info = self.data
|
||||
ref_info = self._group.ref.get_display_info(group=self._group, delta=False)
|
||||
for key, value in dupe_info.items():
|
||||
if (key not in self._delta_columns) and (ref_info[key].lower() != value.lower()):
|
||||
if (key not in self._delta_columns) and (
|
||||
ref_info[key].lower() != value.lower()
|
||||
):
|
||||
self._delta_columns.add(key)
|
||||
return column_name in self._delta_columns
|
||||
|
||||
|
||||
@property
def data(self):
    """Display info for this row, fetched from the app on first access and then cached.

    The app is queried with ``False`` as its last argument; ``data_delta`` passes ``True``.
    """
    if self._data is None:
        self._data = self._app.get_display_info(self._dupe, self._group, False)
    return self._data
|
||||
|
||||
|
||||
@property
def data_delta(self):
    """Display info fetched with ``True`` as the app call's last argument, cached after first use."""
    if self._data_delta is None:
        self._data_delta = self._app.get_display_info(self._dupe, self._group, True)
    return self._data_delta
|
||||
|
||||
|
||||
@property
def isref(self):
    """True when this row's dupe is the very object stored as its group's ``ref``."""
    return self._dupe is self._group.ref
|
||||
|
||||
|
||||
@property
def markable(self):
    """Whether the app's results report this row's dupe as markable."""
    return self._app.results.is_markable(self._dupe)
|
||||
|
||||
|
||||
@property
def marked(self):
    """Whether the app's results report this row's dupe as marked."""
    return self._app.results.is_marked(self._dupe)


@marked.setter
def marked(self, value):
    # Marking goes through the app (``mark_dupe``), not directly through the results.
    self._app.mark_dupe(self._dupe, value)
|
||||
|
||||
|
||||
|
||||
class ResultTable(GUITable, DupeGuruGUIObject):
|
||||
def __init__(self, app):
    """Initialise both base classes and the table's default state.

    Columns use ``app`` for preference access under the save name ``"ResultTable"``.
    Power-marker and delta-values modes start off; the initial sort descriptor is
    ``("name", True)``, i.e. ``(key, asc)`` as unpacked by the ``power_marker`` setter.
    """
    GUITable.__init__(self)
    DupeGuruGUIObject.__init__(self, app)
    self.columns = Columns(self, prefaccess=app, savename="ResultTable")
    self._power_marker = False
    self._delta_values = False
    self._sort_descriptors = ("name", True)
|
||||
|
||||
# --- Override
|
||||
def _view_updated(self):
|
||||
self._refresh_with_view()
|
||||
|
||||
|
||||
def _restore_selection(self, previous_selection):
|
||||
if self.app.selected_dupes:
|
||||
to_find = set(self.app.selected_dupes)
|
||||
indexes = [i for i, r in enumerate(self) if r._dupe in to_find]
|
||||
self.selected_indexes = indexes
|
||||
|
||||
|
||||
def _update_selection(self):
|
||||
rows = self.selected_rows
|
||||
self.app._select_dupes(list(map(attrgetter('_dupe'), rows)))
|
||||
|
||||
self.app._select_dupes(list(map(attrgetter("_dupe"), rows)))
|
||||
|
||||
def _fill(self):
|
||||
if not self.power_marker:
|
||||
for group in self.app.results.groups:
|
||||
@@ -108,22 +111,22 @@ class ResultTable(GUITable, DupeGuruGUIObject):
|
||||
for dupe in self.app.results.dupes:
|
||||
group = self.app.results.get_group_of_duplicate(dupe)
|
||||
self.append(DupeRow(self, group, dupe))
|
||||
|
||||
|
||||
def _refresh_with_view(self):
|
||||
self.refresh()
|
||||
self.view.show_selected_row()
|
||||
|
||||
#--- Public
|
||||
|
||||
# --- Public
|
||||
def get_row_value(self, index, column):
    """Return the value of ``column`` for the row at ``index``.

    Returns the placeholder ``"---"`` when ``index`` is out of range. The value is read
    from the row's ``data_delta`` when ``delta_values`` is on, otherwise from ``data``.
    """
    try:
        row = self[index]
    except IndexError:
        return "---"
    if self.delta_values:
        return row.data_delta[column]
    else:
        return row.data[column]
|
||||
|
||||
|
||||
def rename_selected(self, newname):
|
||||
row = self.selected_row
|
||||
if row is None:
|
||||
@@ -133,7 +136,7 @@ class ResultTable(GUITable, DupeGuruGUIObject):
|
||||
row._data = None
|
||||
row._data_delta = None
|
||||
return self.app.rename_selected(newname)
|
||||
|
||||
|
||||
def sort(self, key, asc):
|
||||
if self.power_marker:
|
||||
self.app.results.sort_dupes(key, asc, self.delta_values)
|
||||
@@ -141,12 +144,12 @@ class ResultTable(GUITable, DupeGuruGUIObject):
|
||||
self.app.results.sort_groups(key, asc)
|
||||
self._sort_descriptors = (key, asc)
|
||||
self._refresh_with_view()
|
||||
|
||||
#--- Properties
|
||||
|
||||
# --- Properties
|
||||
@property
|
||||
def power_marker(self):
|
||||
return self._power_marker
|
||||
|
||||
|
||||
@power_marker.setter
|
||||
def power_marker(self, value):
|
||||
if value == self._power_marker:
|
||||
@@ -155,29 +158,29 @@ class ResultTable(GUITable, DupeGuruGUIObject):
|
||||
key, asc = self._sort_descriptors
|
||||
self.sort(key, asc)
|
||||
# no need to refresh, it has happened in sort()
|
||||
|
||||
|
||||
@property
def delta_values(self):
    """Whether the table is in delta-values mode."""
    return self._delta_values


@delta_values.setter
def delta_values(self, value):
    # Setting the current value again is a no-op, so no needless refresh happens.
    if value == self._delta_values:
        return
    self._delta_values = value
    self.refresh()
|
||||
|
||||
|
||||
@property
def selected_dupe_count(self):
    """Number of selected rows that are not reference rows (``isref`` is false)."""
    return sum(1 for row in self.selected_rows if not row.isref)
|
||||
|
||||
#--- Event Handlers
|
||||
|
||||
# --- Event Handlers
|
||||
def marking_changed(self):
    """Ask the view to invalidate its markings."""
    self.view.invalidate_markings()
|
||||
|
||||
|
||||
def results_changed(self):
    """Refresh the table together with its view."""
    self._refresh_with_view()
|
||||
|
||||
|
||||
def results_changed_but_keep_selection(self):
|
||||
# What we want to do here is that instead of restoring selected *dupes* after refresh, we
|
||||
# restore selected *paths*.
|
||||
@@ -185,7 +188,6 @@ class ResultTable(GUITable, DupeGuruGUIObject):
|
||||
self.refresh(refresh_view=False)
|
||||
self.select(indexes)
|
||||
self.view.refresh()
|
||||
|
||||
|
||||
def save_session(self):
    """Persist the column state by calling ``columns.save_columns()``."""
    self.columns.save_columns()
|
||||
|
||||
|
||||
@@ -1,21 +1,23 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2010-02-11
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
from .base import DupeGuruGUIObject
|
||||
|
||||
|
||||
class StatsLabel(DupeGuruGUIObject):
    """Label model exposing ``app.stat_line``; refreshes its view on result or marking changes."""

    def _view_updated(self):
        self.view.refresh()

    @property
    def display(self):
        """The text to show: the app's ``stat_line``."""
        return self.app.stat_line

    def results_changed(self):
        self.view.refresh()

    # A marking change is handled exactly like a results change.
    marking_changed = results_changed
|
||||
|
||||
@@ -10,13 +10,15 @@ from xml.etree import ElementTree as ET
|
||||
|
||||
from hscommon.util import FileOrPath
|
||||
|
||||
|
||||
class IgnoreList:
|
||||
"""An ignore list implementation that is iterable, filterable and exportable to XML.
|
||||
|
||||
Call Ignore to add an ignore list entry, and AreIgnored to check if 2 items are in the list.
|
||||
When iterated, 2 sized tuples will be returned, the tuples containing 2 items ignored together.
|
||||
"""
|
||||
#---Override
|
||||
|
||||
# ---Override
|
||||
def __init__(self):
|
||||
self._ignored = {}
|
||||
self._count = 0
|
||||
@@ -29,7 +31,7 @@ class IgnoreList:
|
||||
def __len__(self):
|
||||
return self._count
|
||||
|
||||
#---Public
|
||||
# ---Public
|
||||
def AreIgnored(self, first, second):
|
||||
def do_check(first, second):
|
||||
try:
|
||||
@@ -99,14 +101,14 @@ class IgnoreList:
|
||||
root = ET.parse(infile).getroot()
|
||||
except Exception:
|
||||
return
|
||||
file_elems = (e for e in root if e.tag == 'file')
|
||||
file_elems = (e for e in root if e.tag == "file")
|
||||
for fn in file_elems:
|
||||
file_path = fn.get('path')
|
||||
file_path = fn.get("path")
|
||||
if not file_path:
|
||||
continue
|
||||
subfile_elems = (e for e in fn if e.tag == 'file')
|
||||
subfile_elems = (e for e in fn if e.tag == "file")
|
||||
for sfn in subfile_elems:
|
||||
subfile_path = sfn.get('path')
|
||||
subfile_path = sfn.get("path")
|
||||
if subfile_path:
|
||||
self.Ignore(file_path, subfile_path)
|
||||
|
||||
@@ -115,15 +117,13 @@ class IgnoreList:
|
||||
|
||||
outfile can be a file object or a filename.
|
||||
"""
|
||||
root = ET.Element('ignore_list')
|
||||
root = ET.Element("ignore_list")
|
||||
for filename, subfiles in self._ignored.items():
|
||||
file_node = ET.SubElement(root, 'file')
|
||||
file_node.set('path', filename)
|
||||
file_node = ET.SubElement(root, "file")
|
||||
file_node.set("path", filename)
|
||||
for subfilename in subfiles:
|
||||
subfile_node = ET.SubElement(file_node, 'file')
|
||||
subfile_node.set('path', subfilename)
|
||||
subfile_node = ET.SubElement(file_node, "file")
|
||||
subfile_node.set("path", subfilename)
|
||||
tree = ET.ElementTree(root)
|
||||
with FileOrPath(outfile, 'wb') as fp:
|
||||
tree.write(fp, encoding='utf-8')
|
||||
|
||||
|
||||
with FileOrPath(outfile, "wb") as fp:
|
||||
tree.write(fp, encoding="utf-8")
|
||||
|
||||
@@ -2,40 +2,41 @@
|
||||
# Created On: 2006/02/23
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
|
||||
class Markable:
|
||||
def __init__(self):
|
||||
self.__marked = set()
|
||||
self.__inverted = False
|
||||
|
||||
#---Virtual
|
||||
#About did_mark and did_unmark: They only happen what an object is actually added/removed
|
||||
|
||||
# ---Virtual
|
||||
# About did_mark and did_unmark: They only happen what an object is actually added/removed
|
||||
# in self.__marked, and is not affected by __inverted. Thus, self.mark while __inverted
|
||||
#is True will launch _DidUnmark.
|
||||
# is True will launch _DidUnmark.
|
||||
def _did_mark(self, o):
|
||||
pass
|
||||
|
||||
|
||||
def _did_unmark(self, o):
|
||||
pass
|
||||
|
||||
|
||||
def _get_markable_count(self):
|
||||
return 0
|
||||
|
||||
|
||||
def _is_markable(self, o):
|
||||
return True
|
||||
|
||||
#---Protected
|
||||
|
||||
# ---Protected
|
||||
def _remove_mark_flag(self, o):
|
||||
try:
|
||||
self.__marked.remove(o)
|
||||
self._did_unmark(o)
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
#---Public
|
||||
pass
|
||||
|
||||
# ---Public
|
||||
def is_marked(self, o):
|
||||
if not self._is_markable(o):
|
||||
return False
|
||||
@@ -43,31 +44,31 @@ class Markable:
|
||||
if self.__inverted:
|
||||
is_marked = not is_marked
|
||||
return is_marked
|
||||
|
||||
|
||||
def mark(self, o):
    """Mark ``o``.

    Returns ``False`` without changing anything when ``o`` is already marked or is not
    markable; otherwise returns the result of ``mark_toggle(o)``.
    """
    if self.is_marked(o):
        return False
    if not self._is_markable(o):
        return False
    return self.mark_toggle(o)
|
||||
|
||||
|
||||
def mark_multiple(self, objects):
    """Call ``mark`` on every object of ``objects``, in order."""
    for o in objects:
        self.mark(o)
|
||||
|
||||
|
||||
def mark_all(self):
    """Mark everything: clear all marks, then turn the inverted flag on."""
    self.mark_none()
    self.__inverted = True
|
||||
|
||||
|
||||
def mark_invert(self):
    """Flip the inverted flag; the marked set itself is left untouched."""
    self.__inverted = not self.__inverted
|
||||
|
||||
|
||||
def mark_none(self):
    """Unmark everything.

    Fires ``_did_unmark`` for every object currently in the marked set, then resets the
    set to empty and the inverted flag to ``False``.
    """
    for o in self.__marked:
        self._did_unmark(o)
    self.__marked = set()
    self.__inverted = False
|
||||
|
||||
|
||||
def mark_toggle(self, o):
|
||||
try:
|
||||
self.__marked.remove(o)
|
||||
@@ -78,32 +79,33 @@ class Markable:
|
||||
self.__marked.add(o)
|
||||
self._did_mark(o)
|
||||
return True
|
||||
|
||||
|
||||
def mark_toggle_multiple(self, objects):
    """Call ``mark_toggle`` on every object of ``objects``, in order."""
    for o in objects:
        self.mark_toggle(o)
|
||||
|
||||
|
||||
def unmark(self, o):
    """Unmark ``o``; returns ``False`` if it was not marked, else the result of ``mark_toggle``."""
    if not self.is_marked(o):
        return False
    return self.mark_toggle(o)
|
||||
|
||||
|
||||
def unmark_multiple(self, objects):
    """Call ``unmark`` on every object of ``objects``, in order."""
    for o in objects:
        self.unmark(o)
|
||||
|
||||
#--- Properties
|
||||
|
||||
# --- Properties
|
||||
@property
def mark_count(self):
    """Number of marked objects.

    When inverted, the marked set holds the exceptions, so the count is the markable
    total minus the size of that set.
    """
    if self.__inverted:
        return self._get_markable_count() - len(self.__marked)
    else:
        return len(self.__marked)
|
||||
|
||||
|
||||
@property
def mark_inverted(self):
    """Current value of the inverted flag."""
    return self.__inverted
|
||||
|
||||
|
||||
class MarkableList(list, Markable):
|
||||
def __init__(self):
|
||||
list.__init__(self)
|
||||
|
||||
@@ -1 +1 @@
|
||||
from . import fs, prioritize, result_table, scanner # noqa
|
||||
from . import fs, prioritize, result_table, scanner # noqa
|
||||
|
||||
@@ -13,25 +13,37 @@ from core.util import format_timestamp, format_perc, format_words, format_dupe_c
|
||||
from core import fs
|
||||
|
||||
# Names of the audio-tag attributes; each one also has a default in MusicFile.INITIAL_INFO.
TAG_FIELDS = {
    "audiosize",
    "duration",
    "bitrate",
    "samplerate",
    "title",
    "artist",
    "album",
    "genre",
    "year",
    "track",
    "comment",
}
|
||||
|
||||
|
||||
class MusicFile(fs.File):
|
||||
INITIAL_INFO = fs.File.INITIAL_INFO.copy()
|
||||
INITIAL_INFO.update({
|
||||
'audiosize': 0,
|
||||
'bitrate': 0,
|
||||
'duration': 0,
|
||||
'samplerate': 0,
|
||||
'artist': '',
|
||||
'album': '',
|
||||
'title': '',
|
||||
'genre': '',
|
||||
'comment': '',
|
||||
'year': '',
|
||||
'track': 0,
|
||||
})
|
||||
INITIAL_INFO.update(
|
||||
{
|
||||
"audiosize": 0,
|
||||
"bitrate": 0,
|
||||
"duration": 0,
|
||||
"samplerate": 0,
|
||||
"artist": "",
|
||||
"album": "",
|
||||
"title": "",
|
||||
"genre": "",
|
||||
"comment": "",
|
||||
"year": "",
|
||||
"track": 0,
|
||||
}
|
||||
)
|
||||
__slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())
|
||||
|
||||
@classmethod
|
||||
@@ -60,26 +72,26 @@ class MusicFile(fs.File):
|
||||
else:
|
||||
percentage = group.percentage
|
||||
dupe_count = len(group.dupes)
|
||||
dupe_folder_path = getattr(self, 'display_folder_path', self.folder_path)
|
||||
dupe_folder_path = getattr(self, "display_folder_path", self.folder_path)
|
||||
return {
|
||||
'name': self.name,
|
||||
'folder_path': str(dupe_folder_path),
|
||||
'size': format_size(size, 2, 2, False),
|
||||
'duration': format_time(duration, with_hours=False),
|
||||
'bitrate': str(bitrate),
|
||||
'samplerate': str(samplerate),
|
||||
'extension': self.extension,
|
||||
'mtime': format_timestamp(mtime, delta and m),
|
||||
'title': self.title,
|
||||
'artist': self.artist,
|
||||
'album': self.album,
|
||||
'genre': self.genre,
|
||||
'year': self.year,
|
||||
'track': str(self.track),
|
||||
'comment': self.comment,
|
||||
'percentage': format_perc(percentage),
|
||||
'words': format_words(self.words) if hasattr(self, 'words') else '',
|
||||
'dupe_count': format_dupe_count(dupe_count),
|
||||
"name": self.name,
|
||||
"folder_path": str(dupe_folder_path),
|
||||
"size": format_size(size, 2, 2, False),
|
||||
"duration": format_time(duration, with_hours=False),
|
||||
"bitrate": str(bitrate),
|
||||
"samplerate": str(samplerate),
|
||||
"extension": self.extension,
|
||||
"mtime": format_timestamp(mtime, delta and m),
|
||||
"title": self.title,
|
||||
"artist": self.artist,
|
||||
"album": self.album,
|
||||
"genre": self.genre,
|
||||
"year": self.year,
|
||||
"track": str(self.track),
|
||||
"comment": self.comment,
|
||||
"percentage": format_perc(percentage),
|
||||
"words": format_words(self.words) if hasattr(self, "words") else "",
|
||||
"dupe_count": format_dupe_count(dupe_count),
|
||||
}
|
||||
|
||||
def _get_md5partial_offset_and_size(self):
|
||||
@@ -101,4 +113,3 @@ class MusicFile(fs.File):
|
||||
self.comment = f.comment
|
||||
self.year = f.year
|
||||
self.track = f.track
|
||||
|
||||
|
||||
@@ -8,11 +8,16 @@
|
||||
from hscommon.trans import trget
|
||||
|
||||
from core.prioritize import (
|
||||
KindCategory, FolderCategory, FilenameCategory, NumericalCategory,
|
||||
SizeCategory, MtimeCategory
|
||||
KindCategory,
|
||||
FolderCategory,
|
||||
FilenameCategory,
|
||||
NumericalCategory,
|
||||
SizeCategory,
|
||||
MtimeCategory,
|
||||
)
|
||||
|
||||
coltr = trget('columns')
|
||||
coltr = trget("columns")
|
||||
|
||||
|
||||
class DurationCategory(NumericalCategory):
|
||||
NAME = coltr("Duration")
|
||||
@@ -20,21 +25,29 @@ class DurationCategory(NumericalCategory):
|
||||
def extract_value(self, dupe):
|
||||
return dupe.duration
|
||||
|
||||
|
||||
class BitrateCategory(NumericalCategory):
    """Numerical category whose value is the dupe's ``bitrate``."""

    NAME = coltr("Bitrate")

    def extract_value(self, dupe):
        return dupe.bitrate
|
||||
|
||||
|
||||
class SamplerateCategory(NumericalCategory):
    """Numerical category whose value is the dupe's ``samplerate``."""

    NAME = coltr("Samplerate")

    def extract_value(self, dupe):
        return dupe.samplerate
|
||||
|
||||
|
||||
def all_categories():
    """Return the category classes as a list, in the fixed order below."""
    return [
        KindCategory,
        FolderCategory,
        FilenameCategory,
        SizeCategory,
        DurationCategory,
        BitrateCategory,
        SamplerateCategory,
        MtimeCategory,
    ]
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Created On: 2011-11-27
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
from hscommon.gui.column import Column
|
||||
@@ -10,28 +10,29 @@ from hscommon.trans import trget
|
||||
|
||||
from core.gui.result_table import ResultTable as ResultTableBase
|
||||
|
||||
coltr = trget('columns')
|
||||
coltr = trget("columns")
|
||||
|
||||
|
||||
class ResultTable(ResultTableBase):
    """Result table declaring audio-oriented columns (duration, bitrate, tag fields, ...)."""

    COLUMNS = [
        Column("marked", ""),
        Column("name", coltr("Filename")),
        Column("folder_path", coltr("Folder"), visible=False, optional=True),
        Column("size", coltr("Size (MB)"), optional=True),
        Column("duration", coltr("Time"), optional=True),
        Column("bitrate", coltr("Bitrate"), optional=True),
        Column("samplerate", coltr("Sample Rate"), visible=False, optional=True),
        Column("extension", coltr("Kind"), optional=True),
        Column("mtime", coltr("Modification"), visible=False, optional=True),
        Column("title", coltr("Title"), visible=False, optional=True),
        Column("artist", coltr("Artist"), visible=False, optional=True),
        Column("album", coltr("Album"), visible=False, optional=True),
        Column("genre", coltr("Genre"), visible=False, optional=True),
        Column("year", coltr("Year"), visible=False, optional=True),
        Column("track", coltr("Track Number"), visible=False, optional=True),
        Column("comment", coltr("Comment"), visible=False, optional=True),
        Column("percentage", coltr("Match %"), optional=True),
        Column("words", coltr("Words Used"), visible=False, optional=True),
        Column("dupe_count", coltr("Dupe Count"), visible=False, optional=True),
    ]
    # NOTE(review): presumably the columns rendered as differences in delta mode --
    # confirm against the base class / DupeRow.is_cell_delta.
    DELTA_COLUMNS = {"size", "duration", "bitrate", "samplerate", "mtime"}
|
||||
|
||||
@@ -8,6 +8,7 @@ from hscommon.trans import tr
|
||||
|
||||
from core.scanner import Scanner as ScannerBase, ScanOption, ScanType
|
||||
|
||||
|
||||
class ScannerME(ScannerBase):
|
||||
@staticmethod
|
||||
def _key_func(dupe):
|
||||
@@ -22,5 +23,3 @@ class ScannerME(ScannerBase):
|
||||
ScanOption(ScanType.Tag, tr("Tags")),
|
||||
ScanOption(ScanType.Contents, tr("Contents")),
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -1 +1,12 @@
|
||||
from . import block, cache, exif, iphoto_plist, matchblock, matchexif, photo, prioritize, result_table, scanner # noqa
|
||||
from . import ( # noqa
|
||||
block,
|
||||
cache,
|
||||
exif,
|
||||
iphoto_plist,
|
||||
matchblock,
|
||||
matchexif,
|
||||
photo,
|
||||
prioritize,
|
||||
result_table,
|
||||
scanner,
|
||||
)
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2 # NOQA
|
||||
from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2 # NOQA
|
||||
|
||||
# Converted to C
|
||||
# def getblock(image):
|
||||
|
||||
@@ -4,7 +4,8 @@
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
from ._cache import string_to_colors # noqa
|
||||
from ._cache import string_to_colors # noqa
|
||||
|
||||
|
||||
def colors_to_string(colors):
|
||||
"""Transform the 3 sized tuples 'colors' into a hex string.
|
||||
@@ -12,7 +13,8 @@ def colors_to_string(colors):
|
||||
[(0,100,255)] --> 0064ff
|
||||
[(1,2,3),(4,5,6)] --> 010203040506
|
||||
"""
|
||||
return ''.join('%02x%02x%02x' % (r, g, b) for r, g, b in colors)
|
||||
return "".join("%02x%02x%02x" % (r, g, b) for r, g, b in colors)
|
||||
|
||||
|
||||
# This function is an important bottleneck of dupeGuru PE. It has been converted to C.
|
||||
# def string_to_colors(s):
|
||||
@@ -23,4 +25,3 @@ def colors_to_string(colors):
|
||||
# number = int(s[i:i+6], 16)
|
||||
# result.append((number >> 16, (number >> 8) & 0xff, number & 0xff))
|
||||
# return result
|
||||
|
||||
|
||||
@@ -12,29 +12,36 @@ from collections import namedtuple
|
||||
|
||||
from .cache import string_to_colors, colors_to_string
|
||||
|
||||
|
||||
def wrap_path(path):
    """Return the cache key for ``path``: its string form prefixed with ``"path:"``."""
    return "path:{}".format(path)
|
||||
|
||||
|
||||
def unwrap_path(key):
    """Return ``key`` without its first 5 characters (the length of the ``"path:"`` prefix)."""
    return key[5:]
|
||||
|
||||
|
||||
def wrap_id(path):
    """Return the cache key for a row id: the argument's string form prefixed with ``"id:"``.

    The parameter is named ``path`` in the original signature although it is used with ids.
    """
    return "id:{}".format(path)
|
||||
|
||||
|
||||
def unwrap_id(key):
    """Return the integer following the 3-character ``"id:"`` prefix of ``key``.

    Raises ``ValueError`` when the remainder is not a valid integer literal.
    """
    return int(key[3:])
|
||||
|
||||
# One cached record: fields are ``id``, ``path``, ``blocks`` and ``mtime``.
CacheRow = namedtuple("CacheRow", "id path blocks mtime")
|
||||
|
||||
|
||||
class ShelveCache:
|
||||
"""A class to cache picture blocks in a shelve backend.
|
||||
"""
|
||||
|
||||
def __init__(self, db=None, readonly=False):
    """Open the shelve backing the cache.

    With ``db=None`` a temporary directory is created and a shelve named ``tmpdb`` inside
    it is used; ``istmp`` records that fact. The shelve is opened with flag ``"r"`` when
    ``readonly`` is true, otherwise ``"c"``. ``maxid`` is then computed from existing keys.
    """
    self.istmp = db is None
    if self.istmp:
        self.dtmp = tempfile.mkdtemp()
        self.ftmp = db = op.join(self.dtmp, "tmpdb")
    flag = "r" if readonly else "c"
    self.shelve = shelve.open(db, flag)
    self.maxid = self._compute_maxid()
|
||||
|
||||
@@ -54,10 +61,10 @@ class ShelveCache:
|
||||
return string_to_colors(self.shelve[skey].blocks)
|
||||
|
||||
def __iter__(self):
    """Yield the unwrapped path of every shelve key that starts with ``"path:"``."""
    return (unwrap_path(k) for k in self.shelve if k.startswith("path:"))
|
||||
|
||||
def __len__(self):
|
||||
return sum(1 for k in self.shelve if k.startswith('path:'))
|
||||
return sum(1 for k in self.shelve if k.startswith("path:"))
|
||||
|
||||
def __setitem__(self, path_str, blocks):
|
||||
blocks = colors_to_string(blocks)
|
||||
@@ -74,7 +81,9 @@ class ShelveCache:
|
||||
self.shelve[wrap_id(rowid)] = wrap_path(path_str)
|
||||
|
||||
def _compute_maxid(self):
    """Return the highest id among the ``"id:"`` keys of the shelve, or 1 when there is none."""
    return max(
        (unwrap_id(k) for k in self.shelve if k.startswith("id:")), default=1
    )
|
||||
|
||||
def _get_new_id(self):
|
||||
self.maxid += 1
|
||||
@@ -133,4 +142,3 @@ class ShelveCache:
|
||||
# #402 and #439. I don't think it hurts to silently ignore the error, so that's
|
||||
# what we do
|
||||
pass
|
||||
|
||||
|
||||
@@ -11,10 +11,12 @@ import sqlite3 as sqlite
|
||||
|
||||
from .cache import string_to_colors, colors_to_string
|
||||
|
||||
|
||||
class SqliteCache:
|
||||
"""A class to cache picture blocks in a sqlite backend.
|
||||
"""
|
||||
def __init__(self, db=':memory:', readonly=False):
|
||||
|
||||
def __init__(self, db=":memory:", readonly=False):
|
||||
# readonly is not used in the sqlite version of the cache
|
||||
self.dbname = db
|
||||
self.con = None
|
||||
@@ -67,34 +69,40 @@ class SqliteCache:
|
||||
try:
|
||||
self.con.execute(sql, [blocks, mtime, path_str])
|
||||
except sqlite.OperationalError:
|
||||
logging.warning('Picture cache could not set value for key %r', path_str)
|
||||
logging.warning("Picture cache could not set value for key %r", path_str)
|
||||
except sqlite.DatabaseError as e:
|
||||
logging.warning('DatabaseError while setting value for key %r: %s', path_str, str(e))
|
||||
logging.warning(
|
||||
"DatabaseError while setting value for key %r: %s", path_str, str(e)
|
||||
)
|
||||
|
||||
def _create_con(self, second_try=False):
|
||||
def create_tables():
|
||||
logging.debug("Creating picture cache tables.")
|
||||
self.con.execute("drop table if exists pictures")
|
||||
self.con.execute("drop index if exists idx_path")
|
||||
self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)")
|
||||
self.con.execute(
|
||||
"create table pictures(path TEXT, mtime INTEGER, blocks TEXT)"
|
||||
)
|
||||
self.con.execute("create index idx_path on pictures (path)")
|
||||
|
||||
self.con = sqlite.connect(self.dbname, isolation_level=None)
|
||||
try:
|
||||
self.con.execute("select path, mtime, blocks from pictures where 1=2")
|
||||
except sqlite.OperationalError: # new db
|
||||
except sqlite.OperationalError: # new db
|
||||
create_tables()
|
||||
except sqlite.DatabaseError as e: # corrupted db
|
||||
except sqlite.DatabaseError as e: # corrupted db
|
||||
if second_try:
|
||||
raise # Something really strange is happening
|
||||
logging.warning('Could not create picture cache because of an error: %s', str(e))
|
||||
raise # Something really strange is happening
|
||||
logging.warning(
|
||||
"Could not create picture cache because of an error: %s", str(e)
|
||||
)
|
||||
self.con.close()
|
||||
os.remove(self.dbname)
|
||||
self._create_con(second_try=True)
|
||||
|
||||
def clear(self):
|
||||
self.close()
|
||||
if self.dbname != ':memory:':
|
||||
if self.dbname != ":memory:":
|
||||
os.remove(self.dbname)
|
||||
self._create_con()
|
||||
|
||||
@@ -117,7 +125,9 @@ class SqliteCache:
|
||||
raise ValueError(path)
|
||||
|
||||
def get_multiple(self, rowids):
|
||||
sql = "select rowid, blocks from pictures where rowid in (%s)" % ','.join(map(str, rowids))
|
||||
sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(
|
||||
map(str, rowids)
|
||||
)
|
||||
cur = self.con.execute(sql)
|
||||
return ((rowid, string_to_colors(blocks)) for rowid, blocks in cur)
|
||||
|
||||
@@ -138,6 +148,7 @@ class SqliteCache:
|
||||
continue
|
||||
todelete.append(rowid)
|
||||
if todelete:
|
||||
sql = "delete from pictures where rowid in (%s)" % ','.join(map(str, todelete))
|
||||
sql = "delete from pictures where rowid in (%s)" % ",".join(
|
||||
map(str, todelete)
|
||||
)
|
||||
self.con.execute(sql)
|
||||
|
||||
|
||||
@@ -83,17 +83,17 @@ EXIF_TAGS = {
|
||||
0xA003: "PixelYDimension",
|
||||
0xA004: "RelatedSoundFile",
|
||||
0xA005: "InteroperabilityIFDPointer",
|
||||
0xA20B: "FlashEnergy", # 0x920B in TIFF/EP
|
||||
0xA20C: "SpatialFrequencyResponse", # 0x920C - -
|
||||
0xA20E: "FocalPlaneXResolution", # 0x920E - -
|
||||
0xA20F: "FocalPlaneYResolution", # 0x920F - -
|
||||
0xA210: "FocalPlaneResolutionUnit", # 0x9210 - -
|
||||
0xA214: "SubjectLocation", # 0x9214 - -
|
||||
0xA215: "ExposureIndex", # 0x9215 - -
|
||||
0xA217: "SensingMethod", # 0x9217 - -
|
||||
0xA20B: "FlashEnergy", # 0x920B in TIFF/EP
|
||||
0xA20C: "SpatialFrequencyResponse", # 0x920C - -
|
||||
0xA20E: "FocalPlaneXResolution", # 0x920E - -
|
||||
0xA20F: "FocalPlaneYResolution", # 0x920F - -
|
||||
0xA210: "FocalPlaneResolutionUnit", # 0x9210 - -
|
||||
0xA214: "SubjectLocation", # 0x9214 - -
|
||||
0xA215: "ExposureIndex", # 0x9215 - -
|
||||
0xA217: "SensingMethod", # 0x9217 - -
|
||||
0xA300: "FileSource",
|
||||
0xA301: "SceneType",
|
||||
0xA302: "CFAPattern", # 0x828E in TIFF/EP
|
||||
0xA302: "CFAPattern", # 0x828E in TIFF/EP
|
||||
0xA401: "CustomRendered",
|
||||
0xA402: "ExposureMode",
|
||||
0xA403: "WhiteBalance",
|
||||
@@ -148,17 +148,18 @@ GPS_TA0GS = {
|
||||
0x1B: "GPSProcessingMethod",
|
||||
0x1C: "GPSAreaInformation",
|
||||
0x1D: "GPSDateStamp",
|
||||
0x1E: "GPSDifferential"
|
||||
0x1E: "GPSDifferential",
|
||||
}
|
||||
|
||||
INTEL_ENDIAN = ord('I')
|
||||
MOTOROLA_ENDIAN = ord('M')
|
||||
INTEL_ENDIAN = ord("I")
|
||||
MOTOROLA_ENDIAN = ord("M")
|
||||
|
||||
# About MAX_COUNT: It's possible to have corrupted exif tags where the entry count is way too high
|
||||
# and thus makes us loop, not endlessly, but for heck of a long time for nothing. Therefore, we put
|
||||
# an arbitrary limit on the entry count we'll allow ourselves to read and any IFD reporting more
|
||||
# entries than that will be considered corrupt.
|
||||
MAX_COUNT = 0xffff
|
||||
MAX_COUNT = 0xFFFF
|
||||
|
||||
|
||||
def s2n_motorola(bytes):
|
||||
x = 0
|
||||
@@ -166,6 +167,7 @@ def s2n_motorola(bytes):
|
||||
x = (x << 8) | c
|
||||
return x
|
||||
|
||||
|
||||
def s2n_intel(bytes):
|
||||
x = 0
|
||||
y = 0
|
||||
@@ -174,13 +176,14 @@ def s2n_intel(bytes):
|
||||
y = y + 8
|
||||
return x
|
||||
|
||||
|
||||
class Fraction:
|
||||
def __init__(self, num, den):
|
||||
self.num = num
|
||||
self.den = den
|
||||
|
||||
def __repr__(self):
|
||||
return '%d/%d' % (self.num, self.den)
|
||||
return "%d/%d" % (self.num, self.den)
|
||||
|
||||
|
||||
class TIFF_file:
|
||||
@@ -190,16 +193,22 @@ class TIFF_file:
|
||||
self.s2nfunc = s2n_intel if self.endian == INTEL_ENDIAN else s2n_motorola
|
||||
|
||||
def s2n(self, offset, length, signed=0, debug=False):
|
||||
slice = self.data[offset:offset+length]
|
||||
slice = self.data[offset : offset + length]
|
||||
val = self.s2nfunc(slice)
|
||||
# Sign extension ?
|
||||
if signed:
|
||||
msb = 1 << (8*length - 1)
|
||||
msb = 1 << (8 * length - 1)
|
||||
if val & msb:
|
||||
val = val - (msb << 1)
|
||||
if debug:
|
||||
logging.debug(self.endian)
|
||||
logging.debug("Slice for offset %d length %d: %r and value: %d", offset, length, slice, val)
|
||||
logging.debug(
|
||||
"Slice for offset %d length %d: %r and value: %d",
|
||||
offset,
|
||||
length,
|
||||
slice,
|
||||
val,
|
||||
)
|
||||
return val
|
||||
|
||||
def first_IFD(self):
|
||||
@@ -225,30 +234,31 @@ class TIFF_file:
|
||||
return []
|
||||
a = []
|
||||
for i in range(entries):
|
||||
entry = ifd + 2 + 12*i
|
||||
entry = ifd + 2 + 12 * i
|
||||
tag = self.s2n(entry, 2)
|
||||
type = self.s2n(entry+2, 2)
|
||||
type = self.s2n(entry + 2, 2)
|
||||
if not 1 <= type <= 10:
|
||||
continue # not handled
|
||||
typelen = [1, 1, 2, 4, 8, 1, 1, 2, 4, 8][type-1]
|
||||
count = self.s2n(entry+4, 4)
|
||||
continue # not handled
|
||||
typelen = [1, 1, 2, 4, 8, 1, 1, 2, 4, 8][type - 1]
|
||||
count = self.s2n(entry + 4, 4)
|
||||
if count > MAX_COUNT:
|
||||
logging.debug("Probably corrupt. Aborting.")
|
||||
return []
|
||||
offset = entry+8
|
||||
if count*typelen > 4:
|
||||
offset = entry + 8
|
||||
if count * typelen > 4:
|
||||
offset = self.s2n(offset, 4)
|
||||
if type == 2:
|
||||
# Special case: nul-terminated ASCII string
|
||||
values = str(self.data[offset:offset+count-1], encoding='latin-1')
|
||||
values = str(self.data[offset : offset + count - 1], encoding="latin-1")
|
||||
else:
|
||||
values = []
|
||||
signed = (type == 6 or type >= 8)
|
||||
signed = type == 6 or type >= 8
|
||||
for j in range(count):
|
||||
if type in {5, 10}:
|
||||
# The type is either 5 or 10
|
||||
value_j = Fraction(self.s2n(offset, 4, signed),
|
||||
self.s2n(offset+4, 4, signed))
|
||||
value_j = Fraction(
|
||||
self.s2n(offset, 4, signed), self.s2n(offset + 4, 4, signed)
|
||||
)
|
||||
else:
|
||||
# Not a fraction
|
||||
value_j = self.s2n(offset, typelen, signed)
|
||||
@@ -258,32 +268,37 @@ class TIFF_file:
|
||||
a.append((tag, type, values))
|
||||
return a
|
||||
|
||||
|
||||
def read_exif_header(fp):
|
||||
# If `fp`'s first bytes are not exif, it tries to find it in the next 4kb
|
||||
def isexif(data):
|
||||
return data[0:4] == b'\377\330\377\341' and data[6:10] == b'Exif'
|
||||
return data[0:4] == b"\377\330\377\341" and data[6:10] == b"Exif"
|
||||
|
||||
data = fp.read(12)
|
||||
if isexif(data):
|
||||
return data
|
||||
# ok, not exif, try to find it
|
||||
large_data = fp.read(4096)
|
||||
try:
|
||||
index = large_data.index(b'Exif')
|
||||
data = large_data[index-6:index+6]
|
||||
index = large_data.index(b"Exif")
|
||||
data = large_data[index - 6 : index + 6]
|
||||
# large_data omits the first 12 bytes, and the index is at the middle of the header, so we
|
||||
# must seek index + 18
|
||||
fp.seek(index+18)
|
||||
fp.seek(index + 18)
|
||||
return data
|
||||
except ValueError:
|
||||
raise ValueError("Not an Exif file")
|
||||
|
||||
|
||||
def get_fields(fp):
|
||||
data = read_exif_header(fp)
|
||||
length = data[4] * 256 + data[5]
|
||||
logging.debug("Exif header length: %d bytes", length)
|
||||
data = fp.read(length-8)
|
||||
data = fp.read(length - 8)
|
||||
data_format = data[0]
|
||||
logging.debug("%s format", {INTEL_ENDIAN: 'Intel', MOTOROLA_ENDIAN: 'Motorola'}[data_format])
|
||||
logging.debug(
|
||||
"%s format", {INTEL_ENDIAN: "Intel", MOTOROLA_ENDIAN: "Motorola"}[data_format]
|
||||
)
|
||||
T = TIFF_file(data)
|
||||
# There may be more than one IFD per file, but we only read the first one because others are
|
||||
# most likely thumbnails.
|
||||
@@ -294,9 +309,9 @@ def get_fields(fp):
|
||||
try:
|
||||
stag = EXIF_TAGS[tag]
|
||||
except KeyError:
|
||||
stag = '0x%04X' % tag
|
||||
stag = "0x%04X" % tag
|
||||
if stag in result:
|
||||
return # don't overwrite data
|
||||
return # don't overwrite data
|
||||
result[stag] = values
|
||||
|
||||
logging.debug("IFD at offset %d", main_IFD_offset)
|
||||
|
||||
@@ -1,24 +1,26 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2014-03-15
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
import plistlib
|
||||
|
||||
|
||||
class IPhotoPlistParser(plistlib._PlistParser):
|
||||
"""A parser for iPhoto plists.
|
||||
|
||||
iPhoto plists tend to be malformed, so we have to subclass the built-in parser to be a bit more
|
||||
lenient.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
plistlib._PlistParser.__init__(self, use_builtin_types=True, dict_type=dict)
|
||||
# For debugging purposes, we remember the last bit of data to be analyzed so that we can
|
||||
# log it in case of an exception
|
||||
self.lastdata = ''
|
||||
self.lastdata = ""
|
||||
|
||||
def get_data(self):
|
||||
self.lastdata = plistlib._PlistParser.get_data(self)
|
||||
|
||||
@@ -48,14 +48,18 @@ except Exception:
|
||||
logging.warning("Had problems to determine cpu count on launch.")
|
||||
RESULTS_QUEUE_LIMIT = 8
|
||||
|
||||
|
||||
def get_cache(cache_path, readonly=False):
|
||||
if cache_path.endswith('shelve'):
|
||||
if cache_path.endswith("shelve"):
|
||||
from .cache_shelve import ShelveCache
|
||||
|
||||
return ShelveCache(cache_path, readonly=readonly)
|
||||
else:
|
||||
from .cache_sqlite import SqliteCache
|
||||
|
||||
return SqliteCache(cache_path, readonly=readonly)
|
||||
|
||||
|
||||
def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
|
||||
# The MemoryError handlers in there use logging without first caring about whether or not
|
||||
# there is enough memory left to carry on the operation because it is assumed that the
|
||||
@@ -63,7 +67,7 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
|
||||
# time that MemoryError is raised.
|
||||
cache = get_cache(cache_path)
|
||||
cache.purge_outdated()
|
||||
prepared = [] # only pictures for which there was no error getting blocks
|
||||
prepared = [] # only pictures for which there was no error getting blocks
|
||||
try:
|
||||
for picture in j.iter_with_progress(pictures, tr("Analyzed %d/%d pictures")):
|
||||
if not picture.path:
|
||||
@@ -77,7 +81,7 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
|
||||
picture.unicode_path = str(picture.path)
|
||||
logging.debug("Analyzing picture at %s", picture.unicode_path)
|
||||
if with_dimensions:
|
||||
picture.dimensions # pre-read dimensions
|
||||
picture.dimensions # pre-read dimensions
|
||||
try:
|
||||
if picture.unicode_path not in cache:
|
||||
blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
|
||||
@@ -86,32 +90,45 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
|
||||
except (IOError, ValueError) as e:
|
||||
logging.warning(str(e))
|
||||
except MemoryError:
|
||||
logging.warning("Ran out of memory while reading %s of size %d", picture.unicode_path, picture.size)
|
||||
if picture.size < 10 * 1024 * 1024: # We're really running out of memory
|
||||
logging.warning(
|
||||
"Ran out of memory while reading %s of size %d",
|
||||
picture.unicode_path,
|
||||
picture.size,
|
||||
)
|
||||
if (
|
||||
picture.size < 10 * 1024 * 1024
|
||||
): # We're really running out of memory
|
||||
raise
|
||||
except MemoryError:
|
||||
logging.warning('Ran out of memory while preparing pictures')
|
||||
logging.warning("Ran out of memory while preparing pictures")
|
||||
cache.close()
|
||||
return prepared
|
||||
|
||||
|
||||
def get_chunks(pictures):
|
||||
min_chunk_count = multiprocessing.cpu_count() * 2 # have enough chunks to feed all subprocesses
|
||||
min_chunk_count = (
|
||||
multiprocessing.cpu_count() * 2
|
||||
) # have enough chunks to feed all subprocesses
|
||||
chunk_count = len(pictures) // DEFAULT_CHUNK_SIZE
|
||||
chunk_count = max(min_chunk_count, chunk_count)
|
||||
chunk_size = (len(pictures) // chunk_count) + 1
|
||||
chunk_size = max(MIN_CHUNK_SIZE, chunk_size)
|
||||
logging.info(
|
||||
"Creating %d chunks with a chunk size of %d for %d pictures", chunk_count,
|
||||
chunk_size, len(pictures)
|
||||
"Creating %d chunks with a chunk size of %d for %d pictures",
|
||||
chunk_count,
|
||||
chunk_size,
|
||||
len(pictures),
|
||||
)
|
||||
chunks = [pictures[i:i+chunk_size] for i in range(0, len(pictures), chunk_size)]
|
||||
chunks = [pictures[i : i + chunk_size] for i in range(0, len(pictures), chunk_size)]
|
||||
return chunks
|
||||
|
||||
|
||||
def get_match(first, second, percentage):
|
||||
if percentage < 0:
|
||||
percentage = 0
|
||||
return Match(first, second, percentage)
|
||||
|
||||
|
||||
def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
|
||||
# The list of ids in ref_ids have to be compared to the list of ids in other_ids. other_ids
|
||||
# can be None. In this case, ref_ids has to be compared with itself
|
||||
@@ -142,6 +159,7 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
|
||||
cache.close()
|
||||
return results
|
||||
|
||||
|
||||
def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob):
|
||||
def get_picinfo(p):
|
||||
if match_scaled:
|
||||
@@ -160,11 +178,16 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
|
||||
async_results.remove(result)
|
||||
comparison_count += 1
|
||||
# About the NOQA below: I think there's a bug in pyflakes. To investigate...
|
||||
progress_msg = tr("Performed %d/%d chunk matches") % (comparison_count, len(comparisons_to_do)) # NOQA
|
||||
progress_msg = tr("Performed %d/%d chunk matches") % (
|
||||
comparison_count,
|
||||
len(comparisons_to_do),
|
||||
) # NOQA
|
||||
j.set_progress(comparison_count, progress_msg)
|
||||
|
||||
j = j.start_subjob([3, 7])
|
||||
pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j)
|
||||
pictures = prepare_pictures(
|
||||
pictures, cache_path, with_dimensions=not match_scaled, j=j
|
||||
)
|
||||
j = j.start_subjob([9, 1], tr("Preparing for matching"))
|
||||
cache = get_cache(cache_path)
|
||||
id2picture = {}
|
||||
@@ -175,7 +198,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
|
||||
except ValueError:
|
||||
pass
|
||||
cache.close()
|
||||
pictures = [p for p in pictures if hasattr(p, 'cache_id')]
|
||||
pictures = [p for p in pictures if hasattr(p, "cache_id")]
|
||||
pool = multiprocessing.Pool()
|
||||
async_results = []
|
||||
matches = []
|
||||
@@ -203,9 +226,17 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
|
||||
# some wiggle room, log about the incident, and stop matching right here. We then process
|
||||
# the matches we have. The rest of the process doesn't allocate much and we should be
|
||||
# alright.
|
||||
del comparisons_to_do, chunks, pictures # some wiggle room for the next statements
|
||||
logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches))
|
||||
del matches[-len(matches)//3:] # some wiggle room to ensure we don't run out of memory again.
|
||||
del (
|
||||
comparisons_to_do,
|
||||
chunks,
|
||||
pictures,
|
||||
) # some wiggle room for the next statements
|
||||
logging.warning(
|
||||
"Ran out of memory when scanning! We had %d matches.", len(matches)
|
||||
)
|
||||
del matches[
|
||||
-len(matches) // 3 :
|
||||
] # some wiggle room to ensure we don't run out of memory again.
|
||||
pool.close()
|
||||
result = []
|
||||
myiter = j.iter_with_progress(
|
||||
@@ -220,10 +251,10 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
|
||||
if percentage == 100 and ref.md5 != other.md5:
|
||||
percentage = 99
|
||||
if percentage >= threshold:
|
||||
ref.dimensions # pre-read dimensions for display in results
|
||||
ref.dimensions # pre-read dimensions for display in results
|
||||
other.dimensions
|
||||
result.append(get_match(ref, other, percentage))
|
||||
return result
|
||||
|
||||
multiprocessing.freeze_support()
|
||||
|
||||
multiprocessing.freeze_support()
|
||||
|
||||
@@ -13,14 +13,15 @@ from hscommon.trans import tr
|
||||
|
||||
from core.engine import Match
|
||||
|
||||
|
||||
def getmatches(files, match_scaled, j):
|
||||
timestamp2pic = defaultdict(set)
|
||||
for picture in j.iter_with_progress(files, tr("Read EXIF of %d/%d pictures")):
|
||||
timestamp = picture.exif_timestamp
|
||||
if timestamp:
|
||||
timestamp2pic[timestamp].add(picture)
|
||||
if '0000:00:00 00:00:00' in timestamp2pic: # very likely false matches
|
||||
del timestamp2pic['0000:00:00 00:00:00']
|
||||
if "0000:00:00 00:00:00" in timestamp2pic: # very likely false matches
|
||||
del timestamp2pic["0000:00:00 00:00:00"]
|
||||
matches = []
|
||||
for pictures in timestamp2pic.values():
|
||||
for p1, p2 in combinations(pictures, 2):
|
||||
@@ -28,4 +29,3 @@ def getmatches(files, match_scaled, j):
|
||||
continue
|
||||
matches.append(Match(p1, p2, 100))
|
||||
return matches
|
||||
|
||||
|
||||
@@ -14,23 +14,22 @@ from . import exif
|
||||
# This global value is set by the platform-specific subclasser of the Photo base class
|
||||
PLAT_SPECIFIC_PHOTO_CLASS = None
|
||||
|
||||
|
||||
def format_dimensions(dimensions):
|
||||
return '%d x %d' % (dimensions[0], dimensions[1])
|
||||
return "%d x %d" % (dimensions[0], dimensions[1])
|
||||
|
||||
|
||||
def get_delta_dimensions(value, ref_value):
|
||||
return (value[0]-ref_value[0], value[1]-ref_value[1])
|
||||
return (value[0] - ref_value[0], value[1] - ref_value[1])
|
||||
|
||||
|
||||
class Photo(fs.File):
|
||||
INITIAL_INFO = fs.File.INITIAL_INFO.copy()
|
||||
INITIAL_INFO.update({
|
||||
'dimensions': (0, 0),
|
||||
'exif_timestamp': '',
|
||||
})
|
||||
INITIAL_INFO.update({"dimensions": (0, 0), "exif_timestamp": ""})
|
||||
__slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())
|
||||
|
||||
# These extensions are supported on all platforms
|
||||
HANDLED_EXTS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'tif'}
|
||||
HANDLED_EXTS = {"png", "jpg", "jpeg", "gif", "bmp", "tiff", "tif"}
|
||||
|
||||
def _plat_get_dimensions(self):
|
||||
raise NotImplementedError()
|
||||
@@ -39,25 +38,25 @@ class Photo(fs.File):
|
||||
raise NotImplementedError()
|
||||
|
||||
def _get_orientation(self):
|
||||
if not hasattr(self, '_cached_orientation'):
|
||||
if not hasattr(self, "_cached_orientation"):
|
||||
try:
|
||||
with self.path.open('rb') as fp:
|
||||
with self.path.open("rb") as fp:
|
||||
exifdata = exif.get_fields(fp)
|
||||
# the value is a list (probably one-sized) of ints
|
||||
orientations = exifdata['Orientation']
|
||||
orientations = exifdata["Orientation"]
|
||||
self._cached_orientation = orientations[0]
|
||||
except Exception: # Couldn't read EXIF data, no transforms
|
||||
except Exception: # Couldn't read EXIF data, no transforms
|
||||
self._cached_orientation = 0
|
||||
return self._cached_orientation
|
||||
|
||||
def _get_exif_timestamp(self):
|
||||
try:
|
||||
with self.path.open('rb') as fp:
|
||||
with self.path.open("rb") as fp:
|
||||
exifdata = exif.get_fields(fp)
|
||||
return exifdata['DateTimeOriginal']
|
||||
return exifdata["DateTimeOriginal"]
|
||||
except Exception:
|
||||
logging.info("Couldn't read EXIF of picture: %s", self.path)
|
||||
return ''
|
||||
return ""
|
||||
|
||||
@classmethod
|
||||
def can_handle(cls, path):
|
||||
@@ -79,28 +78,27 @@ class Photo(fs.File):
|
||||
else:
|
||||
percentage = group.percentage
|
||||
dupe_count = len(group.dupes)
|
||||
dupe_folder_path = getattr(self, 'display_folder_path', self.folder_path)
|
||||
dupe_folder_path = getattr(self, "display_folder_path", self.folder_path)
|
||||
return {
|
||||
'name': self.name,
|
||||
'folder_path': str(dupe_folder_path),
|
||||
'size': format_size(size, 0, 1, False),
|
||||
'extension': self.extension,
|
||||
'dimensions': format_dimensions(dimensions),
|
||||
'exif_timestamp': self.exif_timestamp,
|
||||
'mtime': format_timestamp(mtime, delta and m),
|
||||
'percentage': format_perc(percentage),
|
||||
'dupe_count': format_dupe_count(dupe_count),
|
||||
"name": self.name,
|
||||
"folder_path": str(dupe_folder_path),
|
||||
"size": format_size(size, 0, 1, False),
|
||||
"extension": self.extension,
|
||||
"dimensions": format_dimensions(dimensions),
|
||||
"exif_timestamp": self.exif_timestamp,
|
||||
"mtime": format_timestamp(mtime, delta and m),
|
||||
"percentage": format_perc(percentage),
|
||||
"dupe_count": format_dupe_count(dupe_count),
|
||||
}
|
||||
|
||||
def _read_info(self, field):
|
||||
fs.File._read_info(self, field)
|
||||
if field == 'dimensions':
|
||||
if field == "dimensions":
|
||||
self.dimensions = self._plat_get_dimensions()
|
||||
if self._get_orientation() in {5, 6, 7, 8}:
|
||||
self.dimensions = (self.dimensions[1], self.dimensions[0])
|
||||
elif field == 'exif_timestamp':
|
||||
elif field == "exif_timestamp":
|
||||
self.exif_timestamp = self._get_exif_timestamp()
|
||||
|
||||
def get_blocks(self, block_count_per_side):
|
||||
return self._plat_get_blocks(block_count_per_side, self._get_orientation())
|
||||
|
||||
|
||||
@@ -8,11 +8,16 @@
|
||||
from hscommon.trans import trget
|
||||
|
||||
from core.prioritize import (
|
||||
KindCategory, FolderCategory, FilenameCategory, NumericalCategory,
|
||||
SizeCategory, MtimeCategory
|
||||
KindCategory,
|
||||
FolderCategory,
|
||||
FilenameCategory,
|
||||
NumericalCategory,
|
||||
SizeCategory,
|
||||
MtimeCategory,
|
||||
)
|
||||
|
||||
coltr = trget('columns')
|
||||
coltr = trget("columns")
|
||||
|
||||
|
||||
class DimensionsCategory(NumericalCategory):
|
||||
NAME = coltr("Dimensions")
|
||||
@@ -24,8 +29,13 @@ class DimensionsCategory(NumericalCategory):
|
||||
width, height = value
|
||||
return (-width, -height)
|
||||
|
||||
|
||||
def all_categories():
|
||||
return [
|
||||
KindCategory, FolderCategory, FilenameCategory, SizeCategory, DimensionsCategory,
|
||||
MtimeCategory
|
||||
KindCategory,
|
||||
FolderCategory,
|
||||
FilenameCategory,
|
||||
SizeCategory,
|
||||
DimensionsCategory,
|
||||
MtimeCategory,
|
||||
]
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Created On: 2011-11-27
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
from hscommon.gui.column import Column
|
||||
@@ -10,19 +10,20 @@ from hscommon.trans import trget
|
||||
|
||||
from core.gui.result_table import ResultTable as ResultTableBase
|
||||
|
||||
coltr = trget('columns')
|
||||
coltr = trget("columns")
|
||||
|
||||
|
||||
class ResultTable(ResultTableBase):
|
||||
COLUMNS = [
|
||||
Column('marked', ''),
|
||||
Column('name', coltr("Filename")),
|
||||
Column('folder_path', coltr("Folder"), optional=True),
|
||||
Column('size', coltr("Size (KB)"), optional=True),
|
||||
Column('extension', coltr("Kind"), visible=False, optional=True),
|
||||
Column('dimensions', coltr("Dimensions"), optional=True),
|
||||
Column('exif_timestamp', coltr("EXIF Timestamp"), visible=False, optional=True),
|
||||
Column('mtime', coltr("Modification"), visible=False, optional=True),
|
||||
Column('percentage', coltr("Match %"), optional=True),
|
||||
Column('dupe_count', coltr("Dupe Count"), visible=False, optional=True),
|
||||
Column("marked", ""),
|
||||
Column("name", coltr("Filename")),
|
||||
Column("folder_path", coltr("Folder"), optional=True),
|
||||
Column("size", coltr("Size (KB)"), optional=True),
|
||||
Column("extension", coltr("Kind"), visible=False, optional=True),
|
||||
Column("dimensions", coltr("Dimensions"), optional=True),
|
||||
Column("exif_timestamp", coltr("EXIF Timestamp"), visible=False, optional=True),
|
||||
Column("mtime", coltr("Modification"), visible=False, optional=True),
|
||||
Column("percentage", coltr("Match %"), optional=True),
|
||||
Column("dupe_count", coltr("Dupe Count"), visible=False, optional=True),
|
||||
]
|
||||
DELTA_COLUMNS = {'size', 'dimensions', 'mtime'}
|
||||
DELTA_COLUMNS = {"size", "dimensions", "mtime"}
|
||||
|
||||
@@ -10,6 +10,7 @@ from core.scanner import Scanner, ScanType, ScanOption
|
||||
|
||||
from . import matchblock, matchexif
|
||||
|
||||
|
||||
class ScannerPE(Scanner):
|
||||
cache_path = None
|
||||
match_scaled = False
|
||||
@@ -28,10 +29,9 @@ class ScannerPE(Scanner):
|
||||
cache_path=self.cache_path,
|
||||
threshold=self.min_match_percentage,
|
||||
match_scaled=self.match_scaled,
|
||||
j=j
|
||||
j=j,
|
||||
)
|
||||
elif self.scan_type == ScanType.ExifTimestamp:
|
||||
return matchexif.getmatches(files, self.match_scaled, j)
|
||||
else:
|
||||
raise Exception("Invalid scan type")
|
||||
|
||||
|
||||
@@ -1,48 +1,50 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2011/09/07
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
from hscommon.util import dedupe, flatten, rem_file_ext
|
||||
from hscommon.trans import trget, tr
|
||||
|
||||
coltr = trget('columns')
|
||||
coltr = trget("columns")
|
||||
|
||||
|
||||
class CriterionCategory:
|
||||
NAME = "Undefined"
|
||||
|
||||
|
||||
def __init__(self, results):
|
||||
self.results = results
|
||||
|
||||
#--- Virtual
|
||||
|
||||
# --- Virtual
|
||||
def extract_value(self, dupe):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
def format_criterion_value(self, value):
|
||||
return value
|
||||
|
||||
|
||||
def sort_key(self, dupe, crit_value):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
def criteria_list(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class Criterion:
|
||||
def __init__(self, category, value):
|
||||
self.category = category
|
||||
self.value = value
|
||||
self.display_value = category.format_criterion_value(value)
|
||||
|
||||
|
||||
def sort_key(self, dupe):
|
||||
return self.category.sort_key(dupe, self.value)
|
||||
|
||||
|
||||
@property
|
||||
def display(self):
|
||||
return "{} ({})".format(self.category.NAME, self.display_value)
|
||||
|
||||
|
||||
|
||||
class ValueListCategory(CriterionCategory):
|
||||
def sort_key(self, dupe, crit_value):
|
||||
@@ -52,45 +54,47 @@ class ValueListCategory(CriterionCategory):
|
||||
return 0
|
||||
else:
|
||||
return 1
|
||||
|
||||
|
||||
def criteria_list(self):
|
||||
dupes = flatten(g[:] for g in self.results.groups)
|
||||
values = sorted(dedupe(self.extract_value(d) for d in dupes))
|
||||
return [Criterion(self, value) for value in values]
|
||||
|
||||
|
||||
|
||||
class KindCategory(ValueListCategory):
|
||||
NAME = coltr("Kind")
|
||||
|
||||
|
||||
def extract_value(self, dupe):
|
||||
value = dupe.extension
|
||||
if not value:
|
||||
value = tr("None")
|
||||
return value
|
||||
|
||||
|
||||
class FolderCategory(ValueListCategory):
|
||||
NAME = coltr("Folder")
|
||||
|
||||
|
||||
def extract_value(self, dupe):
|
||||
return dupe.folder_path
|
||||
|
||||
|
||||
def format_criterion_value(self, value):
|
||||
return str(value)
|
||||
|
||||
|
||||
def sort_key(self, dupe, crit_value):
|
||||
value = self.extract_value(dupe)
|
||||
if value[:len(crit_value)] == crit_value:
|
||||
if value[: len(crit_value)] == crit_value:
|
||||
return 0
|
||||
else:
|
||||
return 1
|
||||
|
||||
|
||||
class FilenameCategory(CriterionCategory):
|
||||
NAME = coltr("Filename")
|
||||
ENDS_WITH_NUMBER = 0
|
||||
DOESNT_END_WITH_NUMBER = 1
|
||||
LONGEST = 2
|
||||
SHORTEST = 3
|
||||
|
||||
|
||||
def format_criterion_value(self, value):
|
||||
return {
|
||||
self.ENDS_WITH_NUMBER: tr("Ends with number"),
|
||||
@@ -98,10 +102,10 @@ class FilenameCategory(CriterionCategory):
|
||||
self.LONGEST: tr("Longest"),
|
||||
self.SHORTEST: tr("Shortest"),
|
||||
}[value]
|
||||
|
||||
|
||||
def extract_value(self, dupe):
|
||||
return rem_file_ext(dupe.name)
|
||||
|
||||
|
||||
def sort_key(self, dupe, crit_value):
|
||||
value = self.extract_value(dupe)
|
||||
if crit_value in {self.ENDS_WITH_NUMBER, self.DOESNT_END_WITH_NUMBER}:
|
||||
@@ -113,50 +117,57 @@ class FilenameCategory(CriterionCategory):
|
||||
else:
|
||||
value = len(value)
|
||||
if crit_value == self.LONGEST:
|
||||
value *= -1 # We want the biggest values on top
|
||||
value *= -1 # We want the biggest values on top
|
||||
return value
|
||||
|
||||
|
||||
def criteria_list(self):
|
||||
return [Criterion(self, crit_value) for crit_value in [
|
||||
self.ENDS_WITH_NUMBER,
|
||||
self.DOESNT_END_WITH_NUMBER,
|
||||
self.LONGEST,
|
||||
self.SHORTEST,
|
||||
]]
|
||||
return [
|
||||
Criterion(self, crit_value)
|
||||
for crit_value in [
|
||||
self.ENDS_WITH_NUMBER,
|
||||
self.DOESNT_END_WITH_NUMBER,
|
||||
self.LONGEST,
|
||||
self.SHORTEST,
|
||||
]
|
||||
]
|
||||
|
||||
|
||||
class NumericalCategory(CriterionCategory):
|
||||
HIGHEST = 0
|
||||
LOWEST = 1
|
||||
|
||||
|
||||
def format_criterion_value(self, value):
|
||||
return tr("Highest") if value == self.HIGHEST else tr("Lowest")
|
||||
|
||||
def invert_numerical_value(self, value): # Virtual
|
||||
|
||||
def invert_numerical_value(self, value): # Virtual
|
||||
return value * -1
|
||||
|
||||
|
||||
def sort_key(self, dupe, crit_value):
|
||||
value = self.extract_value(dupe)
|
||||
if crit_value == self.HIGHEST: # we want highest values on top
|
||||
if crit_value == self.HIGHEST: # we want highest values on top
|
||||
value = self.invert_numerical_value(value)
|
||||
return value
|
||||
|
||||
|
||||
def criteria_list(self):
|
||||
return [Criterion(self, self.HIGHEST), Criterion(self, self.LOWEST)]
|
||||
|
||||
|
||||
|
||||
class SizeCategory(NumericalCategory):
|
||||
NAME = coltr("Size")
|
||||
|
||||
|
||||
def extract_value(self, dupe):
|
||||
return dupe.size
|
||||
|
||||
|
||||
class MtimeCategory(NumericalCategory):
|
||||
NAME = coltr("Modification")
|
||||
|
||||
|
||||
def extract_value(self, dupe):
|
||||
return dupe.mtime
|
||||
|
||||
|
||||
def format_criterion_value(self, value):
|
||||
return tr("Newest") if value == self.HIGHEST else tr("Oldest")
|
||||
|
||||
|
||||
def all_categories():
|
||||
return [KindCategory, FolderCategory, FilenameCategory, SizeCategory, MtimeCategory]
|
||||
|
||||
109
core/results.py
109
core/results.py
@@ -20,6 +20,7 @@ from hscommon.trans import tr
|
||||
from . import engine
|
||||
from .markable import Markable
|
||||
|
||||
|
||||
class Results(Markable):
|
||||
"""Manages a collection of duplicate :class:`~core.engine.Group`.
|
||||
|
||||
@@ -34,21 +35,22 @@ class Results(Markable):
|
||||
A list of all duplicates (:class:`~core.fs.File` instances), without ref, contained in the
|
||||
currently managed :attr:`groups`.
|
||||
"""
|
||||
#---Override
|
||||
|
||||
# ---Override
|
||||
def __init__(self, app):
|
||||
Markable.__init__(self)
|
||||
self.__groups = []
|
||||
self.__group_of_duplicate = {}
|
||||
self.__groups_sort_descriptor = None # This is a tuple (key, asc)
|
||||
self.__groups_sort_descriptor = None # This is a tuple (key, asc)
|
||||
self.__dupes = None
|
||||
self.__dupes_sort_descriptor = None # This is a tuple (key, asc, delta)
|
||||
self.__dupes_sort_descriptor = None # This is a tuple (key, asc, delta)
|
||||
self.__filters = None
|
||||
self.__filtered_dupes = None
|
||||
self.__filtered_groups = None
|
||||
self.__recalculate_stats()
|
||||
self.__marked_size = 0
|
||||
self.app = app
|
||||
self.problems = [] # (dupe, error_msg)
|
||||
self.problems = [] # (dupe, error_msg)
|
||||
self.is_modified = False
|
||||
|
||||
def _did_mark(self, dupe):
|
||||
@@ -90,7 +92,7 @@ class Results(Markable):
|
||||
else:
|
||||
Markable.mark_none(self)
|
||||
|
||||
#---Private
|
||||
# ---Private
|
||||
def __get_dupe_list(self):
|
||||
if self.__dupes is None:
|
||||
self.__dupes = flatten(group.dupes for group in self.groups)
|
||||
@@ -98,10 +100,13 @@ class Results(Markable):
|
||||
# This is debug logging to try to figure out #44
|
||||
logging.warning(
|
||||
"There is a None value in the Results' dupe list. dupes: %r groups: %r",
|
||||
self.__dupes, self.groups
|
||||
self.__dupes,
|
||||
self.groups,
|
||||
)
|
||||
if self.__filtered_dupes:
|
||||
self.__dupes = [dupe for dupe in self.__dupes if dupe in self.__filtered_dupes]
|
||||
self.__dupes = [
|
||||
dupe for dupe in self.__dupes if dupe in self.__filtered_dupes
|
||||
]
|
||||
sd = self.__dupes_sort_descriptor
|
||||
if sd:
|
||||
self.sort_dupes(sd[0], sd[1], sd[2])
|
||||
@@ -120,10 +125,18 @@ class Results(Markable):
|
||||
total_count = self.__total_count
|
||||
total_size = self.__total_size
|
||||
else:
|
||||
mark_count = len([dupe for dupe in self.__filtered_dupes if self.is_marked(dupe)])
|
||||
marked_size = sum(dupe.size for dupe in self.__filtered_dupes if self.is_marked(dupe))
|
||||
total_count = len([dupe for dupe in self.__filtered_dupes if self.is_markable(dupe)])
|
||||
total_size = sum(dupe.size for dupe in self.__filtered_dupes if self.is_markable(dupe))
|
||||
mark_count = len(
|
||||
[dupe for dupe in self.__filtered_dupes if self.is_marked(dupe)]
|
||||
)
|
||||
marked_size = sum(
|
||||
dupe.size for dupe in self.__filtered_dupes if self.is_marked(dupe)
|
||||
)
|
||||
total_count = len(
|
||||
[dupe for dupe in self.__filtered_dupes if self.is_markable(dupe)]
|
||||
)
|
||||
total_size = sum(
|
||||
dupe.size for dupe in self.__filtered_dupes if self.is_markable(dupe)
|
||||
)
|
||||
if self.mark_inverted:
|
||||
marked_size = self.__total_size - marked_size
|
||||
result = tr("%d / %d (%s / %s) duplicates marked.") % (
|
||||
@@ -133,7 +146,7 @@ class Results(Markable):
|
||||
format_size(total_size, 2),
|
||||
)
|
||||
if self.__filters:
|
||||
result += tr(" filter: %s") % ' --> '.join(self.__filters)
|
||||
result += tr(" filter: %s") % " --> ".join(self.__filters)
|
||||
return result
|
||||
|
||||
def __recalculate_stats(self):
|
||||
@@ -151,7 +164,7 @@ class Results(Markable):
|
||||
for g in self.__groups:
|
||||
for dupe in g:
|
||||
self.__group_of_duplicate[dupe] = g
|
||||
if not hasattr(dupe, 'is_ref'):
|
||||
if not hasattr(dupe, "is_ref"):
|
||||
dupe.is_ref = False
|
||||
self.is_modified = bool(self.__groups)
|
||||
old_filters = nonone(self.__filters, [])
|
||||
@@ -159,7 +172,7 @@ class Results(Markable):
|
||||
for filter_str in old_filters:
|
||||
self.apply_filter(filter_str)
|
||||
|
||||
#---Public
|
||||
# ---Public
|
||||
def apply_filter(self, filter_str):
|
||||
"""Applies a filter ``filter_str`` to :attr:`groups`
|
||||
|
||||
@@ -182,11 +195,15 @@ class Results(Markable):
|
||||
try:
|
||||
filter_re = re.compile(filter_str, re.IGNORECASE)
|
||||
except re.error:
|
||||
return # don't apply this filter.
|
||||
return # don't apply this filter.
|
||||
self.__filters.append(filter_str)
|
||||
if self.__filtered_dupes is None:
|
||||
self.__filtered_dupes = flatten(g[:] for g in self.groups)
|
||||
self.__filtered_dupes = set(dupe for dupe in self.__filtered_dupes if filter_re.search(str(dupe.path)))
|
||||
self.__filtered_dupes = set(
|
||||
dupe
|
||||
for dupe in self.__filtered_dupes
|
||||
if filter_re.search(str(dupe.path))
|
||||
)
|
||||
filtered_groups = set()
|
||||
for dupe in self.__filtered_dupes:
|
||||
filtered_groups.add(self.get_group_of_duplicate(dupe))
|
||||
@@ -214,6 +231,7 @@ class Results(Markable):
|
||||
:param get_file: a function f(path) returning a :class:`~core.fs.File` wrapping the path.
|
||||
:param j: A :ref:`job progress instance <jobs>`.
|
||||
"""
|
||||
|
||||
def do_match(ref_file, other_files, group):
|
||||
if not other_files:
|
||||
return
|
||||
@@ -223,31 +241,31 @@ class Results(Markable):
|
||||
|
||||
self.apply_filter(None)
|
||||
root = ET.parse(infile).getroot()
|
||||
group_elems = list(root.getiterator('group'))
|
||||
group_elems = list(root.getiterator("group"))
|
||||
groups = []
|
||||
marked = set()
|
||||
for group_elem in j.iter_with_progress(group_elems, every=100):
|
||||
group = engine.Group()
|
||||
dupes = []
|
||||
for file_elem in group_elem.getiterator('file'):
|
||||
path = file_elem.get('path')
|
||||
words = file_elem.get('words', '')
|
||||
for file_elem in group_elem.getiterator("file"):
|
||||
path = file_elem.get("path")
|
||||
words = file_elem.get("words", "")
|
||||
if not path:
|
||||
continue
|
||||
file = get_file(path)
|
||||
if file is None:
|
||||
continue
|
||||
file.words = words.split(',')
|
||||
file.is_ref = file_elem.get('is_ref') == 'y'
|
||||
file.words = words.split(",")
|
||||
file.is_ref = file_elem.get("is_ref") == "y"
|
||||
dupes.append(file)
|
||||
if file_elem.get('marked') == 'y':
|
||||
if file_elem.get("marked") == "y":
|
||||
marked.add(file)
|
||||
for match_elem in group_elem.getiterator('match'):
|
||||
for match_elem in group_elem.getiterator("match"):
|
||||
try:
|
||||
attrs = match_elem.attrib
|
||||
first_file = dupes[int(attrs['first'])]
|
||||
second_file = dupes[int(attrs['second'])]
|
||||
percentage = int(attrs['percentage'])
|
||||
first_file = dupes[int(attrs["first"])]
|
||||
second_file = dupes[int(attrs["second"])]
|
||||
percentage = int(attrs["percentage"])
|
||||
group.add_match(engine.Match(first_file, second_file, percentage))
|
||||
except (IndexError, KeyError, ValueError):
|
||||
# Covers missing attr, non-int values and indexes out of bounds
|
||||
@@ -339,9 +357,9 @@ class Results(Markable):
|
||||
:param outfile: file object or path.
|
||||
"""
|
||||
self.apply_filter(None)
|
||||
root = ET.Element('results')
|
||||
root = ET.Element("results")
|
||||
for g in self.groups:
|
||||
group_elem = ET.SubElement(root, 'group')
|
||||
group_elem = ET.SubElement(root, "group")
|
||||
dupe2index = {}
|
||||
for index, d in enumerate(g):
|
||||
dupe2index[d] = index
|
||||
@@ -349,24 +367,24 @@ class Results(Markable):
|
||||
words = engine.unpack_fields(d.words)
|
||||
except AttributeError:
|
||||
words = ()
|
||||
file_elem = ET.SubElement(group_elem, 'file')
|
||||
file_elem = ET.SubElement(group_elem, "file")
|
||||
try:
|
||||
file_elem.set('path', str(d.path))
|
||||
file_elem.set('words', ','.join(words))
|
||||
except ValueError: # If there's an invalid character, just skip the file
|
||||
file_elem.set('path', '')
|
||||
file_elem.set('is_ref', ('y' if d.is_ref else 'n'))
|
||||
file_elem.set('marked', ('y' if self.is_marked(d) else 'n'))
|
||||
file_elem.set("path", str(d.path))
|
||||
file_elem.set("words", ",".join(words))
|
||||
except ValueError: # If there's an invalid character, just skip the file
|
||||
file_elem.set("path", "")
|
||||
file_elem.set("is_ref", ("y" if d.is_ref else "n"))
|
||||
file_elem.set("marked", ("y" if self.is_marked(d) else "n"))
|
||||
for match in g.matches:
|
||||
match_elem = ET.SubElement(group_elem, 'match')
|
||||
match_elem.set('first', str(dupe2index[match.first]))
|
||||
match_elem.set('second', str(dupe2index[match.second]))
|
||||
match_elem.set('percentage', str(int(match.percentage)))
|
||||
match_elem = ET.SubElement(group_elem, "match")
|
||||
match_elem.set("first", str(dupe2index[match.first]))
|
||||
match_elem.set("second", str(dupe2index[match.second]))
|
||||
match_elem.set("percentage", str(int(match.percentage)))
|
||||
tree = ET.ElementTree(root)
|
||||
|
||||
def do_write(outfile):
|
||||
with FileOrPath(outfile, 'wb') as fp:
|
||||
tree.write(fp, encoding='utf-8')
|
||||
with FileOrPath(outfile, "wb") as fp:
|
||||
tree.write(fp, encoding="utf-8")
|
||||
|
||||
try:
|
||||
do_write(outfile)
|
||||
@@ -392,7 +410,9 @@ class Results(Markable):
|
||||
"""
|
||||
if not self.__dupes:
|
||||
self.__get_dupe_list()
|
||||
keyfunc = lambda d: self.app._get_dupe_sort_key(d, lambda: self.get_group_of_duplicate(d), key, delta)
|
||||
keyfunc = lambda d: self.app._get_dupe_sort_key(
|
||||
d, lambda: self.get_group_of_duplicate(d), key, delta
|
||||
)
|
||||
self.__dupes.sort(key=keyfunc, reverse=not asc)
|
||||
self.__dupes_sort_descriptor = (key, asc, delta)
|
||||
|
||||
@@ -408,8 +428,7 @@ class Results(Markable):
|
||||
self.groups.sort(key=keyfunc, reverse=not asc)
|
||||
self.__groups_sort_descriptor = (key, asc)
|
||||
|
||||
#---Properties
|
||||
# ---Properties
|
||||
dupes = property(__get_dupe_list)
|
||||
groups = property(__get_groups, __set_groups)
|
||||
stat_line = property(__get_stat_line)
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ from . import engine
|
||||
# there will be some nasty bugs popping up (ScanType is used in core when in should exclusively be
|
||||
# used in core_*). One day I'll clean this up.
|
||||
|
||||
|
||||
class ScanType:
|
||||
Filename = 0
|
||||
Fields = 1
|
||||
@@ -27,23 +28,26 @@ class ScanType:
|
||||
Folders = 4
|
||||
Contents = 5
|
||||
|
||||
#PE
|
||||
# PE
|
||||
FuzzyBlock = 10
|
||||
ExifTimestamp = 11
|
||||
|
||||
ScanOption = namedtuple('ScanOption', 'scan_type label')
|
||||
|
||||
SCANNABLE_TAGS = ['track', 'artist', 'album', 'title', 'genre', 'year']
|
||||
ScanOption = namedtuple("ScanOption", "scan_type label")
|
||||
|
||||
SCANNABLE_TAGS = ["track", "artist", "album", "title", "genre", "year"]
|
||||
|
||||
RE_DIGIT_ENDING = re.compile(r"\d+|\(\d+\)|\[\d+\]|{\d+}")
|
||||
|
||||
RE_DIGIT_ENDING = re.compile(r'\d+|\(\d+\)|\[\d+\]|{\d+}')
|
||||
|
||||
def is_same_with_digit(name, refname):
|
||||
# Returns True if name is the same as refname, but with digits (with brackets or not) at the end
|
||||
if not name.startswith(refname):
|
||||
return False
|
||||
end = name[len(refname):].strip()
|
||||
end = name[len(refname) :].strip()
|
||||
return RE_DIGIT_ENDING.match(end) is not None
|
||||
|
||||
|
||||
def remove_dupe_paths(files):
|
||||
# Returns files with duplicates-by-path removed. Files with the exact same path are considered
|
||||
# duplicates and only the first file to have a path is kept. In certain cases, we have files
|
||||
@@ -57,25 +61,29 @@ def remove_dupe_paths(files):
|
||||
if normalized in path2file:
|
||||
try:
|
||||
if op.samefile(normalized, str(path2file[normalized].path)):
|
||||
continue # same file, it's a dupe
|
||||
continue # same file, it's a dupe
|
||||
else:
|
||||
pass # We don't treat them as dupes
|
||||
pass # We don't treat them as dupes
|
||||
except OSError:
|
||||
continue # File doesn't exist? Well, treat them as dupes
|
||||
continue # File doesn't exist? Well, treat them as dupes
|
||||
else:
|
||||
path2file[normalized] = f
|
||||
result.append(f)
|
||||
return result
|
||||
|
||||
|
||||
class Scanner:
|
||||
def __init__(self):
|
||||
self.discarded_file_count = 0
|
||||
|
||||
def _getmatches(self, files, j):
|
||||
if self.size_threshold or self.scan_type in {ScanType.Contents, ScanType.Folders}:
|
||||
if self.size_threshold or self.scan_type in {
|
||||
ScanType.Contents,
|
||||
ScanType.Folders,
|
||||
}:
|
||||
j = j.start_subjob([2, 8])
|
||||
for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
|
||||
f.size # pre-read, makes a smoother progress if read here (especially for bundles)
|
||||
f.size # pre-read, makes a smoother progress if read here (especially for bundles)
|
||||
if self.size_threshold:
|
||||
files = [f for f in files if f.size >= self.size_threshold]
|
||||
if self.scan_type in {ScanType.Contents, ScanType.Folders}:
|
||||
@@ -83,12 +91,12 @@ class Scanner:
|
||||
else:
|
||||
j = j.start_subjob([2, 8])
|
||||
kw = {}
|
||||
kw['match_similar_words'] = self.match_similar_words
|
||||
kw['weight_words'] = self.word_weighting
|
||||
kw['min_match_percentage'] = self.min_match_percentage
|
||||
kw["match_similar_words"] = self.match_similar_words
|
||||
kw["weight_words"] = self.word_weighting
|
||||
kw["min_match_percentage"] = self.min_match_percentage
|
||||
if self.scan_type == ScanType.FieldsNoOrder:
|
||||
self.scan_type = ScanType.Fields
|
||||
kw['no_field_order'] = True
|
||||
kw["no_field_order"] = True
|
||||
func = {
|
||||
ScanType.Filename: lambda f: engine.getwords(rem_file_ext(f.name)),
|
||||
ScanType.Fields: lambda f: engine.getfields(rem_file_ext(f.name)),
|
||||
@@ -111,9 +119,9 @@ class Scanner:
|
||||
def _tie_breaker(ref, dupe):
|
||||
refname = rem_file_ext(ref.name).lower()
|
||||
dupename = rem_file_ext(dupe.name).lower()
|
||||
if 'copy' in dupename:
|
||||
if "copy" in dupename:
|
||||
return False
|
||||
if 'copy' in refname:
|
||||
if "copy" in refname:
|
||||
return True
|
||||
if is_same_with_digit(dupename, refname):
|
||||
return False
|
||||
@@ -130,12 +138,12 @@ class Scanner:
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_dupe_groups(self, files, ignore_list=None, j=job.nulljob):
|
||||
for f in (f for f in files if not hasattr(f, 'is_ref')):
|
||||
for f in (f for f in files if not hasattr(f, "is_ref")):
|
||||
f.is_ref = False
|
||||
files = remove_dupe_paths(files)
|
||||
logging.info("Getting matches. Scan type: %d", self.scan_type)
|
||||
matches = self._getmatches(files, j)
|
||||
logging.info('Found %d matches' % len(matches))
|
||||
logging.info("Found %d matches" % len(matches))
|
||||
j.set_progress(100, tr("Almost done! Fiddling with results..."))
|
||||
# In removing what we call here "false matches", we first want to remove, if we scan by
|
||||
# folders, we want to remove folder matches for which the parent is also in a match (they're
|
||||
@@ -153,20 +161,38 @@ class Scanner:
|
||||
toremove.add(p)
|
||||
else:
|
||||
last_parent_path = p
|
||||
matches = [m for m in matches if m.first.path not in toremove or m.second.path not in toremove]
|
||||
matches = [
|
||||
m
|
||||
for m in matches
|
||||
if m.first.path not in toremove or m.second.path not in toremove
|
||||
]
|
||||
if not self.mix_file_kind:
|
||||
matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
|
||||
matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
|
||||
matches = [
|
||||
m
|
||||
for m in matches
|
||||
if get_file_ext(m.first.name) == get_file_ext(m.second.name)
|
||||
]
|
||||
matches = [
|
||||
m for m in matches if m.first.path.exists() and m.second.path.exists()
|
||||
]
|
||||
matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
|
||||
if ignore_list:
|
||||
matches = [
|
||||
m for m in matches
|
||||
m
|
||||
for m in matches
|
||||
if not ignore_list.AreIgnored(str(m.first.path), str(m.second.path))
|
||||
]
|
||||
logging.info('Grouping matches')
|
||||
logging.info("Grouping matches")
|
||||
groups = engine.get_groups(matches)
|
||||
if self.scan_type in {ScanType.Filename, ScanType.Fields, ScanType.FieldsNoOrder, ScanType.Tag}:
|
||||
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
|
||||
if self.scan_type in {
|
||||
ScanType.Filename,
|
||||
ScanType.Fields,
|
||||
ScanType.FieldsNoOrder,
|
||||
ScanType.Tag,
|
||||
}:
|
||||
matched_files = dedupe(
|
||||
[m.first for m in matches] + [m.second for m in matches]
|
||||
)
|
||||
self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
|
||||
else:
|
||||
# Ticket #195
|
||||
@@ -181,7 +207,7 @@ class Scanner:
|
||||
# reporting discarded matches.
|
||||
self.discarded_file_count = 0
|
||||
groups = [g for g in groups if any(not f.is_ref for f in g)]
|
||||
logging.info('Created %d groups' % len(groups))
|
||||
logging.info("Created %d groups" % len(groups))
|
||||
for g in groups:
|
||||
g.prioritize(self._key_func, self._tie_breaker)
|
||||
return groups
|
||||
@@ -190,7 +216,6 @@ class Scanner:
|
||||
min_match_percentage = 80
|
||||
mix_file_kind = True
|
||||
scan_type = ScanType.Filename
|
||||
scanned_tags = {'artist', 'title'}
|
||||
scanned_tags = {"artist", "title"}
|
||||
size_threshold = 0
|
||||
word_weighting = False
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
from . import fs, result_table, scanner # noqa
|
||||
from . import fs, result_table, scanner # noqa
|
||||
|
||||
@@ -11,6 +11,7 @@ from hscommon.util import format_size
|
||||
from core import fs
|
||||
from core.util import format_timestamp, format_perc, format_words, format_dupe_count
|
||||
|
||||
|
||||
def get_display_info(dupe, group, delta):
|
||||
size = dupe.size
|
||||
mtime = dupe.mtime
|
||||
@@ -26,16 +27,17 @@ def get_display_info(dupe, group, delta):
|
||||
percentage = group.percentage
|
||||
dupe_count = len(group.dupes)
|
||||
return {
|
||||
'name': dupe.name,
|
||||
'folder_path': str(dupe.folder_path),
|
||||
'size': format_size(size, 0, 1, False),
|
||||
'extension': dupe.extension,
|
||||
'mtime': format_timestamp(mtime, delta and m),
|
||||
'percentage': format_perc(percentage),
|
||||
'words': format_words(dupe.words) if hasattr(dupe, 'words') else '',
|
||||
'dupe_count': format_dupe_count(dupe_count),
|
||||
"name": dupe.name,
|
||||
"folder_path": str(dupe.folder_path),
|
||||
"size": format_size(size, 0, 1, False),
|
||||
"extension": dupe.extension,
|
||||
"mtime": format_timestamp(mtime, delta and m),
|
||||
"percentage": format_perc(percentage),
|
||||
"words": format_words(dupe.words) if hasattr(dupe, "words") else "",
|
||||
"dupe_count": format_dupe_count(dupe_count),
|
||||
}
|
||||
|
||||
|
||||
class File(fs.File):
|
||||
def get_display_info(self, group, delta):
|
||||
return get_display_info(self, group, delta)
|
||||
@@ -44,4 +46,3 @@ class File(fs.File):
|
||||
class Folder(fs.Folder):
|
||||
def get_display_info(self, group, delta):
|
||||
return get_display_info(self, group, delta)
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Created On: 2011-11-27
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
from hscommon.gui.column import Column
|
||||
@@ -10,18 +10,19 @@ from hscommon.trans import trget
|
||||
|
||||
from core.gui.result_table import ResultTable as ResultTableBase
|
||||
|
||||
coltr = trget('columns')
|
||||
coltr = trget("columns")
|
||||
|
||||
|
||||
class ResultTable(ResultTableBase):
|
||||
COLUMNS = [
|
||||
Column('marked', ''),
|
||||
Column('name', coltr("Filename")),
|
||||
Column('folder_path', coltr("Folder"), optional=True),
|
||||
Column('size', coltr("Size (KB)"), optional=True),
|
||||
Column('extension', coltr("Kind"), visible=False, optional=True),
|
||||
Column('mtime', coltr("Modification"), visible=False, optional=True),
|
||||
Column('percentage', coltr("Match %"), optional=True),
|
||||
Column('words', coltr("Words Used"), visible=False, optional=True),
|
||||
Column('dupe_count', coltr("Dupe Count"), visible=False, optional=True),
|
||||
Column("marked", ""),
|
||||
Column("name", coltr("Filename")),
|
||||
Column("folder_path", coltr("Folder"), optional=True),
|
||||
Column("size", coltr("Size (KB)"), optional=True),
|
||||
Column("extension", coltr("Kind"), visible=False, optional=True),
|
||||
Column("mtime", coltr("Modification"), visible=False, optional=True),
|
||||
Column("percentage", coltr("Match %"), optional=True),
|
||||
Column("words", coltr("Words Used"), visible=False, optional=True),
|
||||
Column("dupe_count", coltr("Dupe Count"), visible=False, optional=True),
|
||||
]
|
||||
DELTA_COLUMNS = {'size', 'mtime'}
|
||||
DELTA_COLUMNS = {"size", "mtime"}
|
||||
|
||||
@@ -8,6 +8,7 @@ from hscommon.trans import tr
|
||||
|
||||
from core.scanner import Scanner as ScannerBase, ScanOption, ScanType
|
||||
|
||||
|
||||
class ScannerSE(ScannerBase):
|
||||
@staticmethod
|
||||
def get_scan_options():
|
||||
@@ -16,4 +17,3 @@ class ScannerSE(ScannerBase):
|
||||
ScanOption(ScanType.Contents, tr("Contents")),
|
||||
ScanOption(ScanType.Folders, tr("Folders")),
|
||||
]
|
||||
|
||||
|
||||
@@ -20,93 +20,106 @@ from .results_test import GetTestGroups
|
||||
from .. import app, fs, engine
|
||||
from ..scanner import ScanType
|
||||
|
||||
|
||||
def add_fake_files_to_directories(directories, files):
|
||||
directories.get_files = lambda j=None: iter(files)
|
||||
directories._dirs.append('this is just so Scan() doesnt return 3')
|
||||
directories._dirs.append("this is just so Scan() doesnt return 3")
|
||||
|
||||
|
||||
class TestCaseDupeGuru:
|
||||
def test_apply_filter_calls_results_apply_filter(self, monkeypatch):
|
||||
dgapp = TestApp().app
|
||||
monkeypatch.setattr(dgapp.results, 'apply_filter', log_calls(dgapp.results.apply_filter))
|
||||
dgapp.apply_filter('foo')
|
||||
monkeypatch.setattr(
|
||||
dgapp.results, "apply_filter", log_calls(dgapp.results.apply_filter)
|
||||
)
|
||||
dgapp.apply_filter("foo")
|
||||
eq_(2, len(dgapp.results.apply_filter.calls))
|
||||
call = dgapp.results.apply_filter.calls[0]
|
||||
assert call['filter_str'] is None
|
||||
assert call["filter_str"] is None
|
||||
call = dgapp.results.apply_filter.calls[1]
|
||||
eq_('foo', call['filter_str'])
|
||||
eq_("foo", call["filter_str"])
|
||||
|
||||
def test_apply_filter_escapes_regexp(self, monkeypatch):
|
||||
dgapp = TestApp().app
|
||||
monkeypatch.setattr(dgapp.results, 'apply_filter', log_calls(dgapp.results.apply_filter))
|
||||
dgapp.apply_filter('()[]\\.|+?^abc')
|
||||
monkeypatch.setattr(
|
||||
dgapp.results, "apply_filter", log_calls(dgapp.results.apply_filter)
|
||||
)
|
||||
dgapp.apply_filter("()[]\\.|+?^abc")
|
||||
call = dgapp.results.apply_filter.calls[1]
|
||||
eq_('\\(\\)\\[\\]\\\\\\.\\|\\+\\?\\^abc', call['filter_str'])
|
||||
dgapp.apply_filter('(*)') # In "simple mode", we want the * to behave as a wilcard
|
||||
eq_("\\(\\)\\[\\]\\\\\\.\\|\\+\\?\\^abc", call["filter_str"])
|
||||
dgapp.apply_filter(
|
||||
"(*)"
|
||||
) # In "simple mode", we want the * to behave as a wilcard
|
||||
call = dgapp.results.apply_filter.calls[3]
|
||||
eq_(r'\(.*\)', call['filter_str'])
|
||||
dgapp.options['escape_filter_regexp'] = False
|
||||
dgapp.apply_filter('(abc)')
|
||||
eq_(r"\(.*\)", call["filter_str"])
|
||||
dgapp.options["escape_filter_regexp"] = False
|
||||
dgapp.apply_filter("(abc)")
|
||||
call = dgapp.results.apply_filter.calls[5]
|
||||
eq_('(abc)', call['filter_str'])
|
||||
eq_("(abc)", call["filter_str"])
|
||||
|
||||
def test_copy_or_move(self, tmpdir, monkeypatch):
|
||||
# The goal here is just to have a test for a previous blowup I had. I know my test coverage
|
||||
# for this unit is pathetic. What's done is done. My approach now is to add tests for
|
||||
# every change I want to make. The blowup was caused by a missing import.
|
||||
p = Path(str(tmpdir))
|
||||
p['foo'].open('w').close()
|
||||
monkeypatch.setattr(hscommon.conflict, 'smart_copy', log_calls(lambda source_path, dest_path: None))
|
||||
p["foo"].open("w").close()
|
||||
monkeypatch.setattr(
|
||||
hscommon.conflict,
|
||||
"smart_copy",
|
||||
log_calls(lambda source_path, dest_path: None),
|
||||
)
|
||||
# XXX This monkeypatch is temporary. will be fixed in a better monkeypatcher.
|
||||
monkeypatch.setattr(app, 'smart_copy', hscommon.conflict.smart_copy)
|
||||
monkeypatch.setattr(os, 'makedirs', lambda path: None) # We don't want the test to create that fake directory
|
||||
monkeypatch.setattr(app, "smart_copy", hscommon.conflict.smart_copy)
|
||||
monkeypatch.setattr(
|
||||
os, "makedirs", lambda path: None
|
||||
) # We don't want the test to create that fake directory
|
||||
dgapp = TestApp().app
|
||||
dgapp.directories.add_path(p)
|
||||
[f] = dgapp.directories.get_files()
|
||||
dgapp.copy_or_move(f, True, 'some_destination', 0)
|
||||
dgapp.copy_or_move(f, True, "some_destination", 0)
|
||||
eq_(1, len(hscommon.conflict.smart_copy.calls))
|
||||
call = hscommon.conflict.smart_copy.calls[0]
|
||||
eq_(call['dest_path'], op.join('some_destination', 'foo'))
|
||||
eq_(call['source_path'], f.path)
|
||||
eq_(call["dest_path"], op.join("some_destination", "foo"))
|
||||
eq_(call["source_path"], f.path)
|
||||
|
||||
def test_copy_or_move_clean_empty_dirs(self, tmpdir, monkeypatch):
|
||||
tmppath = Path(str(tmpdir))
|
||||
sourcepath = tmppath['source']
|
||||
sourcepath = tmppath["source"]
|
||||
sourcepath.mkdir()
|
||||
sourcepath['myfile'].open('w')
|
||||
sourcepath["myfile"].open("w")
|
||||
app = TestApp().app
|
||||
app.directories.add_path(tmppath)
|
||||
[myfile] = app.directories.get_files()
|
||||
monkeypatch.setattr(app, 'clean_empty_dirs', log_calls(lambda path: None))
|
||||
app.copy_or_move(myfile, False, tmppath['dest'], 0)
|
||||
monkeypatch.setattr(app, "clean_empty_dirs", log_calls(lambda path: None))
|
||||
app.copy_or_move(myfile, False, tmppath["dest"], 0)
|
||||
calls = app.clean_empty_dirs.calls
|
||||
eq_(1, len(calls))
|
||||
eq_(sourcepath, calls[0]['path'])
|
||||
eq_(sourcepath, calls[0]["path"])
|
||||
|
||||
def test_Scan_with_objects_evaluating_to_false(self):
|
||||
class FakeFile(fs.File):
|
||||
def __bool__(self):
|
||||
return False
|
||||
|
||||
|
||||
# At some point, any() was used in a wrong way that made Scan() wrongly return 1
|
||||
app = TestApp().app
|
||||
f1, f2 = [FakeFile('foo') for i in range(2)]
|
||||
f1, f2 = [FakeFile("foo") for i in range(2)]
|
||||
f1.is_ref, f2.is_ref = (False, False)
|
||||
assert not (bool(f1) and bool(f2))
|
||||
add_fake_files_to_directories(app.directories, [f1, f2])
|
||||
app.start_scanning() # no exception
|
||||
app.start_scanning() # no exception
|
||||
|
||||
@mark.skipif("not hasattr(os, 'link')")
|
||||
def test_ignore_hardlink_matches(self, tmpdir):
|
||||
# If the ignore_hardlink_matches option is set, don't match files hardlinking to the same
|
||||
# inode.
|
||||
tmppath = Path(str(tmpdir))
|
||||
tmppath['myfile'].open('w').write('foo')
|
||||
os.link(str(tmppath['myfile']), str(tmppath['hardlink']))
|
||||
tmppath["myfile"].open("w").write("foo")
|
||||
os.link(str(tmppath["myfile"]), str(tmppath["hardlink"]))
|
||||
app = TestApp().app
|
||||
app.directories.add_path(tmppath)
|
||||
app.options['scan_type'] = ScanType.Contents
|
||||
app.options['ignore_hardlink_matches'] = True
|
||||
app.options["scan_type"] = ScanType.Contents
|
||||
app.options["ignore_hardlink_matches"] = True
|
||||
app.start_scanning()
|
||||
eq_(len(app.results.groups), 0)
|
||||
|
||||
@@ -116,27 +129,32 @@ class TestCaseDupeGuru:
|
||||
# making the selected row None. Don't crash when it happens.
|
||||
dgapp = TestApp().app
|
||||
# selected_row is None because there's no result.
|
||||
assert not dgapp.result_table.rename_selected('foo') # no crash
|
||||
assert not dgapp.result_table.rename_selected("foo") # no crash
|
||||
|
||||
|
||||
class TestCaseDupeGuru_clean_empty_dirs:
|
||||
def pytest_funcarg__do_setup(self, request):
|
||||
monkeypatch = request.getfuncargvalue('monkeypatch')
|
||||
monkeypatch.setattr(hscommon.util, 'delete_if_empty', log_calls(lambda path, files_to_delete=[]: None))
|
||||
monkeypatch = request.getfuncargvalue("monkeypatch")
|
||||
monkeypatch.setattr(
|
||||
hscommon.util,
|
||||
"delete_if_empty",
|
||||
log_calls(lambda path, files_to_delete=[]: None),
|
||||
)
|
||||
# XXX This monkeypatch is temporary. will be fixed in a better monkeypatcher.
|
||||
monkeypatch.setattr(app, 'delete_if_empty', hscommon.util.delete_if_empty)
|
||||
monkeypatch.setattr(app, "delete_if_empty", hscommon.util.delete_if_empty)
|
||||
self.app = TestApp().app
|
||||
|
||||
def test_option_off(self, do_setup):
|
||||
self.app.clean_empty_dirs(Path('/foo/bar'))
|
||||
self.app.clean_empty_dirs(Path("/foo/bar"))
|
||||
eq_(0, len(hscommon.util.delete_if_empty.calls))
|
||||
|
||||
def test_option_on(self, do_setup):
|
||||
self.app.options['clean_empty_dirs'] = True
|
||||
self.app.clean_empty_dirs(Path('/foo/bar'))
|
||||
self.app.options["clean_empty_dirs"] = True
|
||||
self.app.clean_empty_dirs(Path("/foo/bar"))
|
||||
calls = hscommon.util.delete_if_empty.calls
|
||||
eq_(1, len(calls))
|
||||
eq_(Path('/foo/bar'), calls[0]['path'])
|
||||
eq_(['.DS_Store'], calls[0]['files_to_delete'])
|
||||
eq_(Path("/foo/bar"), calls[0]["path"])
|
||||
eq_([".DS_Store"], calls[0]["files_to_delete"])
|
||||
|
||||
def test_recurse_up(self, do_setup, monkeypatch):
|
||||
# delete_if_empty must be recursively called up in the path until it returns False
|
||||
@@ -144,16 +162,16 @@ class TestCaseDupeGuru_clean_empty_dirs:
|
||||
def mock_delete_if_empty(path, files_to_delete=[]):
|
||||
return len(path) > 1
|
||||
|
||||
monkeypatch.setattr(hscommon.util, 'delete_if_empty', mock_delete_if_empty)
|
||||
monkeypatch.setattr(hscommon.util, "delete_if_empty", mock_delete_if_empty)
|
||||
# XXX This monkeypatch is temporary. will be fixed in a better monkeypatcher.
|
||||
monkeypatch.setattr(app, 'delete_if_empty', mock_delete_if_empty)
|
||||
self.app.options['clean_empty_dirs'] = True
|
||||
self.app.clean_empty_dirs(Path('not-empty/empty/empty'))
|
||||
monkeypatch.setattr(app, "delete_if_empty", mock_delete_if_empty)
|
||||
self.app.options["clean_empty_dirs"] = True
|
||||
self.app.clean_empty_dirs(Path("not-empty/empty/empty"))
|
||||
calls = hscommon.util.delete_if_empty.calls
|
||||
eq_(3, len(calls))
|
||||
eq_(Path('not-empty/empty/empty'), calls[0]['path'])
|
||||
eq_(Path('not-empty/empty'), calls[1]['path'])
|
||||
eq_(Path('not-empty'), calls[2]['path'])
|
||||
eq_(Path("not-empty/empty/empty"), calls[0]["path"])
|
||||
eq_(Path("not-empty/empty"), calls[1]["path"])
|
||||
eq_(Path("not-empty"), calls[2]["path"])
|
||||
|
||||
|
||||
class TestCaseDupeGuruWithResults:
|
||||
@@ -166,10 +184,10 @@ class TestCaseDupeGuruWithResults:
|
||||
self.dtree = app.dtree
|
||||
self.rtable = app.rtable
|
||||
self.rtable.refresh()
|
||||
tmpdir = request.getfuncargvalue('tmpdir')
|
||||
tmpdir = request.getfuncargvalue("tmpdir")
|
||||
tmppath = Path(str(tmpdir))
|
||||
tmppath['foo'].mkdir()
|
||||
tmppath['bar'].mkdir()
|
||||
tmppath["foo"].mkdir()
|
||||
tmppath["bar"].mkdir()
|
||||
self.app.directories.add_path(tmppath)
|
||||
|
||||
def test_GetObjects(self, do_setup):
|
||||
@@ -187,8 +205,8 @@ class TestCaseDupeGuruWithResults:
|
||||
|
||||
def test_GetObjects_after_sort(self, do_setup):
|
||||
objects = self.objects
|
||||
groups = self.groups[:] # we need an un-sorted reference
|
||||
self.rtable.sort('name', False)
|
||||
groups = self.groups[:] # we need an un-sorted reference
|
||||
self.rtable.sort("name", False)
|
||||
r = self.rtable[1]
|
||||
assert r._group is groups[1]
|
||||
assert r._dupe is objects[4]
|
||||
@@ -198,7 +216,7 @@ class TestCaseDupeGuruWithResults:
|
||||
self.rtable.select([1, 2, 3])
|
||||
self.app.remove_selected()
|
||||
# The first 2 dupes have been removed. The 3rd one is a ref. it stays there, in first pos.
|
||||
eq_(self.rtable.selected_indexes, [1]) # no exception
|
||||
eq_(self.rtable.selected_indexes, [1]) # no exception
|
||||
|
||||
def test_selectResultNodePaths(self, do_setup):
|
||||
app = self.app
|
||||
@@ -220,9 +238,9 @@ class TestCaseDupeGuruWithResults:
|
||||
def test_selectResultNodePaths_after_sort(self, do_setup):
|
||||
app = self.app
|
||||
objects = self.objects
|
||||
groups = self.groups[:] #To keep the old order in memory
|
||||
self.rtable.sort('name', False) #0
|
||||
#Now, the group order is supposed to be reversed
|
||||
groups = self.groups[:] # To keep the old order in memory
|
||||
self.rtable.sort("name", False) # 0
|
||||
# Now, the group order is supposed to be reversed
|
||||
self.rtable.select([1, 2, 3])
|
||||
eq_(len(app.selected_dupes), 3)
|
||||
assert app.selected_dupes[0] is objects[4]
|
||||
@@ -242,13 +260,13 @@ class TestCaseDupeGuruWithResults:
|
||||
self.rtable.power_marker = True
|
||||
self.rtable.select([0, 1, 2])
|
||||
app.remove_selected()
|
||||
eq_(self.rtable.selected_indexes, []) # no exception
|
||||
eq_(self.rtable.selected_indexes, []) # no exception
|
||||
|
||||
def test_selectPowerMarkerRows_after_sort(self, do_setup):
|
||||
app = self.app
|
||||
objects = self.objects
|
||||
self.rtable.power_marker = True
|
||||
self.rtable.sort('name', False)
|
||||
self.rtable.sort("name", False)
|
||||
self.rtable.select([0, 1, 2])
|
||||
eq_(len(app.selected_dupes), 3)
|
||||
assert app.selected_dupes[0] is objects[4]
|
||||
@@ -285,11 +303,11 @@ class TestCaseDupeGuruWithResults:
|
||||
|
||||
def test_refreshDetailsWithSelected(self, do_setup):
|
||||
self.rtable.select([1, 4])
|
||||
eq_(self.dpanel.row(0), ('Filename', 'bar bleh', 'foo bar'))
|
||||
self.dpanel.view.check_gui_calls(['refresh'])
|
||||
eq_(self.dpanel.row(0), ("Filename", "bar bleh", "foo bar"))
|
||||
self.dpanel.view.check_gui_calls(["refresh"])
|
||||
self.rtable.select([])
|
||||
eq_(self.dpanel.row(0), ('Filename', '---', '---'))
|
||||
self.dpanel.view.check_gui_calls(['refresh'])
|
||||
eq_(self.dpanel.row(0), ("Filename", "---", "---"))
|
||||
self.dpanel.view.check_gui_calls(["refresh"])
|
||||
|
||||
def test_makeSelectedReference(self, do_setup):
|
||||
app = self.app
|
||||
@@ -300,12 +318,14 @@ class TestCaseDupeGuruWithResults:
|
||||
assert groups[0].ref is objects[1]
|
||||
assert groups[1].ref is objects[4]
|
||||
|
||||
def test_makeSelectedReference_by_selecting_two_dupes_in_the_same_group(self, do_setup):
|
||||
def test_makeSelectedReference_by_selecting_two_dupes_in_the_same_group(
|
||||
self, do_setup
|
||||
):
|
||||
app = self.app
|
||||
objects = self.objects
|
||||
groups = self.groups
|
||||
self.rtable.select([1, 2, 4])
|
||||
#Only [0, 0] and [1, 0] must go ref, not [0, 1] because it is a part of the same group
|
||||
# Only [0, 0] and [1, 0] must go ref, not [0, 1] because it is a part of the same group
|
||||
app.make_selected_reference()
|
||||
assert groups[0].ref is objects[1]
|
||||
assert groups[1].ref is objects[4]
|
||||
@@ -314,7 +334,7 @@ class TestCaseDupeGuruWithResults:
|
||||
app = self.app
|
||||
self.rtable.select([1, 4])
|
||||
app.remove_selected()
|
||||
eq_(len(app.results.dupes), 1) # the first path is now selected
|
||||
eq_(len(app.results.dupes), 1) # the first path is now selected
|
||||
app.remove_selected()
|
||||
eq_(len(app.results.dupes), 0)
|
||||
|
||||
@@ -336,27 +356,27 @@ class TestCaseDupeGuruWithResults:
|
||||
|
||||
def test_addDirectory_does_not_exist(self, do_setup):
|
||||
app = self.app
|
||||
app.add_directory('/does_not_exist')
|
||||
app.add_directory("/does_not_exist")
|
||||
eq_(len(app.view.messages), 1)
|
||||
assert "exist" in app.view.messages[0]
|
||||
|
||||
def test_ignore(self, do_setup):
|
||||
app = self.app
|
||||
self.rtable.select([4]) #The dupe of the second, 2 sized group
|
||||
self.rtable.select([4]) # The dupe of the second, 2 sized group
|
||||
app.add_selected_to_ignore_list()
|
||||
eq_(len(app.ignore_list), 1)
|
||||
self.rtable.select([1]) #first dupe of the 3 dupes group
|
||||
self.rtable.select([1]) # first dupe of the 3 dupes group
|
||||
app.add_selected_to_ignore_list()
|
||||
#BOTH the ref and the other dupe should have been added
|
||||
# BOTH the ref and the other dupe should have been added
|
||||
eq_(len(app.ignore_list), 3)
|
||||
|
||||
def test_purgeIgnoreList(self, do_setup, tmpdir):
|
||||
app = self.app
|
||||
p1 = str(tmpdir.join('file1'))
|
||||
p2 = str(tmpdir.join('file2'))
|
||||
open(p1, 'w').close()
|
||||
open(p2, 'w').close()
|
||||
dne = '/does_not_exist'
|
||||
p1 = str(tmpdir.join("file1"))
|
||||
p2 = str(tmpdir.join("file2"))
|
||||
open(p1, "w").close()
|
||||
open(p2, "w").close()
|
||||
dne = "/does_not_exist"
|
||||
app.ignore_list.Ignore(dne, p1)
|
||||
app.ignore_list.Ignore(p2, dne)
|
||||
app.ignore_list.Ignore(p1, p2)
|
||||
@@ -381,9 +401,11 @@ class TestCaseDupeGuruWithResults:
|
||||
# When doing a scan with results being present prior to the scan, correctly invalidate the
|
||||
# results table.
|
||||
app = self.app
|
||||
app.JOB = Job(1, lambda *args, **kw: False) # Cancels the task
|
||||
add_fake_files_to_directories(app.directories, self.objects) # We want the scan to at least start
|
||||
app.start_scanning() # will be cancelled immediately
|
||||
app.JOB = Job(1, lambda *args, **kw: False) # Cancels the task
|
||||
add_fake_files_to_directories(
|
||||
app.directories, self.objects
|
||||
) # We want the scan to at least start
|
||||
app.start_scanning() # will be cancelled immediately
|
||||
eq_(len(app.result_table), 0)
|
||||
|
||||
def test_selected_dupes_after_removal(self, do_setup):
|
||||
@@ -401,21 +423,21 @@ class TestCaseDupeGuruWithResults:
|
||||
# Ref #238
|
||||
self.rtable.delta_values = True
|
||||
self.rtable.power_marker = True
|
||||
self.rtable.sort('dupe_count', False)
|
||||
self.rtable.sort("dupe_count", False)
|
||||
# don't crash
|
||||
self.rtable.sort('percentage', False)
|
||||
self.rtable.sort("percentage", False)
|
||||
# don't crash
|
||||
|
||||
|
||||
class TestCaseDupeGuru_renameSelected:
|
||||
def pytest_funcarg__do_setup(self, request):
|
||||
tmpdir = request.getfuncargvalue('tmpdir')
|
||||
tmpdir = request.getfuncargvalue("tmpdir")
|
||||
p = Path(str(tmpdir))
|
||||
fp = open(str(p['foo bar 1']), mode='w')
|
||||
fp = open(str(p["foo bar 1"]), mode="w")
|
||||
fp.close()
|
||||
fp = open(str(p['foo bar 2']), mode='w')
|
||||
fp = open(str(p["foo bar 2"]), mode="w")
|
||||
fp.close()
|
||||
fp = open(str(p['foo bar 3']), mode='w')
|
||||
fp = open(str(p["foo bar 3"]), mode="w")
|
||||
fp.close()
|
||||
files = fs.get_files(p)
|
||||
for f in files:
|
||||
@@ -437,46 +459,46 @@ class TestCaseDupeGuru_renameSelected:
|
||||
app = self.app
|
||||
g = self.groups[0]
|
||||
self.rtable.select([1])
|
||||
assert app.rename_selected('renamed')
|
||||
assert app.rename_selected("renamed")
|
||||
names = [p.name for p in self.p.listdir()]
|
||||
assert 'renamed' in names
|
||||
assert 'foo bar 2' not in names
|
||||
eq_(g.dupes[0].name, 'renamed')
|
||||
assert "renamed" in names
|
||||
assert "foo bar 2" not in names
|
||||
eq_(g.dupes[0].name, "renamed")
|
||||
|
||||
def test_none_selected(self, do_setup, monkeypatch):
|
||||
app = self.app
|
||||
g = self.groups[0]
|
||||
self.rtable.select([])
|
||||
monkeypatch.setattr(logging, 'warning', log_calls(lambda msg: None))
|
||||
assert not app.rename_selected('renamed')
|
||||
msg = logging.warning.calls[0]['msg']
|
||||
eq_('dupeGuru Warning: list index out of range', msg)
|
||||
monkeypatch.setattr(logging, "warning", log_calls(lambda msg: None))
|
||||
assert not app.rename_selected("renamed")
|
||||
msg = logging.warning.calls[0]["msg"]
|
||||
eq_("dupeGuru Warning: list index out of range", msg)
|
||||
names = [p.name for p in self.p.listdir()]
|
||||
assert 'renamed' not in names
|
||||
assert 'foo bar 2' in names
|
||||
eq_(g.dupes[0].name, 'foo bar 2')
|
||||
assert "renamed" not in names
|
||||
assert "foo bar 2" in names
|
||||
eq_(g.dupes[0].name, "foo bar 2")
|
||||
|
||||
def test_name_already_exists(self, do_setup, monkeypatch):
|
||||
app = self.app
|
||||
g = self.groups[0]
|
||||
self.rtable.select([1])
|
||||
monkeypatch.setattr(logging, 'warning', log_calls(lambda msg: None))
|
||||
assert not app.rename_selected('foo bar 1')
|
||||
msg = logging.warning.calls[0]['msg']
|
||||
assert msg.startswith('dupeGuru Warning: \'foo bar 1\' already exists in')
|
||||
monkeypatch.setattr(logging, "warning", log_calls(lambda msg: None))
|
||||
assert not app.rename_selected("foo bar 1")
|
||||
msg = logging.warning.calls[0]["msg"]
|
||||
assert msg.startswith("dupeGuru Warning: 'foo bar 1' already exists in")
|
||||
names = [p.name for p in self.p.listdir()]
|
||||
assert 'foo bar 1' in names
|
||||
assert 'foo bar 2' in names
|
||||
eq_(g.dupes[0].name, 'foo bar 2')
|
||||
assert "foo bar 1" in names
|
||||
assert "foo bar 2" in names
|
||||
eq_(g.dupes[0].name, "foo bar 2")
|
||||
|
||||
|
||||
class TestAppWithDirectoriesInTree:
|
||||
def pytest_funcarg__do_setup(self, request):
|
||||
tmpdir = request.getfuncargvalue('tmpdir')
|
||||
tmpdir = request.getfuncargvalue("tmpdir")
|
||||
p = Path(str(tmpdir))
|
||||
p['sub1'].mkdir()
|
||||
p['sub2'].mkdir()
|
||||
p['sub3'].mkdir()
|
||||
p["sub1"].mkdir()
|
||||
p["sub2"].mkdir()
|
||||
p["sub3"].mkdir()
|
||||
app = TestApp()
|
||||
self.app = app.app
|
||||
self.dtree = app.dtree
|
||||
@@ -487,12 +509,11 @@ class TestAppWithDirectoriesInTree:
|
||||
# Setting a node state to something also affect subnodes. These subnodes must be correctly
|
||||
# refreshed.
|
||||
node = self.dtree[0]
|
||||
eq_(len(node), 3) # a len() call is required for subnodes to be loaded
|
||||
eq_(len(node), 3) # a len() call is required for subnodes to be loaded
|
||||
subnode = node[0]
|
||||
node.state = 1 # the state property is a state index
|
||||
node.state = 1 # the state property is a state index
|
||||
node = self.dtree[0]
|
||||
eq_(len(node), 3)
|
||||
subnode = node[0]
|
||||
eq_(subnode.state, 1)
|
||||
self.dtree.view.check_gui_calls(['refresh_states'])
|
||||
|
||||
self.dtree.view.check_gui_calls(["refresh_states"])
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
from hscommon.testutil import TestApp as TestAppBase, CallLogger, eq_, with_app # noqa
|
||||
from hscommon.testutil import TestApp as TestAppBase, CallLogger, eq_, with_app # noqa
|
||||
from hscommon.path import Path
|
||||
from hscommon.util import get_file_ext, format_size
|
||||
from hscommon.gui.column import Column
|
||||
@@ -17,6 +17,7 @@ from ..app import DupeGuru as DupeGuruBase
|
||||
from ..gui.result_table import ResultTable as ResultTableBase
|
||||
from ..gui.prioritize_dialog import PrioritizeDialog
|
||||
|
||||
|
||||
class DupeGuruView:
|
||||
JOB = nulljob
|
||||
|
||||
@@ -39,28 +40,32 @@ class DupeGuruView:
|
||||
self.messages.append(msg)
|
||||
|
||||
def ask_yes_no(self, prompt):
|
||||
return True # always answer yes
|
||||
return True # always answer yes
|
||||
|
||||
def create_results_window(self):
|
||||
pass
|
||||
|
||||
|
||||
class ResultTable(ResultTableBase):
|
||||
COLUMNS = [
|
||||
Column('marked', ''),
|
||||
Column('name', 'Filename'),
|
||||
Column('folder_path', 'Directory'),
|
||||
Column('size', 'Size (KB)'),
|
||||
Column('extension', 'Kind'),
|
||||
Column("marked", ""),
|
||||
Column("name", "Filename"),
|
||||
Column("folder_path", "Directory"),
|
||||
Column("size", "Size (KB)"),
|
||||
Column("extension", "Kind"),
|
||||
]
|
||||
DELTA_COLUMNS = {'size', }
|
||||
DELTA_COLUMNS = {
|
||||
"size",
|
||||
}
|
||||
|
||||
|
||||
class DupeGuru(DupeGuruBase):
|
||||
NAME = 'dupeGuru'
|
||||
METADATA_TO_READ = ['size']
|
||||
NAME = "dupeGuru"
|
||||
METADATA_TO_READ = ["size"]
|
||||
|
||||
def __init__(self):
|
||||
DupeGuruBase.__init__(self, DupeGuruView())
|
||||
self.appdata = '/tmp'
|
||||
self.appdata = "/tmp"
|
||||
self._recreate_result_table()
|
||||
|
||||
def _prioritization_categories(self):
|
||||
@@ -78,7 +83,7 @@ class NamedObject:
|
||||
def __init__(self, name="foobar", with_words=False, size=1, folder=None):
|
||||
self.name = name
|
||||
if folder is None:
|
||||
folder = 'basepath'
|
||||
folder = "basepath"
|
||||
self._folder = Path(folder)
|
||||
self.size = size
|
||||
self.md5partial = name
|
||||
@@ -88,7 +93,7 @@ class NamedObject:
|
||||
self.is_ref = False
|
||||
|
||||
def __bool__(self):
|
||||
return False #Make sure that operations are made correctly when the bool value of files is false.
|
||||
return False # Make sure that operations are made correctly when the bool value of files is false.
|
||||
|
||||
def get_display_info(self, group, delta):
|
||||
size = self.size
|
||||
@@ -97,10 +102,10 @@ class NamedObject:
|
||||
r = group.ref
|
||||
size -= r.size
|
||||
return {
|
||||
'name': self.name,
|
||||
'folder_path': str(self.folder_path),
|
||||
'size': format_size(size, 0, 1, False),
|
||||
'extension': self.extension if hasattr(self, 'extension') else '---',
|
||||
"name": self.name,
|
||||
"folder_path": str(self.folder_path),
|
||||
"size": format_size(size, 0, 1, False),
|
||||
"extension": self.extension if hasattr(self, "extension") else "---",
|
||||
}
|
||||
|
||||
@property
|
||||
@@ -115,6 +120,7 @@ class NamedObject:
|
||||
def extension(self):
|
||||
return get_file_ext(self.name)
|
||||
|
||||
|
||||
# Returns a group set that looks like that:
|
||||
# "foo bar" (1)
|
||||
# "bar bleh" (1024)
|
||||
@@ -127,21 +133,24 @@ def GetTestGroups():
|
||||
NamedObject("bar bleh"),
|
||||
NamedObject("foo bleh"),
|
||||
NamedObject("ibabtu"),
|
||||
NamedObject("ibabtu")
|
||||
NamedObject("ibabtu"),
|
||||
]
|
||||
objects[1].size = 1024
|
||||
matches = engine.getmatches(objects) #we should have 5 matches
|
||||
groups = engine.get_groups(matches) #We should have 2 groups
|
||||
matches = engine.getmatches(objects) # we should have 5 matches
|
||||
groups = engine.get_groups(matches) # We should have 2 groups
|
||||
for g in groups:
|
||||
g.prioritize(lambda x: objects.index(x)) #We want the dupes to be in the same order as the list is
|
||||
groups.sort(key=len, reverse=True) # We want the group with 3 members to be first.
|
||||
g.prioritize(
|
||||
lambda x: objects.index(x)
|
||||
) # We want the dupes to be in the same order as the list is
|
||||
groups.sort(key=len, reverse=True) # We want the group with 3 members to be first.
|
||||
return (objects, matches, groups)
|
||||
|
||||
|
||||
class TestApp(TestAppBase):
|
||||
def __init__(self):
|
||||
def link_gui(gui):
|
||||
gui.view = self.make_logger()
|
||||
if hasattr(gui, 'columns'): # tables
|
||||
if hasattr(gui, "columns"): # tables
|
||||
gui.columns.view = self.make_logger()
|
||||
return gui
|
||||
|
||||
@@ -166,7 +175,7 @@ class TestApp(TestAppBase):
|
||||
# rtable is a property because its instance can be replaced during execution
|
||||
return self.app.result_table
|
||||
|
||||
#--- Helpers
|
||||
# --- Helpers
|
||||
def select_pri_criterion(self, name):
|
||||
# Select a main prioritize criterion by name instead of by index. Makes tests more
|
||||
# maintainable.
|
||||
|
||||
@@ -13,13 +13,18 @@ try:
|
||||
except ImportError:
|
||||
skip("Can't import the block module, probably hasn't been compiled.")
|
||||
|
||||
def my_avgdiff(first, second, limit=768, min_iter=3): # this is so I don't have to re-write every call
|
||||
|
||||
def my_avgdiff(
|
||||
first, second, limit=768, min_iter=3
|
||||
): # this is so I don't have to re-write every call
|
||||
return avgdiff(first, second, limit, min_iter)
|
||||
|
||||
|
||||
BLACK = (0, 0, 0)
|
||||
RED = (0xff, 0, 0)
|
||||
GREEN = (0, 0xff, 0)
|
||||
BLUE = (0, 0, 0xff)
|
||||
RED = (0xFF, 0, 0)
|
||||
GREEN = (0, 0xFF, 0)
|
||||
BLUE = (0, 0, 0xFF)
|
||||
|
||||
|
||||
class FakeImage:
|
||||
def __init__(self, size, data):
|
||||
@@ -37,16 +42,20 @@ class FakeImage:
|
||||
pixels.append(pixel)
|
||||
return FakeImage((box[2] - box[0], box[3] - box[1]), pixels)
|
||||
|
||||
|
||||
def empty():
|
||||
return FakeImage((0, 0), [])
|
||||
|
||||
def single_pixel(): #one red pixel
|
||||
return FakeImage((1, 1), [(0xff, 0, 0)])
|
||||
|
||||
def single_pixel(): # one red pixel
|
||||
return FakeImage((1, 1), [(0xFF, 0, 0)])
|
||||
|
||||
|
||||
def four_pixels():
|
||||
pixels = [RED, (0, 0x80, 0xff), (0x80, 0, 0), (0, 0x40, 0x80)]
|
||||
pixels = [RED, (0, 0x80, 0xFF), (0x80, 0, 0), (0, 0x40, 0x80)]
|
||||
return FakeImage((2, 2), pixels)
|
||||
|
||||
|
||||
class TestCasegetblock:
|
||||
def test_single_pixel(self):
|
||||
im = single_pixel()
|
||||
@@ -60,9 +69,9 @@ class TestCasegetblock:
|
||||
def test_four_pixels(self):
|
||||
im = four_pixels()
|
||||
[b] = getblocks2(im, 1)
|
||||
meanred = (0xff + 0x80) // 4
|
||||
meanred = (0xFF + 0x80) // 4
|
||||
meangreen = (0x80 + 0x40) // 4
|
||||
meanblue = (0xff + 0x80) // 4
|
||||
meanblue = (0xFF + 0x80) // 4
|
||||
eq_((meanred, meangreen, meanblue), b)
|
||||
|
||||
|
||||
@@ -158,6 +167,7 @@ class TestCasegetblock:
|
||||
# eq_(BLACK, blocks[3])
|
||||
#
|
||||
|
||||
|
||||
class TestCasegetblocks2:
|
||||
def test_empty_image(self):
|
||||
im = empty()
|
||||
@@ -169,9 +179,9 @@ class TestCasegetblocks2:
|
||||
blocks = getblocks2(im, 1)
|
||||
eq_(1, len(blocks))
|
||||
block = blocks[0]
|
||||
meanred = (0xff + 0x80) // 4
|
||||
meanred = (0xFF + 0x80) // 4
|
||||
meangreen = (0x80 + 0x40) // 4
|
||||
meanblue = (0xff + 0x80) // 4
|
||||
meanblue = (0xFF + 0x80) // 4
|
||||
eq_((meanred, meangreen, meanblue), block)
|
||||
|
||||
def test_four_blocks_all_black(self):
|
||||
@@ -225,25 +235,25 @@ class TestCaseavgdiff:
|
||||
my_avgdiff([b, b], [b])
|
||||
|
||||
def test_first_arg_is_empty_but_not_second(self):
|
||||
#Don't return 0 (as when the 2 lists are empty), raise!
|
||||
# Don't return 0 (as when the 2 lists are empty), raise!
|
||||
b = (0, 0, 0)
|
||||
with raises(DifferentBlockCountError):
|
||||
my_avgdiff([], [b])
|
||||
|
||||
def test_limit(self):
|
||||
ref = (0, 0, 0)
|
||||
b1 = (10, 10, 10) #avg 30
|
||||
b2 = (20, 20, 20) #avg 45
|
||||
b3 = (30, 30, 30) #avg 60
|
||||
b1 = (10, 10, 10) # avg 30
|
||||
b2 = (20, 20, 20) # avg 45
|
||||
b3 = (30, 30, 30) # avg 60
|
||||
blocks1 = [ref, ref, ref]
|
||||
blocks2 = [b1, b2, b3]
|
||||
eq_(45, my_avgdiff(blocks1, blocks2, 44))
|
||||
|
||||
def test_min_iterations(self):
|
||||
ref = (0, 0, 0)
|
||||
b1 = (10, 10, 10) #avg 30
|
||||
b2 = (20, 20, 20) #avg 45
|
||||
b3 = (10, 10, 10) #avg 40
|
||||
b1 = (10, 10, 10) # avg 30
|
||||
b2 = (20, 20, 20) # avg 45
|
||||
b3 = (10, 10, 10) # avg 40
|
||||
blocks1 = [ref, ref, ref]
|
||||
blocks2 = [b1, b2, b3]
|
||||
eq_(40, my_avgdiff(blocks1, blocks2, 45 - 1, 3))
|
||||
|
||||
@@ -16,34 +16,35 @@ try:
|
||||
except ImportError:
|
||||
skip("Can't import the cache module, probably hasn't been compiled.")
|
||||
|
||||
|
||||
class TestCasecolors_to_string:
|
||||
def test_no_color(self):
|
||||
eq_('', colors_to_string([]))
|
||||
eq_("", colors_to_string([]))
|
||||
|
||||
def test_single_color(self):
|
||||
eq_('000000', colors_to_string([(0, 0, 0)]))
|
||||
eq_('010101', colors_to_string([(1, 1, 1)]))
|
||||
eq_('0a141e', colors_to_string([(10, 20, 30)]))
|
||||
eq_("000000", colors_to_string([(0, 0, 0)]))
|
||||
eq_("010101", colors_to_string([(1, 1, 1)]))
|
||||
eq_("0a141e", colors_to_string([(10, 20, 30)]))
|
||||
|
||||
def test_two_colors(self):
|
||||
eq_('000102030405', colors_to_string([(0, 1, 2), (3, 4, 5)]))
|
||||
eq_("000102030405", colors_to_string([(0, 1, 2), (3, 4, 5)]))
|
||||
|
||||
|
||||
class TestCasestring_to_colors:
|
||||
def test_empty(self):
|
||||
eq_([], string_to_colors(''))
|
||||
eq_([], string_to_colors(""))
|
||||
|
||||
def test_single_color(self):
|
||||
eq_([(0, 0, 0)], string_to_colors('000000'))
|
||||
eq_([(2, 3, 4)], string_to_colors('020304'))
|
||||
eq_([(10, 20, 30)], string_to_colors('0a141e'))
|
||||
eq_([(0, 0, 0)], string_to_colors("000000"))
|
||||
eq_([(2, 3, 4)], string_to_colors("020304"))
|
||||
eq_([(10, 20, 30)], string_to_colors("0a141e"))
|
||||
|
||||
def test_two_colors(self):
|
||||
eq_([(10, 20, 30), (40, 50, 60)], string_to_colors('0a141e28323c'))
|
||||
eq_([(10, 20, 30), (40, 50, 60)], string_to_colors("0a141e28323c"))
|
||||
|
||||
def test_incomplete_color(self):
|
||||
# don't return anything if it's not a complete color
|
||||
eq_([], string_to_colors('102'))
|
||||
eq_([], string_to_colors("102"))
|
||||
|
||||
|
||||
class BaseTestCaseCache:
|
||||
@@ -54,58 +55,58 @@ class BaseTestCaseCache:
|
||||
c = self.get_cache()
|
||||
eq_(0, len(c))
|
||||
with raises(KeyError):
|
||||
c['foo']
|
||||
c["foo"]
|
||||
|
||||
def test_set_then_retrieve_blocks(self):
|
||||
c = self.get_cache()
|
||||
b = [(0, 0, 0), (1, 2, 3)]
|
||||
c['foo'] = b
|
||||
eq_(b, c['foo'])
|
||||
c["foo"] = b
|
||||
eq_(b, c["foo"])
|
||||
|
||||
def test_delitem(self):
|
||||
c = self.get_cache()
|
||||
c['foo'] = ''
|
||||
del c['foo']
|
||||
assert 'foo' not in c
|
||||
c["foo"] = ""
|
||||
del c["foo"]
|
||||
assert "foo" not in c
|
||||
with raises(KeyError):
|
||||
del c['foo']
|
||||
del c["foo"]
|
||||
|
||||
def test_persistance(self, tmpdir):
|
||||
DBNAME = tmpdir.join('hstest.db')
|
||||
DBNAME = tmpdir.join("hstest.db")
|
||||
c = self.get_cache(str(DBNAME))
|
||||
c['foo'] = [(1, 2, 3)]
|
||||
c["foo"] = [(1, 2, 3)]
|
||||
del c
|
||||
c = self.get_cache(str(DBNAME))
|
||||
eq_([(1, 2, 3)], c['foo'])
|
||||
eq_([(1, 2, 3)], c["foo"])
|
||||
|
||||
def test_filter(self):
|
||||
c = self.get_cache()
|
||||
c['foo'] = ''
|
||||
c['bar'] = ''
|
||||
c['baz'] = ''
|
||||
c.filter(lambda p: p != 'bar') #only 'bar' is removed
|
||||
c["foo"] = ""
|
||||
c["bar"] = ""
|
||||
c["baz"] = ""
|
||||
c.filter(lambda p: p != "bar") # only 'bar' is removed
|
||||
eq_(2, len(c))
|
||||
assert 'foo' in c
|
||||
assert 'baz' in c
|
||||
assert 'bar' not in c
|
||||
assert "foo" in c
|
||||
assert "baz" in c
|
||||
assert "bar" not in c
|
||||
|
||||
def test_clear(self):
|
||||
c = self.get_cache()
|
||||
c['foo'] = ''
|
||||
c['bar'] = ''
|
||||
c['baz'] = ''
|
||||
c["foo"] = ""
|
||||
c["bar"] = ""
|
||||
c["baz"] = ""
|
||||
c.clear()
|
||||
eq_(0, len(c))
|
||||
assert 'foo' not in c
|
||||
assert 'baz' not in c
|
||||
assert 'bar' not in c
|
||||
assert "foo" not in c
|
||||
assert "baz" not in c
|
||||
assert "bar" not in c
|
||||
|
||||
def test_by_id(self):
|
||||
# it's possible to use the cache by referring to the files by their row_id
|
||||
c = self.get_cache()
|
||||
b = [(0, 0, 0), (1, 2, 3)]
|
||||
c['foo'] = b
|
||||
foo_id = c.get_id('foo')
|
||||
c["foo"] = b
|
||||
foo_id = c.get_id("foo")
|
||||
eq_(c[foo_id], b)
|
||||
|
||||
|
||||
@@ -120,16 +121,16 @@ class TestCaseSqliteCache(BaseTestCaseCache):
|
||||
# If we don't do this monkeypatching, we get a weird exception about trying to flush a
|
||||
# closed file. I've tried setting logging level and stuff, but nothing worked. So, there we
|
||||
# go, a dirty monkeypatch.
|
||||
monkeypatch.setattr(logging, 'warning', lambda *args, **kw: None)
|
||||
dbname = str(tmpdir.join('foo.db'))
|
||||
fp = open(dbname, 'w')
|
||||
fp.write('invalid sqlite content')
|
||||
monkeypatch.setattr(logging, "warning", lambda *args, **kw: None)
|
||||
dbname = str(tmpdir.join("foo.db"))
|
||||
fp = open(dbname, "w")
|
||||
fp.write("invalid sqlite content")
|
||||
fp.close()
|
||||
c = self.get_cache(dbname) # should not raise a DatabaseError
|
||||
c['foo'] = [(1, 2, 3)]
|
||||
c = self.get_cache(dbname) # should not raise a DatabaseError
|
||||
c["foo"] = [(1, 2, 3)]
|
||||
del c
|
||||
c = self.get_cache(dbname)
|
||||
eq_(c['foo'], [(1, 2, 3)])
|
||||
eq_(c["foo"], [(1, 2, 3)])
|
||||
|
||||
|
||||
class TestCaseShelveCache(BaseTestCaseCache):
|
||||
@@ -161,4 +162,3 @@ class TestCaseCacheSQLEscape:
|
||||
del c["foo'bar"]
|
||||
except KeyError:
|
||||
assert False
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
from hscommon.testutil import pytest_funcarg__app # noqa
|
||||
from hscommon.testutil import pytest_funcarg__app # noqa
|
||||
|
||||
@@ -14,91 +14,105 @@ from hscommon.path import Path
|
||||
from hscommon.testutil import eq_
|
||||
|
||||
from ..fs import File
|
||||
from ..directories import Directories, DirectoryState, AlreadyThereError, InvalidPathError
|
||||
from ..directories import (
|
||||
Directories,
|
||||
DirectoryState,
|
||||
AlreadyThereError,
|
||||
InvalidPathError,
|
||||
)
|
||||
|
||||
|
||||
def create_fake_fs(rootpath):
|
||||
# We have it as a separate function because other units are using it.
|
||||
rootpath = rootpath['fs']
|
||||
rootpath = rootpath["fs"]
|
||||
rootpath.mkdir()
|
||||
rootpath['dir1'].mkdir()
|
||||
rootpath['dir2'].mkdir()
|
||||
rootpath['dir3'].mkdir()
|
||||
fp = rootpath['file1.test'].open('w')
|
||||
fp.write('1')
|
||||
rootpath["dir1"].mkdir()
|
||||
rootpath["dir2"].mkdir()
|
||||
rootpath["dir3"].mkdir()
|
||||
fp = rootpath["file1.test"].open("w")
|
||||
fp.write("1")
|
||||
fp.close()
|
||||
fp = rootpath['file2.test'].open('w')
|
||||
fp.write('12')
|
||||
fp = rootpath["file2.test"].open("w")
|
||||
fp.write("12")
|
||||
fp.close()
|
||||
fp = rootpath['file3.test'].open('w')
|
||||
fp.write('123')
|
||||
fp = rootpath["file3.test"].open("w")
|
||||
fp.write("123")
|
||||
fp.close()
|
||||
fp = rootpath['dir1']['file1.test'].open('w')
|
||||
fp.write('1')
|
||||
fp = rootpath["dir1"]["file1.test"].open("w")
|
||||
fp.write("1")
|
||||
fp.close()
|
||||
fp = rootpath['dir2']['file2.test'].open('w')
|
||||
fp.write('12')
|
||||
fp = rootpath["dir2"]["file2.test"].open("w")
|
||||
fp.write("12")
|
||||
fp.close()
|
||||
fp = rootpath['dir3']['file3.test'].open('w')
|
||||
fp.write('123')
|
||||
fp = rootpath["dir3"]["file3.test"].open("w")
|
||||
fp.write("123")
|
||||
fp.close()
|
||||
return rootpath
|
||||
|
||||
|
||||
testpath = None
|
||||
|
||||
|
||||
def setup_module(module):
|
||||
# In this unit, we have tests depending on two directory structure. One with only one file in it
|
||||
# and another with a more complex structure.
|
||||
testpath = Path(tempfile.mkdtemp())
|
||||
module.testpath = testpath
|
||||
rootpath = testpath['onefile']
|
||||
rootpath = testpath["onefile"]
|
||||
rootpath.mkdir()
|
||||
fp = rootpath['test.txt'].open('w')
|
||||
fp.write('test_data')
|
||||
fp = rootpath["test.txt"].open("w")
|
||||
fp.write("test_data")
|
||||
fp.close()
|
||||
create_fake_fs(testpath)
|
||||
|
||||
|
||||
def teardown_module(module):
|
||||
shutil.rmtree(str(module.testpath))
|
||||
|
||||
|
||||
def test_empty():
|
||||
d = Directories()
|
||||
eq_(len(d), 0)
|
||||
assert 'foobar' not in d
|
||||
assert "foobar" not in d
|
||||
|
||||
|
||||
def test_add_path():
|
||||
d = Directories()
|
||||
p = testpath['onefile']
|
||||
p = testpath["onefile"]
|
||||
d.add_path(p)
|
||||
eq_(1, len(d))
|
||||
assert p in d
|
||||
assert (p['foobar']) in d
|
||||
assert (p["foobar"]) in d
|
||||
assert p.parent() not in d
|
||||
p = testpath['fs']
|
||||
p = testpath["fs"]
|
||||
d.add_path(p)
|
||||
eq_(2, len(d))
|
||||
assert p in d
|
||||
|
||||
|
||||
def test_AddPath_when_path_is_already_there():
|
||||
d = Directories()
|
||||
p = testpath['onefile']
|
||||
p = testpath["onefile"]
|
||||
d.add_path(p)
|
||||
with raises(AlreadyThereError):
|
||||
d.add_path(p)
|
||||
with raises(AlreadyThereError):
|
||||
d.add_path(p['foobar'])
|
||||
d.add_path(p["foobar"])
|
||||
eq_(1, len(d))
|
||||
|
||||
|
||||
def test_add_path_containing_paths_already_there():
|
||||
d = Directories()
|
||||
d.add_path(testpath['onefile'])
|
||||
d.add_path(testpath["onefile"])
|
||||
eq_(1, len(d))
|
||||
d.add_path(testpath)
|
||||
eq_(len(d), 1)
|
||||
eq_(d[0], testpath)
|
||||
|
||||
|
||||
def test_AddPath_non_latin(tmpdir):
|
||||
p = Path(str(tmpdir))
|
||||
to_add = p['unicode\u201a']
|
||||
to_add = p["unicode\u201a"]
|
||||
os.mkdir(str(to_add))
|
||||
d = Directories()
|
||||
try:
|
||||
@@ -106,63 +120,69 @@ def test_AddPath_non_latin(tmpdir):
|
||||
except UnicodeDecodeError:
|
||||
assert False
|
||||
|
||||
|
||||
def test_del():
|
||||
d = Directories()
|
||||
d.add_path(testpath['onefile'])
|
||||
d.add_path(testpath["onefile"])
|
||||
try:
|
||||
del d[1]
|
||||
assert False
|
||||
except IndexError:
|
||||
pass
|
||||
d.add_path(testpath['fs'])
|
||||
d.add_path(testpath["fs"])
|
||||
del d[1]
|
||||
eq_(1, len(d))
|
||||
|
||||
|
||||
def test_states():
|
||||
d = Directories()
|
||||
p = testpath['onefile']
|
||||
p = testpath["onefile"]
|
||||
d.add_path(p)
|
||||
eq_(DirectoryState.Normal, d.get_state(p))
|
||||
d.set_state(p, DirectoryState.Reference)
|
||||
eq_(DirectoryState.Reference, d.get_state(p))
|
||||
eq_(DirectoryState.Reference, d.get_state(p['dir1']))
|
||||
eq_(DirectoryState.Reference, d.get_state(p["dir1"]))
|
||||
eq_(1, len(d.states))
|
||||
eq_(p, list(d.states.keys())[0])
|
||||
eq_(DirectoryState.Reference, d.states[p])
|
||||
|
||||
|
||||
def test_get_state_with_path_not_there():
|
||||
# When the path's not there, just return DirectoryState.Normal
|
||||
d = Directories()
|
||||
d.add_path(testpath['onefile'])
|
||||
d.add_path(testpath["onefile"])
|
||||
eq_(d.get_state(testpath), DirectoryState.Normal)
|
||||
|
||||
|
||||
def test_states_overwritten_when_larger_directory_eat_smaller_ones():
|
||||
# ref #248
|
||||
# When setting the state of a folder, we overwrite previously set states for subfolders.
|
||||
d = Directories()
|
||||
p = testpath['onefile']
|
||||
p = testpath["onefile"]
|
||||
d.add_path(p)
|
||||
d.set_state(p, DirectoryState.Excluded)
|
||||
d.add_path(testpath)
|
||||
d.set_state(testpath, DirectoryState.Reference)
|
||||
eq_(d.get_state(p), DirectoryState.Reference)
|
||||
eq_(d.get_state(p['dir1']), DirectoryState.Reference)
|
||||
eq_(d.get_state(p["dir1"]), DirectoryState.Reference)
|
||||
eq_(d.get_state(testpath), DirectoryState.Reference)
|
||||
|
||||
|
||||
def test_get_files():
|
||||
d = Directories()
|
||||
p = testpath['fs']
|
||||
p = testpath["fs"]
|
||||
d.add_path(p)
|
||||
d.set_state(p['dir1'], DirectoryState.Reference)
|
||||
d.set_state(p['dir2'], DirectoryState.Excluded)
|
||||
d.set_state(p["dir1"], DirectoryState.Reference)
|
||||
d.set_state(p["dir2"], DirectoryState.Excluded)
|
||||
files = list(d.get_files())
|
||||
eq_(5, len(files))
|
||||
for f in files:
|
||||
if f.path.parent() == p['dir1']:
|
||||
if f.path.parent() == p["dir1"]:
|
||||
assert f.is_ref
|
||||
else:
|
||||
assert not f.is_ref
|
||||
|
||||
|
||||
def test_get_files_with_folders():
|
||||
# When fileclasses handle folders, return them and stop recursing!
|
||||
class FakeFile(File):
|
||||
@@ -171,106 +191,115 @@ def test_get_files_with_folders():
|
||||
return True
|
||||
|
||||
d = Directories()
|
||||
p = testpath['fs']
|
||||
p = testpath["fs"]
|
||||
d.add_path(p)
|
||||
files = list(d.get_files(fileclasses=[FakeFile]))
|
||||
# We have the 3 root files and the 3 root dirs
|
||||
eq_(6, len(files))
|
||||
|
||||
|
||||
def test_get_folders():
|
||||
d = Directories()
|
||||
p = testpath['fs']
|
||||
p = testpath["fs"]
|
||||
d.add_path(p)
|
||||
d.set_state(p['dir1'], DirectoryState.Reference)
|
||||
d.set_state(p['dir2'], DirectoryState.Excluded)
|
||||
d.set_state(p["dir1"], DirectoryState.Reference)
|
||||
d.set_state(p["dir2"], DirectoryState.Excluded)
|
||||
folders = list(d.get_folders())
|
||||
eq_(len(folders), 3)
|
||||
ref = [f for f in folders if f.is_ref]
|
||||
not_ref = [f for f in folders if not f.is_ref]
|
||||
eq_(len(ref), 1)
|
||||
eq_(ref[0].path, p['dir1'])
|
||||
eq_(ref[0].path, p["dir1"])
|
||||
eq_(len(not_ref), 2)
|
||||
eq_(ref[0].size, 1)
|
||||
|
||||
|
||||
def test_get_files_with_inherited_exclusion():
|
||||
d = Directories()
|
||||
p = testpath['onefile']
|
||||
p = testpath["onefile"]
|
||||
d.add_path(p)
|
||||
d.set_state(p, DirectoryState.Excluded)
|
||||
eq_([], list(d.get_files()))
|
||||
|
||||
|
||||
def test_save_and_load(tmpdir):
|
||||
d1 = Directories()
|
||||
d2 = Directories()
|
||||
p1 = Path(str(tmpdir.join('p1')))
|
||||
p1 = Path(str(tmpdir.join("p1")))
|
||||
p1.mkdir()
|
||||
p2 = Path(str(tmpdir.join('p2')))
|
||||
p2 = Path(str(tmpdir.join("p2")))
|
||||
p2.mkdir()
|
||||
d1.add_path(p1)
|
||||
d1.add_path(p2)
|
||||
d1.set_state(p1, DirectoryState.Reference)
|
||||
d1.set_state(p1['dir1'], DirectoryState.Excluded)
|
||||
tmpxml = str(tmpdir.join('directories_testunit.xml'))
|
||||
d1.set_state(p1["dir1"], DirectoryState.Excluded)
|
||||
tmpxml = str(tmpdir.join("directories_testunit.xml"))
|
||||
d1.save_to_file(tmpxml)
|
||||
d2.load_from_file(tmpxml)
|
||||
eq_(2, len(d2))
|
||||
eq_(DirectoryState.Reference, d2.get_state(p1))
|
||||
eq_(DirectoryState.Excluded, d2.get_state(p1['dir1']))
|
||||
eq_(DirectoryState.Excluded, d2.get_state(p1["dir1"]))
|
||||
|
||||
|
||||
def test_invalid_path():
|
||||
d = Directories()
|
||||
p = Path('does_not_exist')
|
||||
p = Path("does_not_exist")
|
||||
with raises(InvalidPathError):
|
||||
d.add_path(p)
|
||||
eq_(0, len(d))
|
||||
|
||||
|
||||
def test_set_state_on_invalid_path():
|
||||
d = Directories()
|
||||
try:
|
||||
d.set_state(Path('foobar',), DirectoryState.Normal)
|
||||
d.set_state(Path("foobar",), DirectoryState.Normal)
|
||||
except LookupError:
|
||||
assert False
|
||||
|
||||
|
||||
def test_load_from_file_with_invalid_path(tmpdir):
|
||||
#This test simulates a load from file resulting in a
|
||||
#InvalidPath raise. Other directories must be loaded.
|
||||
# This test simulates a load from file resulting in a
|
||||
# InvalidPath raise. Other directories must be loaded.
|
||||
d1 = Directories()
|
||||
d1.add_path(testpath['onefile'])
|
||||
#Will raise InvalidPath upon loading
|
||||
p = Path(str(tmpdir.join('toremove')))
|
||||
d1.add_path(testpath["onefile"])
|
||||
# Will raise InvalidPath upon loading
|
||||
p = Path(str(tmpdir.join("toremove")))
|
||||
p.mkdir()
|
||||
d1.add_path(p)
|
||||
p.rmdir()
|
||||
tmpxml = str(tmpdir.join('directories_testunit.xml'))
|
||||
tmpxml = str(tmpdir.join("directories_testunit.xml"))
|
||||
d1.save_to_file(tmpxml)
|
||||
d2 = Directories()
|
||||
d2.load_from_file(tmpxml)
|
||||
eq_(1, len(d2))
|
||||
|
||||
|
||||
def test_unicode_save(tmpdir):
|
||||
d = Directories()
|
||||
p1 = Path(str(tmpdir))['hello\xe9']
|
||||
p1 = Path(str(tmpdir))["hello\xe9"]
|
||||
p1.mkdir()
|
||||
p1['foo\xe9'].mkdir()
|
||||
p1["foo\xe9"].mkdir()
|
||||
d.add_path(p1)
|
||||
d.set_state(p1['foo\xe9'], DirectoryState.Excluded)
|
||||
tmpxml = str(tmpdir.join('directories_testunit.xml'))
|
||||
d.set_state(p1["foo\xe9"], DirectoryState.Excluded)
|
||||
tmpxml = str(tmpdir.join("directories_testunit.xml"))
|
||||
try:
|
||||
d.save_to_file(tmpxml)
|
||||
except UnicodeDecodeError:
|
||||
assert False
|
||||
|
||||
|
||||
def test_get_files_refreshes_its_directories():
|
||||
d = Directories()
|
||||
p = testpath['fs']
|
||||
p = testpath["fs"]
|
||||
d.add_path(p)
|
||||
files = d.get_files()
|
||||
eq_(6, len(list(files)))
|
||||
time.sleep(1)
|
||||
os.remove(str(p['dir1']['file1.test']))
|
||||
os.remove(str(p["dir1"]["file1.test"]))
|
||||
files = d.get_files()
|
||||
eq_(5, len(list(files)))
|
||||
|
||||
|
||||
def test_get_files_does_not_choke_on_non_existing_directories(tmpdir):
|
||||
d = Directories()
|
||||
p = Path(str(tmpdir))
|
||||
@@ -278,36 +307,37 @@ def test_get_files_does_not_choke_on_non_existing_directories(tmpdir):
|
||||
p.rmtree()
|
||||
eq_([], list(d.get_files()))
|
||||
|
||||
|
||||
def test_get_state_returns_excluded_by_default_for_hidden_directories(tmpdir):
|
||||
d = Directories()
|
||||
p = Path(str(tmpdir))
|
||||
hidden_dir_path = p['.foo']
|
||||
p['.foo'].mkdir()
|
||||
hidden_dir_path = p[".foo"]
|
||||
p[".foo"].mkdir()
|
||||
d.add_path(p)
|
||||
eq_(d.get_state(hidden_dir_path), DirectoryState.Excluded)
|
||||
# But it can be overriden
|
||||
d.set_state(hidden_dir_path, DirectoryState.Normal)
|
||||
eq_(d.get_state(hidden_dir_path), DirectoryState.Normal)
|
||||
|
||||
|
||||
def test_default_path_state_override(tmpdir):
|
||||
# It's possible for a subclass to override the default state of a path
|
||||
class MyDirectories(Directories):
|
||||
def _default_state_for_path(self, path):
|
||||
if 'foobar' in path:
|
||||
if "foobar" in path:
|
||||
return DirectoryState.Excluded
|
||||
|
||||
d = MyDirectories()
|
||||
p1 = Path(str(tmpdir))
|
||||
p1['foobar'].mkdir()
|
||||
p1['foobar/somefile'].open('w').close()
|
||||
p1['foobaz'].mkdir()
|
||||
p1['foobaz/somefile'].open('w').close()
|
||||
p1["foobar"].mkdir()
|
||||
p1["foobar/somefile"].open("w").close()
|
||||
p1["foobaz"].mkdir()
|
||||
p1["foobaz/somefile"].open("w").close()
|
||||
d.add_path(p1)
|
||||
eq_(d.get_state(p1['foobaz']), DirectoryState.Normal)
|
||||
eq_(d.get_state(p1['foobar']), DirectoryState.Excluded)
|
||||
eq_(len(list(d.get_files())), 1) # only the 'foobaz' file is there
|
||||
eq_(d.get_state(p1["foobaz"]), DirectoryState.Normal)
|
||||
eq_(d.get_state(p1["foobar"]), DirectoryState.Excluded)
|
||||
eq_(len(list(d.get_files())), 1) # only the 'foobaz' file is there
|
||||
# However, the default state can be changed
|
||||
d.set_state(p1['foobar'], DirectoryState.Normal)
|
||||
eq_(d.get_state(p1['foobar']), DirectoryState.Normal)
|
||||
d.set_state(p1["foobar"], DirectoryState.Normal)
|
||||
eq_(d.get_state(p1["foobar"]), DirectoryState.Normal)
|
||||
eq_(len(list(d.get_files())), 2)
|
||||
|
||||
|
||||
@@ -13,13 +13,28 @@ from hscommon.testutil import eq_, log_calls
|
||||
from .base import NamedObject
|
||||
from .. import engine
|
||||
from ..engine import (
|
||||
get_match, getwords, Group, getfields, unpack_fields, compare_fields, compare, WEIGHT_WORDS,
|
||||
MATCH_SIMILAR_WORDS, NO_FIELD_ORDER, build_word_dict, get_groups, getmatches, Match,
|
||||
getmatches_by_contents, merge_similar_words, reduce_common_words
|
||||
get_match,
|
||||
getwords,
|
||||
Group,
|
||||
getfields,
|
||||
unpack_fields,
|
||||
compare_fields,
|
||||
compare,
|
||||
WEIGHT_WORDS,
|
||||
MATCH_SIMILAR_WORDS,
|
||||
NO_FIELD_ORDER,
|
||||
build_word_dict,
|
||||
get_groups,
|
||||
getmatches,
|
||||
Match,
|
||||
getmatches_by_contents,
|
||||
merge_similar_words,
|
||||
reduce_common_words,
|
||||
)
|
||||
|
||||
no = NamedObject
|
||||
|
||||
|
||||
def get_match_triangle():
|
||||
o1 = NamedObject(with_words=True)
|
||||
o2 = NamedObject(with_words=True)
|
||||
@@ -29,6 +44,7 @@ def get_match_triangle():
|
||||
m3 = get_match(o2, o3)
|
||||
return [m1, m2, m3]
|
||||
|
||||
|
||||
def get_test_group():
|
||||
m1, m2, m3 = get_match_triangle()
|
||||
result = Group()
|
||||
@@ -37,6 +53,7 @@ def get_test_group():
|
||||
result.add_match(m3)
|
||||
return result
|
||||
|
||||
|
||||
def assert_match(m, name1, name2):
|
||||
# When testing matches, whether objects are in first or second position very often doesn't
|
||||
# matter. This function makes this test more convenient.
|
||||
@@ -46,53 +63,54 @@ def assert_match(m, name1, name2):
|
||||
eq_(m.first.name, name2)
|
||||
eq_(m.second.name, name1)
|
||||
|
||||
|
||||
class TestCasegetwords:
|
||||
def test_spaces(self):
|
||||
eq_(['a', 'b', 'c', 'd'], getwords("a b c d"))
|
||||
eq_(['a', 'b', 'c', 'd'], getwords(" a b c d "))
|
||||
eq_(["a", "b", "c", "d"], getwords("a b c d"))
|
||||
eq_(["a", "b", "c", "d"], getwords(" a b c d "))
|
||||
|
||||
def test_splitter_chars(self):
|
||||
eq_(
|
||||
[chr(i) for i in range(ord('a'), ord('z')+1)],
|
||||
getwords("a-b_c&d+e(f)g;h\\i[j]k{l}m:n.o,p<q>r/s?t~u!v@w#x$y*z")
|
||||
[chr(i) for i in range(ord("a"), ord("z") + 1)],
|
||||
getwords("a-b_c&d+e(f)g;h\\i[j]k{l}m:n.o,p<q>r/s?t~u!v@w#x$y*z"),
|
||||
)
|
||||
|
||||
def test_joiner_chars(self):
|
||||
eq_(["aec"], getwords("a'e\u0301c"))
|
||||
|
||||
def test_empty(self):
|
||||
eq_([], getwords(''))
|
||||
eq_([], getwords(""))
|
||||
|
||||
def test_returns_lowercase(self):
|
||||
eq_(['foo', 'bar'], getwords('FOO BAR'))
|
||||
eq_(["foo", "bar"], getwords("FOO BAR"))
|
||||
|
||||
def test_decompose_unicode(self):
|
||||
eq_(getwords('foo\xe9bar'), ['fooebar'])
|
||||
eq_(getwords("foo\xe9bar"), ["fooebar"])
|
||||
|
||||
|
||||
class TestCasegetfields:
|
||||
def test_simple(self):
|
||||
eq_([['a', 'b'], ['c', 'd', 'e']], getfields('a b - c d e'))
|
||||
eq_([["a", "b"], ["c", "d", "e"]], getfields("a b - c d e"))
|
||||
|
||||
def test_empty(self):
|
||||
eq_([], getfields(''))
|
||||
eq_([], getfields(""))
|
||||
|
||||
def test_cleans_empty_fields(self):
|
||||
expected = [['a', 'bc', 'def']]
|
||||
actual = getfields(' - a bc def')
|
||||
expected = [["a", "bc", "def"]]
|
||||
actual = getfields(" - a bc def")
|
||||
eq_(expected, actual)
|
||||
expected = [['bc', 'def']]
|
||||
expected = [["bc", "def"]]
|
||||
|
||||
|
||||
class TestCaseunpack_fields:
|
||||
def test_with_fields(self):
|
||||
expected = ['a', 'b', 'c', 'd', 'e', 'f']
|
||||
actual = unpack_fields([['a'], ['b', 'c'], ['d', 'e', 'f']])
|
||||
expected = ["a", "b", "c", "d", "e", "f"]
|
||||
actual = unpack_fields([["a"], ["b", "c"], ["d", "e", "f"]])
|
||||
eq_(expected, actual)
|
||||
|
||||
def test_without_fields(self):
|
||||
expected = ['a', 'b', 'c', 'd', 'e', 'f']
|
||||
actual = unpack_fields(['a', 'b', 'c', 'd', 'e', 'f'])
|
||||
expected = ["a", "b", "c", "d", "e", "f"]
|
||||
actual = unpack_fields(["a", "b", "c", "d", "e", "f"])
|
||||
eq_(expected, actual)
|
||||
|
||||
def test_empty(self):
|
||||
@@ -101,134 +119,151 @@ class TestCaseunpack_fields:
|
||||
|
||||
class TestCaseWordCompare:
|
||||
def test_list(self):
|
||||
eq_(100, compare(['a', 'b', 'c', 'd'], ['a', 'b', 'c', 'd']))
|
||||
eq_(86, compare(['a', 'b', 'c', 'd'], ['a', 'b', 'c']))
|
||||
eq_(100, compare(["a", "b", "c", "d"], ["a", "b", "c", "d"]))
|
||||
eq_(86, compare(["a", "b", "c", "d"], ["a", "b", "c"]))
|
||||
|
||||
def test_unordered(self):
|
||||
#Sometimes, users don't want fuzzy matching too much When they set the slider
|
||||
#to 100, they don't expect a filename with the same words, but not the same order, to match.
|
||||
#Thus, we want to return 99 in that case.
|
||||
eq_(99, compare(['a', 'b', 'c', 'd'], ['d', 'b', 'c', 'a']))
|
||||
# Sometimes, users don't want fuzzy matching too much When they set the slider
|
||||
# to 100, they don't expect a filename with the same words, but not the same order, to match.
|
||||
# Thus, we want to return 99 in that case.
|
||||
eq_(99, compare(["a", "b", "c", "d"], ["d", "b", "c", "a"]))
|
||||
|
||||
def test_word_occurs_twice(self):
|
||||
#if a word occurs twice in first, but once in second, we want the word to be only counted once
|
||||
eq_(89, compare(['a', 'b', 'c', 'd', 'a'], ['d', 'b', 'c', 'a']))
|
||||
# if a word occurs twice in first, but once in second, we want the word to be only counted once
|
||||
eq_(89, compare(["a", "b", "c", "d", "a"], ["d", "b", "c", "a"]))
|
||||
|
||||
def test_uses_copy_of_lists(self):
|
||||
first = ['foo', 'bar']
|
||||
second = ['bar', 'bleh']
|
||||
first = ["foo", "bar"]
|
||||
second = ["bar", "bleh"]
|
||||
compare(first, second)
|
||||
eq_(['foo', 'bar'], first)
|
||||
eq_(['bar', 'bleh'], second)
|
||||
eq_(["foo", "bar"], first)
|
||||
eq_(["bar", "bleh"], second)
|
||||
|
||||
def test_word_weight(self):
|
||||
eq_(int((6.0 / 13.0) * 100), compare(['foo', 'bar'], ['bar', 'bleh'], (WEIGHT_WORDS, )))
|
||||
eq_(
|
||||
int((6.0 / 13.0) * 100),
|
||||
compare(["foo", "bar"], ["bar", "bleh"], (WEIGHT_WORDS,)),
|
||||
)
|
||||
|
||||
def test_similar_words(self):
|
||||
eq_(100, compare(['the', 'white', 'stripes'], ['the', 'whites', 'stripe'], (MATCH_SIMILAR_WORDS, )))
|
||||
eq_(
|
||||
100,
|
||||
compare(
|
||||
["the", "white", "stripes"],
|
||||
["the", "whites", "stripe"],
|
||||
(MATCH_SIMILAR_WORDS,),
|
||||
),
|
||||
)
|
||||
|
||||
def test_empty(self):
|
||||
eq_(0, compare([], []))
|
||||
|
||||
def test_with_fields(self):
|
||||
eq_(67, compare([['a', 'b'], ['c', 'd', 'e']], [['a', 'b'], ['c', 'd', 'f']]))
|
||||
eq_(67, compare([["a", "b"], ["c", "d", "e"]], [["a", "b"], ["c", "d", "f"]]))
|
||||
|
||||
def test_propagate_flags_with_fields(self, monkeypatch):
|
||||
def mock_compare(first, second, flags):
|
||||
eq_((0, 1, 2, 3, 5), flags)
|
||||
|
||||
monkeypatch.setattr(engine, 'compare_fields', mock_compare)
|
||||
compare([['a']], [['a']], (0, 1, 2, 3, 5))
|
||||
monkeypatch.setattr(engine, "compare_fields", mock_compare)
|
||||
compare([["a"]], [["a"]], (0, 1, 2, 3, 5))
|
||||
|
||||
|
||||
class TestCaseWordCompareWithFields:
|
||||
def test_simple(self):
|
||||
eq_(67, compare_fields([['a', 'b'], ['c', 'd', 'e']], [['a', 'b'], ['c', 'd', 'f']]))
|
||||
eq_(
|
||||
67,
|
||||
compare_fields(
|
||||
[["a", "b"], ["c", "d", "e"]], [["a", "b"], ["c", "d", "f"]]
|
||||
),
|
||||
)
|
||||
|
||||
def test_empty(self):
|
||||
eq_(0, compare_fields([], []))
|
||||
|
||||
def test_different_length(self):
|
||||
eq_(0, compare_fields([['a'], ['b']], [['a'], ['b'], ['c']]))
|
||||
eq_(0, compare_fields([["a"], ["b"]], [["a"], ["b"], ["c"]]))
|
||||
|
||||
def test_propagates_flags(self, monkeypatch):
|
||||
def mock_compare(first, second, flags):
|
||||
eq_((0, 1, 2, 3, 5), flags)
|
||||
|
||||
monkeypatch.setattr(engine, 'compare_fields', mock_compare)
|
||||
compare_fields([['a']], [['a']], (0, 1, 2, 3, 5))
|
||||
monkeypatch.setattr(engine, "compare_fields", mock_compare)
|
||||
compare_fields([["a"]], [["a"]], (0, 1, 2, 3, 5))
|
||||
|
||||
def test_order(self):
|
||||
first = [['a', 'b'], ['c', 'd', 'e']]
|
||||
second = [['c', 'd', 'f'], ['a', 'b']]
|
||||
first = [["a", "b"], ["c", "d", "e"]]
|
||||
second = [["c", "d", "f"], ["a", "b"]]
|
||||
eq_(0, compare_fields(first, second))
|
||||
|
||||
def test_no_order(self):
|
||||
first = [['a', 'b'], ['c', 'd', 'e']]
|
||||
second = [['c', 'd', 'f'], ['a', 'b']]
|
||||
eq_(67, compare_fields(first, second, (NO_FIELD_ORDER, )))
|
||||
first = [['a', 'b'], ['a', 'b']] #a field can only be matched once.
|
||||
second = [['c', 'd', 'f'], ['a', 'b']]
|
||||
eq_(0, compare_fields(first, second, (NO_FIELD_ORDER, )))
|
||||
first = [['a', 'b'], ['a', 'b', 'c']]
|
||||
second = [['c', 'd', 'f'], ['a', 'b']]
|
||||
eq_(33, compare_fields(first, second, (NO_FIELD_ORDER, )))
|
||||
first = [["a", "b"], ["c", "d", "e"]]
|
||||
second = [["c", "d", "f"], ["a", "b"]]
|
||||
eq_(67, compare_fields(first, second, (NO_FIELD_ORDER,)))
|
||||
first = [["a", "b"], ["a", "b"]] # a field can only be matched once.
|
||||
second = [["c", "d", "f"], ["a", "b"]]
|
||||
eq_(0, compare_fields(first, second, (NO_FIELD_ORDER,)))
|
||||
first = [["a", "b"], ["a", "b", "c"]]
|
||||
second = [["c", "d", "f"], ["a", "b"]]
|
||||
eq_(33, compare_fields(first, second, (NO_FIELD_ORDER,)))
|
||||
|
||||
def test_compare_fields_without_order_doesnt_alter_fields(self):
|
||||
#The NO_ORDER comp type altered the fields!
|
||||
first = [['a', 'b'], ['c', 'd', 'e']]
|
||||
second = [['c', 'd', 'f'], ['a', 'b']]
|
||||
eq_(67, compare_fields(first, second, (NO_FIELD_ORDER, )))
|
||||
eq_([['a', 'b'], ['c', 'd', 'e']], first)
|
||||
eq_([['c', 'd', 'f'], ['a', 'b']], second)
|
||||
# The NO_ORDER comp type altered the fields!
|
||||
first = [["a", "b"], ["c", "d", "e"]]
|
||||
second = [["c", "d", "f"], ["a", "b"]]
|
||||
eq_(67, compare_fields(first, second, (NO_FIELD_ORDER,)))
|
||||
eq_([["a", "b"], ["c", "d", "e"]], first)
|
||||
eq_([["c", "d", "f"], ["a", "b"]], second)
|
||||
|
||||
|
||||
class TestCasebuild_word_dict:
|
||||
def test_with_standard_words(self):
|
||||
l = [NamedObject('foo bar', True)]
|
||||
l.append(NamedObject('bar baz', True))
|
||||
l.append(NamedObject('baz bleh foo', True))
|
||||
d = build_word_dict(l)
|
||||
itemList = [NamedObject("foo bar", True)]
|
||||
itemList.append(NamedObject("bar baz", True))
|
||||
itemList.append(NamedObject("baz bleh foo", True))
|
||||
d = build_word_dict(itemList)
|
||||
eq_(4, len(d))
|
||||
eq_(2, len(d['foo']))
|
||||
assert l[0] in d['foo']
|
||||
assert l[2] in d['foo']
|
||||
eq_(2, len(d['bar']))
|
||||
assert l[0] in d['bar']
|
||||
assert l[1] in d['bar']
|
||||
eq_(2, len(d['baz']))
|
||||
assert l[1] in d['baz']
|
||||
assert l[2] in d['baz']
|
||||
eq_(1, len(d['bleh']))
|
||||
assert l[2] in d['bleh']
|
||||
eq_(2, len(d["foo"]))
|
||||
assert itemList[0] in d["foo"]
|
||||
assert itemList[2] in d["foo"]
|
||||
eq_(2, len(d["bar"]))
|
||||
assert itemList[0] in d["bar"]
|
||||
assert itemList[1] in d["bar"]
|
||||
eq_(2, len(d["baz"]))
|
||||
assert itemList[1] in d["baz"]
|
||||
assert itemList[2] in d["baz"]
|
||||
eq_(1, len(d["bleh"]))
|
||||
assert itemList[2] in d["bleh"]
|
||||
|
||||
def test_unpack_fields(self):
|
||||
o = NamedObject('')
|
||||
o.words = [['foo', 'bar'], ['baz']]
|
||||
o = NamedObject("")
|
||||
o.words = [["foo", "bar"], ["baz"]]
|
||||
d = build_word_dict([o])
|
||||
eq_(3, len(d))
|
||||
eq_(1, len(d['foo']))
|
||||
eq_(1, len(d["foo"]))
|
||||
|
||||
def test_words_are_unaltered(self):
|
||||
o = NamedObject('')
|
||||
o.words = [['foo', 'bar'], ['baz']]
|
||||
o = NamedObject("")
|
||||
o.words = [["foo", "bar"], ["baz"]]
|
||||
build_word_dict([o])
|
||||
eq_([['foo', 'bar'], ['baz']], o.words)
|
||||
eq_([["foo", "bar"], ["baz"]], o.words)
|
||||
|
||||
def test_object_instances_can_only_be_once_in_words_object_list(self):
|
||||
o = NamedObject('foo foo', True)
|
||||
o = NamedObject("foo foo", True)
|
||||
d = build_word_dict([o])
|
||||
eq_(1, len(d['foo']))
|
||||
eq_(1, len(d["foo"]))
|
||||
|
||||
def test_job(self):
|
||||
def do_progress(p, d=''):
|
||||
def do_progress(p, d=""):
|
||||
self.log.append(p)
|
||||
return True
|
||||
|
||||
j = job.Job(1, do_progress)
|
||||
self.log = []
|
||||
s = "foo bar"
|
||||
build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j)
|
||||
build_word_dict(
|
||||
[NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j
|
||||
)
|
||||
# We don't have intermediate log because iter_with_progress is called with every > 1
|
||||
eq_(0, self.log[0])
|
||||
eq_(100, self.log[1])
|
||||
@@ -237,51 +272,56 @@ class TestCasebuild_word_dict:
|
||||
class TestCasemerge_similar_words:
|
||||
def test_some_similar_words(self):
|
||||
d = {
|
||||
'foobar': set([1]),
|
||||
'foobar1': set([2]),
|
||||
'foobar2': set([3]),
|
||||
"foobar": set([1]),
|
||||
"foobar1": set([2]),
|
||||
"foobar2": set([3]),
|
||||
}
|
||||
merge_similar_words(d)
|
||||
eq_(1, len(d))
|
||||
eq_(3, len(d['foobar']))
|
||||
|
||||
eq_(3, len(d["foobar"]))
|
||||
|
||||
|
||||
class TestCasereduce_common_words:
|
||||
def test_typical(self):
|
||||
d = {
|
||||
'foo': set([NamedObject('foo bar', True) for i in range(50)]),
|
||||
'bar': set([NamedObject('foo bar', True) for i in range(49)])
|
||||
"foo": set([NamedObject("foo bar", True) for i in range(50)]),
|
||||
"bar": set([NamedObject("foo bar", True) for i in range(49)]),
|
||||
}
|
||||
reduce_common_words(d, 50)
|
||||
assert 'foo' not in d
|
||||
eq_(49, len(d['bar']))
|
||||
assert "foo" not in d
|
||||
eq_(49, len(d["bar"]))
|
||||
|
||||
def test_dont_remove_objects_with_only_common_words(self):
|
||||
d = {
|
||||
'common': set([NamedObject("common uncommon", True) for i in range(50)] + [NamedObject("common", True)]),
|
||||
'uncommon': set([NamedObject("common uncommon", True)])
|
||||
"common": set(
|
||||
[NamedObject("common uncommon", True) for i in range(50)]
|
||||
+ [NamedObject("common", True)]
|
||||
),
|
||||
"uncommon": set([NamedObject("common uncommon", True)]),
|
||||
}
|
||||
reduce_common_words(d, 50)
|
||||
eq_(1, len(d['common']))
|
||||
eq_(1, len(d['uncommon']))
|
||||
eq_(1, len(d["common"]))
|
||||
eq_(1, len(d["uncommon"]))
|
||||
|
||||
def test_values_still_are_set_instances(self):
|
||||
d = {
|
||||
'common': set([NamedObject("common uncommon", True) for i in range(50)] + [NamedObject("common", True)]),
|
||||
'uncommon': set([NamedObject("common uncommon", True)])
|
||||
"common": set(
|
||||
[NamedObject("common uncommon", True) for i in range(50)]
|
||||
+ [NamedObject("common", True)]
|
||||
),
|
||||
"uncommon": set([NamedObject("common uncommon", True)]),
|
||||
}
|
||||
reduce_common_words(d, 50)
|
||||
assert isinstance(d['common'], set)
|
||||
assert isinstance(d['uncommon'], set)
|
||||
assert isinstance(d["common"], set)
|
||||
assert isinstance(d["uncommon"], set)
|
||||
|
||||
def test_dont_raise_KeyError_when_a_word_has_been_removed(self):
|
||||
#If a word has been removed by the reduce, an object in a subsequent common word that
|
||||
#contains the word that has been removed would cause a KeyError.
|
||||
# If a word has been removed by the reduce, an object in a subsequent common word that
|
||||
# contains the word that has been removed would cause a KeyError.
|
||||
d = {
|
||||
'foo': set([NamedObject('foo bar baz', True) for i in range(50)]),
|
||||
'bar': set([NamedObject('foo bar baz', True) for i in range(50)]),
|
||||
'baz': set([NamedObject('foo bar baz', True) for i in range(49)])
|
||||
"foo": set([NamedObject("foo bar baz", True) for i in range(50)]),
|
||||
"bar": set([NamedObject("foo bar baz", True) for i in range(50)]),
|
||||
"baz": set([NamedObject("foo bar baz", True) for i in range(49)]),
|
||||
}
|
||||
try:
|
||||
reduce_common_words(d, 50)
|
||||
@@ -289,35 +329,37 @@ class TestCasereduce_common_words:
|
||||
self.fail()
|
||||
|
||||
def test_unpack_fields(self):
|
||||
#object.words may be fields.
|
||||
# object.words may be fields.
|
||||
def create_it():
|
||||
o = NamedObject('')
|
||||
o.words = [['foo', 'bar'], ['baz']]
|
||||
o = NamedObject("")
|
||||
o.words = [["foo", "bar"], ["baz"]]
|
||||
return o
|
||||
|
||||
d = {
|
||||
'foo': set([create_it() for i in range(50)])
|
||||
}
|
||||
d = {"foo": set([create_it() for i in range(50)])}
|
||||
try:
|
||||
reduce_common_words(d, 50)
|
||||
except TypeError:
|
||||
self.fail("must support fields.")
|
||||
|
||||
def test_consider_a_reduced_common_word_common_even_after_reduction(self):
|
||||
#There was a bug in the code that causeda word that has already been reduced not to
|
||||
#be counted as a common word for subsequent words. For example, if 'foo' is processed
|
||||
#as a common word, keeping a "foo bar" file in it, and the 'bar' is processed, "foo bar"
|
||||
#would not stay in 'bar' because 'foo' is not a common word anymore.
|
||||
only_common = NamedObject('foo bar', True)
|
||||
# There was a bug in the code that causeda word that has already been reduced not to
|
||||
# be counted as a common word for subsequent words. For example, if 'foo' is processed
|
||||
# as a common word, keeping a "foo bar" file in it, and the 'bar' is processed, "foo bar"
|
||||
# would not stay in 'bar' because 'foo' is not a common word anymore.
|
||||
only_common = NamedObject("foo bar", True)
|
||||
d = {
|
||||
'foo': set([NamedObject('foo bar baz', True) for i in range(49)] + [only_common]),
|
||||
'bar': set([NamedObject('foo bar baz', True) for i in range(49)] + [only_common]),
|
||||
'baz': set([NamedObject('foo bar baz', True) for i in range(49)])
|
||||
"foo": set(
|
||||
[NamedObject("foo bar baz", True) for i in range(49)] + [only_common]
|
||||
),
|
||||
"bar": set(
|
||||
[NamedObject("foo bar baz", True) for i in range(49)] + [only_common]
|
||||
),
|
||||
"baz": set([NamedObject("foo bar baz", True) for i in range(49)]),
|
||||
}
|
||||
reduce_common_words(d, 50)
|
||||
eq_(1, len(d['foo']))
|
||||
eq_(1, len(d['bar']))
|
||||
eq_(49, len(d['baz']))
|
||||
eq_(1, len(d["foo"]))
|
||||
eq_(1, len(d["bar"]))
|
||||
eq_(49, len(d["baz"]))
|
||||
|
||||
|
||||
class TestCaseget_match:
|
||||
@@ -326,8 +368,8 @@ class TestCaseget_match:
|
||||
o2 = NamedObject("bar bleh", True)
|
||||
m = get_match(o1, o2)
|
||||
eq_(50, m.percentage)
|
||||
eq_(['foo', 'bar'], m.first.words)
|
||||
eq_(['bar', 'bleh'], m.second.words)
|
||||
eq_(["foo", "bar"], m.first.words)
|
||||
eq_(["bar", "bleh"], m.second.words)
|
||||
assert m.first is o1
|
||||
assert m.second is o2
|
||||
|
||||
@@ -340,7 +382,9 @@ class TestCaseget_match:
|
||||
assert object() not in m
|
||||
|
||||
def test_word_weight(self):
|
||||
m = get_match(NamedObject("foo bar", True), NamedObject("bar bleh", True), (WEIGHT_WORDS, ))
|
||||
m = get_match(
|
||||
NamedObject("foo bar", True), NamedObject("bar bleh", True), (WEIGHT_WORDS,)
|
||||
)
|
||||
eq_(m.percentage, int((6.0 / 13.0) * 100))
|
||||
|
||||
|
||||
@@ -349,54 +393,59 @@ class TestCaseGetMatches:
|
||||
eq_(getmatches([]), [])
|
||||
|
||||
def test_simple(self):
|
||||
l = [NamedObject("foo bar"), NamedObject("bar bleh"), NamedObject("a b c foo")]
|
||||
r = getmatches(l)
|
||||
itemList = [NamedObject("foo bar"), NamedObject("bar bleh"), NamedObject("a b c foo")]
|
||||
r = getmatches(itemList)
|
||||
eq_(2, len(r))
|
||||
m = first(m for m in r if m.percentage == 50) #"foo bar" and "bar bleh"
|
||||
assert_match(m, 'foo bar', 'bar bleh')
|
||||
m = first(m for m in r if m.percentage == 33) #"foo bar" and "a b c foo"
|
||||
assert_match(m, 'foo bar', 'a b c foo')
|
||||
m = first(m for m in r if m.percentage == 50) # "foo bar" and "bar bleh"
|
||||
assert_match(m, "foo bar", "bar bleh")
|
||||
m = first(m for m in r if m.percentage == 33) # "foo bar" and "a b c foo"
|
||||
assert_match(m, "foo bar", "a b c foo")
|
||||
|
||||
def test_null_and_unrelated_objects(self):
|
||||
l = [NamedObject("foo bar"), NamedObject("bar bleh"), NamedObject(""), NamedObject("unrelated object")]
|
||||
r = getmatches(l)
|
||||
itemList = [
|
||||
NamedObject("foo bar"),
|
||||
NamedObject("bar bleh"),
|
||||
NamedObject(""),
|
||||
NamedObject("unrelated object"),
|
||||
]
|
||||
r = getmatches(itemList)
|
||||
eq_(len(r), 1)
|
||||
m = r[0]
|
||||
eq_(m.percentage, 50)
|
||||
assert_match(m, 'foo bar', 'bar bleh')
|
||||
assert_match(m, "foo bar", "bar bleh")
|
||||
|
||||
def test_twice_the_same_word(self):
|
||||
l = [NamedObject("foo foo bar"), NamedObject("bar bleh")]
|
||||
r = getmatches(l)
|
||||
itemList = [NamedObject("foo foo bar"), NamedObject("bar bleh")]
|
||||
r = getmatches(itemList)
|
||||
eq_(1, len(r))
|
||||
|
||||
def test_twice_the_same_word_when_preworded(self):
|
||||
l = [NamedObject("foo foo bar", True), NamedObject("bar bleh", True)]
|
||||
r = getmatches(l)
|
||||
itemList = [NamedObject("foo foo bar", True), NamedObject("bar bleh", True)]
|
||||
r = getmatches(itemList)
|
||||
eq_(1, len(r))
|
||||
|
||||
def test_two_words_match(self):
|
||||
l = [NamedObject("foo bar"), NamedObject("foo bar bleh")]
|
||||
r = getmatches(l)
|
||||
itemList = [NamedObject("foo bar"), NamedObject("foo bar bleh")]
|
||||
r = getmatches(itemList)
|
||||
eq_(1, len(r))
|
||||
|
||||
def test_match_files_with_only_common_words(self):
|
||||
#If a word occurs more than 50 times, it is excluded from the matching process
|
||||
#The problem with the common_word_threshold is that the files containing only common
|
||||
#words will never be matched together. We *should* match them.
|
||||
# If a word occurs more than 50 times, it is excluded from the matching process
|
||||
# The problem with the common_word_threshold is that the files containing only common
|
||||
# words will never be matched together. We *should* match them.
|
||||
# This test assumes that the common word threashold const is 50
|
||||
l = [NamedObject("foo") for i in range(50)]
|
||||
r = getmatches(l)
|
||||
itemList = [NamedObject("foo") for i in range(50)]
|
||||
r = getmatches(itemList)
|
||||
eq_(1225, len(r))
|
||||
|
||||
def test_use_words_already_there_if_there(self):
|
||||
o1 = NamedObject('foo')
|
||||
o2 = NamedObject('bar')
|
||||
o2.words = ['foo']
|
||||
o1 = NamedObject("foo")
|
||||
o2 = NamedObject("bar")
|
||||
o2.words = ["foo"]
|
||||
eq_(1, len(getmatches([o1, o2])))
|
||||
|
||||
def test_job(self):
|
||||
def do_progress(p, d=''):
|
||||
def do_progress(p, d=""):
|
||||
self.log.append(p)
|
||||
return True
|
||||
|
||||
@@ -409,28 +458,28 @@ class TestCaseGetMatches:
|
||||
eq_(100, self.log[-1])
|
||||
|
||||
def test_weight_words(self):
|
||||
l = [NamedObject("foo bar"), NamedObject("bar bleh")]
|
||||
m = getmatches(l, weight_words=True)[0]
|
||||
itemList = [NamedObject("foo bar"), NamedObject("bar bleh")]
|
||||
m = getmatches(itemList, weight_words=True)[0]
|
||||
eq_(int((6.0 / 13.0) * 100), m.percentage)
|
||||
|
||||
def test_similar_word(self):
|
||||
l = [NamedObject("foobar"), NamedObject("foobars")]
|
||||
eq_(len(getmatches(l, match_similar_words=True)), 1)
|
||||
eq_(getmatches(l, match_similar_words=True)[0].percentage, 100)
|
||||
l = [NamedObject("foobar"), NamedObject("foo")]
|
||||
eq_(len(getmatches(l, match_similar_words=True)), 0) #too far
|
||||
l = [NamedObject("bizkit"), NamedObject("bizket")]
|
||||
eq_(len(getmatches(l, match_similar_words=True)), 1)
|
||||
l = [NamedObject("foobar"), NamedObject("foosbar")]
|
||||
eq_(len(getmatches(l, match_similar_words=True)), 1)
|
||||
itemList = [NamedObject("foobar"), NamedObject("foobars")]
|
||||
eq_(len(getmatches(itemList, match_similar_words=True)), 1)
|
||||
eq_(getmatches(itemList, match_similar_words=True)[0].percentage, 100)
|
||||
itemList = [NamedObject("foobar"), NamedObject("foo")]
|
||||
eq_(len(getmatches(itemList, match_similar_words=True)), 0) # too far
|
||||
itemList = [NamedObject("bizkit"), NamedObject("bizket")]
|
||||
eq_(len(getmatches(itemList, match_similar_words=True)), 1)
|
||||
itemList = [NamedObject("foobar"), NamedObject("foosbar")]
|
||||
eq_(len(getmatches(itemList, match_similar_words=True)), 1)
|
||||
|
||||
def test_single_object_with_similar_words(self):
|
||||
l = [NamedObject("foo foos")]
|
||||
eq_(len(getmatches(l, match_similar_words=True)), 0)
|
||||
itemList = [NamedObject("foo foos")]
|
||||
eq_(len(getmatches(itemList, match_similar_words=True)), 0)
|
||||
|
||||
def test_double_words_get_counted_only_once(self):
|
||||
l = [NamedObject("foo bar foo bleh"), NamedObject("foo bar bleh bar")]
|
||||
m = getmatches(l)[0]
|
||||
itemList = [NamedObject("foo bar foo bleh"), NamedObject("foo bar bleh bar")]
|
||||
m = getmatches(itemList)[0]
|
||||
eq_(75, m.percentage)
|
||||
|
||||
def test_with_fields(self):
|
||||
@@ -450,13 +499,13 @@ class TestCaseGetMatches:
|
||||
eq_(m.percentage, 50)
|
||||
|
||||
def test_only_match_similar_when_the_option_is_set(self):
|
||||
l = [NamedObject("foobar"), NamedObject("foobars")]
|
||||
eq_(len(getmatches(l, match_similar_words=False)), 0)
|
||||
itemList = [NamedObject("foobar"), NamedObject("foobars")]
|
||||
eq_(len(getmatches(itemList, match_similar_words=False)), 0)
|
||||
|
||||
def test_dont_recurse_do_match(self):
|
||||
# with nosetests, the stack is increased. The number has to be high enough not to be failing falsely
|
||||
sys.setrecursionlimit(200)
|
||||
files = [NamedObject('foo bar') for i in range(201)]
|
||||
files = [NamedObject("foo bar") for i in range(201)]
|
||||
try:
|
||||
getmatches(files)
|
||||
except RuntimeError:
|
||||
@@ -465,9 +514,9 @@ class TestCaseGetMatches:
|
||||
sys.setrecursionlimit(1000)
|
||||
|
||||
def test_min_match_percentage(self):
|
||||
l = [NamedObject("foo bar"), NamedObject("bar bleh"), NamedObject("a b c foo")]
|
||||
r = getmatches(l, min_match_percentage=50)
|
||||
eq_(1, len(r)) #Only "foo bar" / "bar bleh" should match
|
||||
itemList = [NamedObject("foo bar"), NamedObject("bar bleh"), NamedObject("a b c foo")]
|
||||
r = getmatches(itemList, min_match_percentage=50)
|
||||
eq_(1, len(r)) # Only "foo bar" / "bar bleh" should match
|
||||
|
||||
def test_MemoryError(self, monkeypatch):
|
||||
@log_calls
|
||||
@@ -476,12 +525,12 @@ class TestCaseGetMatches:
|
||||
raise MemoryError()
|
||||
return Match(first, second, 0)
|
||||
|
||||
objects = [NamedObject() for i in range(10)] # results in 45 matches
|
||||
monkeypatch.setattr(engine, 'get_match', mocked_match)
|
||||
objects = [NamedObject() for i in range(10)] # results in 45 matches
|
||||
monkeypatch.setattr(engine, "get_match", mocked_match)
|
||||
try:
|
||||
r = getmatches(objects)
|
||||
except MemoryError:
|
||||
self.fail('MemorryError must be handled')
|
||||
self.fail("MemorryError must be handled")
|
||||
eq_(42, len(r))
|
||||
|
||||
|
||||
@@ -599,7 +648,7 @@ class TestCaseGroup:
|
||||
eq_([o1], g.dupes)
|
||||
g.switch_ref(o2)
|
||||
assert o2 is g.ref
|
||||
g.switch_ref(NamedObject('', True))
|
||||
g.switch_ref(NamedObject("", True))
|
||||
assert o2 is g.ref
|
||||
|
||||
def test_switch_ref_from_ref_dir(self):
|
||||
@@ -620,11 +669,11 @@ class TestCaseGroup:
|
||||
m = g.get_match_of(o)
|
||||
assert g.ref in m
|
||||
assert o in m
|
||||
assert g.get_match_of(NamedObject('', True)) is None
|
||||
assert g.get_match_of(NamedObject("", True)) is None
|
||||
assert g.get_match_of(g.ref) is None
|
||||
|
||||
def test_percentage(self):
|
||||
#percentage should return the avg percentage in relation to the ref
|
||||
# percentage should return the avg percentage in relation to the ref
|
||||
m1, m2, m3 = get_match_triangle()
|
||||
m1 = Match(m1[0], m1[1], 100)
|
||||
m2 = Match(m2[0], m2[1], 50)
|
||||
@@ -651,9 +700,9 @@ class TestCaseGroup:
|
||||
o1 = m1.first
|
||||
o2 = m1.second
|
||||
o3 = m2.second
|
||||
o1.name = 'c'
|
||||
o2.name = 'b'
|
||||
o3.name = 'a'
|
||||
o1.name = "c"
|
||||
o2.name = "b"
|
||||
o3.name = "a"
|
||||
g = Group()
|
||||
g.add_match(m1)
|
||||
g.add_match(m2)
|
||||
@@ -709,9 +758,9 @@ class TestCaseGroup:
|
||||
def test_prioritize_nothing_changes(self):
|
||||
# prioritize() returns False when nothing changes in the group.
|
||||
g = get_test_group()
|
||||
g[0].name = 'a'
|
||||
g[1].name = 'b'
|
||||
g[2].name = 'c'
|
||||
g[0].name = "a"
|
||||
g[1].name = "b"
|
||||
g[2].name = "c"
|
||||
assert not g.prioritize(lambda x: x.name)
|
||||
|
||||
def test_list_like(self):
|
||||
@@ -723,7 +772,11 @@ class TestCaseGroup:
|
||||
|
||||
def test_discard_matches(self):
|
||||
g = Group()
|
||||
o1, o2, o3 = (NamedObject("foo", True), NamedObject("bar", True), NamedObject("baz", True))
|
||||
o1, o2, o3 = (
|
||||
NamedObject("foo", True),
|
||||
NamedObject("bar", True),
|
||||
NamedObject("baz", True),
|
||||
)
|
||||
g.add_match(get_match(o1, o2))
|
||||
g.add_match(get_match(o1, o3))
|
||||
g.discard_matches()
|
||||
@@ -737,8 +790,8 @@ class TestCaseget_groups:
|
||||
eq_([], r)
|
||||
|
||||
def test_simple(self):
|
||||
l = [NamedObject("foo bar"), NamedObject("bar bleh")]
|
||||
matches = getmatches(l)
|
||||
itemList = [NamedObject("foo bar"), NamedObject("bar bleh")]
|
||||
matches = getmatches(itemList)
|
||||
m = matches[0]
|
||||
r = get_groups(matches)
|
||||
eq_(1, len(r))
|
||||
@@ -747,28 +800,39 @@ class TestCaseget_groups:
|
||||
eq_([m.second], g.dupes)
|
||||
|
||||
def test_group_with_multiple_matches(self):
|
||||
#This results in 3 matches
|
||||
l = [NamedObject("foo"), NamedObject("foo"), NamedObject("foo")]
|
||||
matches = getmatches(l)
|
||||
# This results in 3 matches
|
||||
itemList = [NamedObject("foo"), NamedObject("foo"), NamedObject("foo")]
|
||||
matches = getmatches(itemList)
|
||||
r = get_groups(matches)
|
||||
eq_(1, len(r))
|
||||
g = r[0]
|
||||
eq_(3, len(g))
|
||||
|
||||
def test_must_choose_a_group(self):
|
||||
l = [NamedObject("a b"), NamedObject("a b"), NamedObject("b c"), NamedObject("c d"), NamedObject("c d")]
|
||||
#There will be 2 groups here: group "a b" and group "c d"
|
||||
#"b c" can go either of them, but not both.
|
||||
matches = getmatches(l)
|
||||
itemList = [
|
||||
NamedObject("a b"),
|
||||
NamedObject("a b"),
|
||||
NamedObject("b c"),
|
||||
NamedObject("c d"),
|
||||
NamedObject("c d"),
|
||||
]
|
||||
# There will be 2 groups here: group "a b" and group "c d"
|
||||
# "b c" can go either of them, but not both.
|
||||
matches = getmatches(itemList)
|
||||
r = get_groups(matches)
|
||||
eq_(2, len(r))
|
||||
eq_(5, len(r[0])+len(r[1]))
|
||||
eq_(5, len(r[0]) + len(r[1]))
|
||||
|
||||
def test_should_all_go_in_the_same_group(self):
|
||||
l = [NamedObject("a b"), NamedObject("a b"), NamedObject("a b"), NamedObject("a b")]
|
||||
#There will be 2 groups here: group "a b" and group "c d"
|
||||
#"b c" can fit in both, but it must be in only one of them
|
||||
matches = getmatches(l)
|
||||
itemList = [
|
||||
NamedObject("a b"),
|
||||
NamedObject("a b"),
|
||||
NamedObject("a b"),
|
||||
NamedObject("a b"),
|
||||
]
|
||||
# There will be 2 groups here: group "a b" and group "c d"
|
||||
# "b c" can fit in both, but it must be in only one of them
|
||||
matches = getmatches(itemList)
|
||||
r = get_groups(matches)
|
||||
eq_(1, len(r))
|
||||
|
||||
@@ -787,8 +851,8 @@ class TestCaseget_groups:
|
||||
assert o3 in g
|
||||
|
||||
def test_four_sized_group(self):
|
||||
l = [NamedObject("foobar") for i in range(4)]
|
||||
m = getmatches(l)
|
||||
itemList = [NamedObject("foobar") for i in range(4)]
|
||||
m = getmatches(itemList)
|
||||
r = get_groups(m)
|
||||
eq_(1, len(r))
|
||||
eq_(4, len(r[0]))
|
||||
@@ -808,10 +872,12 @@ class TestCaseget_groups:
|
||||
# (A, B) match is the highest (thus resulting in an (A, B) group), still match C and D
|
||||
# in a separate group instead of discarding them.
|
||||
A, B, C, D = [NamedObject() for _ in range(4)]
|
||||
m1 = Match(A, B, 90) # This is the strongest "A" match
|
||||
m2 = Match(A, C, 80) # Because C doesn't match with B, it won't be in the group
|
||||
m3 = Match(A, D, 80) # Same thing for D
|
||||
m4 = Match(C, D, 70) # However, because C and D match, they should have their own group.
|
||||
m1 = Match(A, B, 90) # This is the strongest "A" match
|
||||
m2 = Match(A, C, 80) # Because C doesn't match with B, it won't be in the group
|
||||
m3 = Match(A, D, 80) # Same thing for D
|
||||
m4 = Match(
|
||||
C, D, 70
|
||||
) # However, because C and D match, they should have their own group.
|
||||
groups = get_groups([m1, m2, m3, m4])
|
||||
eq_(len(groups), 2)
|
||||
g1, g2 = groups
|
||||
@@ -819,4 +885,3 @@ class TestCaseget_groups:
|
||||
assert B in g1
|
||||
assert C in g2
|
||||
assert D in g2
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2009-10-23
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
import hashlib
|
||||
@@ -14,32 +14,35 @@ from core.tests.directories_test import create_fake_fs
|
||||
|
||||
from .. import fs
|
||||
|
||||
|
||||
def test_size_aggregates_subfiles(tmpdir):
|
||||
p = create_fake_fs(Path(str(tmpdir)))
|
||||
b = fs.Folder(p)
|
||||
eq_(b.size, 12)
|
||||
|
||||
|
||||
def test_md5_aggregate_subfiles_sorted(tmpdir):
|
||||
#dir.allfiles can return child in any order. Thus, bundle.md5 must aggregate
|
||||
#all files' md5 it contains, but it must make sure that it does so in the
|
||||
#same order everytime.
|
||||
# dir.allfiles can return child in any order. Thus, bundle.md5 must aggregate
|
||||
# all files' md5 it contains, but it must make sure that it does so in the
|
||||
# same order everytime.
|
||||
p = create_fake_fs(Path(str(tmpdir)))
|
||||
b = fs.Folder(p)
|
||||
md51 = fs.File(p['dir1']['file1.test']).md5
|
||||
md52 = fs.File(p['dir2']['file2.test']).md5
|
||||
md53 = fs.File(p['dir3']['file3.test']).md5
|
||||
md54 = fs.File(p['file1.test']).md5
|
||||
md55 = fs.File(p['file2.test']).md5
|
||||
md56 = fs.File(p['file3.test']).md5
|
||||
md51 = fs.File(p["dir1"]["file1.test"]).md5
|
||||
md52 = fs.File(p["dir2"]["file2.test"]).md5
|
||||
md53 = fs.File(p["dir3"]["file3.test"]).md5
|
||||
md54 = fs.File(p["file1.test"]).md5
|
||||
md55 = fs.File(p["file2.test"]).md5
|
||||
md56 = fs.File(p["file3.test"]).md5
|
||||
# The expected md5 is the md5 of md5s for folders and the direct md5 for files
|
||||
folder_md51 = hashlib.md5(md51).digest()
|
||||
folder_md52 = hashlib.md5(md52).digest()
|
||||
folder_md53 = hashlib.md5(md53).digest()
|
||||
md5 = hashlib.md5(folder_md51+folder_md52+folder_md53+md54+md55+md56)
|
||||
md5 = hashlib.md5(folder_md51 + folder_md52 + folder_md53 + md54 + md55 + md56)
|
||||
eq_(b.md5, md5.digest())
|
||||
|
||||
|
||||
def test_has_file_attrs(tmpdir):
|
||||
#a Folder must behave like a file, so it must have mtime attributes
|
||||
# a Folder must behave like a file, so it must have mtime attributes
|
||||
b = fs.Folder(Path(str(tmpdir)))
|
||||
assert b.mtime > 0
|
||||
eq_(b.extension, '')
|
||||
eq_(b.extension, "")
|
||||
|
||||
@@ -12,152 +12,172 @@ from hscommon.testutil import eq_
|
||||
|
||||
from ..ignore import IgnoreList
|
||||
|
||||
|
||||
def test_empty():
|
||||
il = IgnoreList()
|
||||
eq_(0, len(il))
|
||||
assert not il.AreIgnored('foo', 'bar')
|
||||
assert not il.AreIgnored("foo", "bar")
|
||||
|
||||
|
||||
def test_simple():
|
||||
il = IgnoreList()
|
||||
il.Ignore('foo', 'bar')
|
||||
assert il.AreIgnored('foo', 'bar')
|
||||
assert il.AreIgnored('bar', 'foo')
|
||||
assert not il.AreIgnored('foo', 'bleh')
|
||||
assert not il.AreIgnored('bleh', 'bar')
|
||||
il.Ignore("foo", "bar")
|
||||
assert il.AreIgnored("foo", "bar")
|
||||
assert il.AreIgnored("bar", "foo")
|
||||
assert not il.AreIgnored("foo", "bleh")
|
||||
assert not il.AreIgnored("bleh", "bar")
|
||||
eq_(1, len(il))
|
||||
|
||||
|
||||
def test_multiple():
|
||||
il = IgnoreList()
|
||||
il.Ignore('foo', 'bar')
|
||||
il.Ignore('foo', 'bleh')
|
||||
il.Ignore('bleh', 'bar')
|
||||
il.Ignore('aybabtu', 'bleh')
|
||||
assert il.AreIgnored('foo', 'bar')
|
||||
assert il.AreIgnored('bar', 'foo')
|
||||
assert il.AreIgnored('foo', 'bleh')
|
||||
assert il.AreIgnored('bleh', 'bar')
|
||||
assert not il.AreIgnored('aybabtu', 'bar')
|
||||
il.Ignore("foo", "bar")
|
||||
il.Ignore("foo", "bleh")
|
||||
il.Ignore("bleh", "bar")
|
||||
il.Ignore("aybabtu", "bleh")
|
||||
assert il.AreIgnored("foo", "bar")
|
||||
assert il.AreIgnored("bar", "foo")
|
||||
assert il.AreIgnored("foo", "bleh")
|
||||
assert il.AreIgnored("bleh", "bar")
|
||||
assert not il.AreIgnored("aybabtu", "bar")
|
||||
eq_(4, len(il))
|
||||
|
||||
|
||||
def test_clear():
|
||||
il = IgnoreList()
|
||||
il.Ignore('foo', 'bar')
|
||||
il.Ignore("foo", "bar")
|
||||
il.Clear()
|
||||
assert not il.AreIgnored('foo', 'bar')
|
||||
assert not il.AreIgnored('bar', 'foo')
|
||||
assert not il.AreIgnored("foo", "bar")
|
||||
assert not il.AreIgnored("bar", "foo")
|
||||
eq_(0, len(il))
|
||||
|
||||
|
||||
def test_add_same_twice():
|
||||
il = IgnoreList()
|
||||
il.Ignore('foo', 'bar')
|
||||
il.Ignore('bar', 'foo')
|
||||
il.Ignore("foo", "bar")
|
||||
il.Ignore("bar", "foo")
|
||||
eq_(1, len(il))
|
||||
|
||||
|
||||
def test_save_to_xml():
|
||||
il = IgnoreList()
|
||||
il.Ignore('foo', 'bar')
|
||||
il.Ignore('foo', 'bleh')
|
||||
il.Ignore('bleh', 'bar')
|
||||
il.Ignore("foo", "bar")
|
||||
il.Ignore("foo", "bleh")
|
||||
il.Ignore("bleh", "bar")
|
||||
f = io.BytesIO()
|
||||
il.save_to_xml(f)
|
||||
f.seek(0)
|
||||
doc = ET.parse(f)
|
||||
root = doc.getroot()
|
||||
eq_(root.tag, 'ignore_list')
|
||||
eq_(root.tag, "ignore_list")
|
||||
eq_(len(root), 2)
|
||||
eq_(len([c for c in root if c.tag == 'file']), 2)
|
||||
eq_(len([c for c in root if c.tag == "file"]), 2)
|
||||
f1, f2 = root[:]
|
||||
subchildren = [c for c in f1 if c.tag == 'file'] + [c for c in f2 if c.tag == 'file']
|
||||
subchildren = [c for c in f1 if c.tag == "file"] + [
|
||||
c for c in f2 if c.tag == "file"
|
||||
]
|
||||
eq_(len(subchildren), 3)
|
||||
|
||||
|
||||
def test_SaveThenLoad():
|
||||
il = IgnoreList()
|
||||
il.Ignore('foo', 'bar')
|
||||
il.Ignore('foo', 'bleh')
|
||||
il.Ignore('bleh', 'bar')
|
||||
il.Ignore('\u00e9', 'bar')
|
||||
il.Ignore("foo", "bar")
|
||||
il.Ignore("foo", "bleh")
|
||||
il.Ignore("bleh", "bar")
|
||||
il.Ignore("\u00e9", "bar")
|
||||
f = io.BytesIO()
|
||||
il.save_to_xml(f)
|
||||
f.seek(0)
|
||||
il = IgnoreList()
|
||||
il.load_from_xml(f)
|
||||
eq_(4, len(il))
|
||||
assert il.AreIgnored('\u00e9', 'bar')
|
||||
assert il.AreIgnored("\u00e9", "bar")
|
||||
|
||||
|
||||
def test_LoadXML_with_empty_file_tags():
|
||||
f = io.BytesIO()
|
||||
f.write(b'<?xml version="1.0" encoding="utf-8"?><ignore_list><file><file/></file></ignore_list>')
|
||||
f.write(
|
||||
b'<?xml version="1.0" encoding="utf-8"?><ignore_list><file><file/></file></ignore_list>'
|
||||
)
|
||||
f.seek(0)
|
||||
il = IgnoreList()
|
||||
il.load_from_xml(f)
|
||||
eq_(0, len(il))
|
||||
|
||||
|
||||
def test_AreIgnore_works_when_a_child_is_a_key_somewhere_else():
|
||||
il = IgnoreList()
|
||||
il.Ignore('foo', 'bar')
|
||||
il.Ignore('bar', 'baz')
|
||||
assert il.AreIgnored('bar', 'foo')
|
||||
il.Ignore("foo", "bar")
|
||||
il.Ignore("bar", "baz")
|
||||
assert il.AreIgnored("bar", "foo")
|
||||
|
||||
|
||||
def test_no_dupes_when_a_child_is_a_key_somewhere_else():
|
||||
il = IgnoreList()
|
||||
il.Ignore('foo', 'bar')
|
||||
il.Ignore('bar', 'baz')
|
||||
il.Ignore('bar', 'foo')
|
||||
il.Ignore("foo", "bar")
|
||||
il.Ignore("bar", "baz")
|
||||
il.Ignore("bar", "foo")
|
||||
eq_(2, len(il))
|
||||
|
||||
|
||||
def test_iterate():
|
||||
#It must be possible to iterate through ignore list
|
||||
# It must be possible to iterate through ignore list
|
||||
il = IgnoreList()
|
||||
expected = [('foo', 'bar'), ('bar', 'baz'), ('foo', 'baz')]
|
||||
expected = [("foo", "bar"), ("bar", "baz"), ("foo", "baz")]
|
||||
for i in expected:
|
||||
il.Ignore(i[0], i[1])
|
||||
for i in il:
|
||||
expected.remove(i) #No exception should be raised
|
||||
assert not expected #expected should be empty
|
||||
expected.remove(i) # No exception should be raised
|
||||
assert not expected # expected should be empty
|
||||
|
||||
|
||||
def test_filter():
|
||||
il = IgnoreList()
|
||||
il.Ignore('foo', 'bar')
|
||||
il.Ignore('bar', 'baz')
|
||||
il.Ignore('foo', 'baz')
|
||||
il.Filter(lambda f, s: f == 'bar')
|
||||
il.Ignore("foo", "bar")
|
||||
il.Ignore("bar", "baz")
|
||||
il.Ignore("foo", "baz")
|
||||
il.Filter(lambda f, s: f == "bar")
|
||||
eq_(1, len(il))
|
||||
assert not il.AreIgnored('foo', 'bar')
|
||||
assert il.AreIgnored('bar', 'baz')
|
||||
assert not il.AreIgnored("foo", "bar")
|
||||
assert il.AreIgnored("bar", "baz")
|
||||
|
||||
|
||||
def test_save_with_non_ascii_items():
|
||||
il = IgnoreList()
|
||||
il.Ignore('\xac', '\xbf')
|
||||
il.Ignore("\xac", "\xbf")
|
||||
f = io.BytesIO()
|
||||
try:
|
||||
il.save_to_xml(f)
|
||||
except Exception as e:
|
||||
raise AssertionError(str(e))
|
||||
|
||||
|
||||
def test_len():
|
||||
il = IgnoreList()
|
||||
eq_(0, len(il))
|
||||
il.Ignore('foo', 'bar')
|
||||
il.Ignore("foo", "bar")
|
||||
eq_(1, len(il))
|
||||
|
||||
|
||||
def test_nonzero():
|
||||
il = IgnoreList()
|
||||
assert not il
|
||||
il.Ignore('foo', 'bar')
|
||||
il.Ignore("foo", "bar")
|
||||
assert il
|
||||
|
||||
|
||||
def test_remove():
|
||||
il = IgnoreList()
|
||||
il.Ignore('foo', 'bar')
|
||||
il.Ignore('foo', 'baz')
|
||||
il.remove('bar', 'foo')
|
||||
il.Ignore("foo", "bar")
|
||||
il.Ignore("foo", "baz")
|
||||
il.remove("bar", "foo")
|
||||
eq_(len(il), 1)
|
||||
assert not il.AreIgnored('foo', 'bar')
|
||||
assert not il.AreIgnored("foo", "bar")
|
||||
|
||||
|
||||
def test_remove_non_existant():
|
||||
il = IgnoreList()
|
||||
il.Ignore('foo', 'bar')
|
||||
il.Ignore('foo', 'baz')
|
||||
il.Ignore("foo", "bar")
|
||||
il.Ignore("foo", "baz")
|
||||
with raises(ValueError):
|
||||
il.remove('foo', 'bleh')
|
||||
il.remove("foo", "bleh")
|
||||
|
||||
@@ -8,33 +8,39 @@ from hscommon.testutil import eq_
|
||||
|
||||
from ..markable import MarkableList, Markable
|
||||
|
||||
|
||||
def gen():
|
||||
ml = MarkableList()
|
||||
ml.extend(list(range(10)))
|
||||
return ml
|
||||
|
||||
|
||||
def test_unmarked():
|
||||
ml = gen()
|
||||
for i in ml:
|
||||
assert not ml.is_marked(i)
|
||||
|
||||
|
||||
def test_mark():
|
||||
ml = gen()
|
||||
assert ml.mark(3)
|
||||
assert ml.is_marked(3)
|
||||
assert not ml.is_marked(2)
|
||||
|
||||
|
||||
def test_unmark():
|
||||
ml = gen()
|
||||
ml.mark(4)
|
||||
assert ml.unmark(4)
|
||||
assert not ml.is_marked(4)
|
||||
|
||||
|
||||
def test_unmark_unmarked():
|
||||
ml = gen()
|
||||
assert not ml.unmark(4)
|
||||
assert not ml.is_marked(4)
|
||||
|
||||
|
||||
def test_mark_twice_and_unmark():
|
||||
ml = gen()
|
||||
assert ml.mark(5)
|
||||
@@ -42,6 +48,7 @@ def test_mark_twice_and_unmark():
|
||||
ml.unmark(5)
|
||||
assert not ml.is_marked(5)
|
||||
|
||||
|
||||
def test_mark_toggle():
|
||||
ml = gen()
|
||||
ml.mark_toggle(6)
|
||||
@@ -51,22 +58,25 @@ def test_mark_toggle():
|
||||
ml.mark_toggle(6)
|
||||
assert ml.is_marked(6)
|
||||
|
||||
|
||||
def test_is_markable():
|
||||
class Foobar(Markable):
|
||||
def _is_markable(self, o):
|
||||
return o == 'foobar'
|
||||
return o == "foobar"
|
||||
|
||||
f = Foobar()
|
||||
assert not f.is_marked('foobar')
|
||||
assert not f.mark('foo')
|
||||
assert not f.is_marked('foo')
|
||||
f.mark_toggle('foo')
|
||||
assert not f.is_marked('foo')
|
||||
f.mark('foobar')
|
||||
assert f.is_marked('foobar')
|
||||
assert not f.is_marked("foobar")
|
||||
assert not f.mark("foo")
|
||||
assert not f.is_marked("foo")
|
||||
f.mark_toggle("foo")
|
||||
assert not f.is_marked("foo")
|
||||
f.mark("foobar")
|
||||
assert f.is_marked("foobar")
|
||||
ml = gen()
|
||||
ml.mark(11)
|
||||
assert not ml.is_marked(11)
|
||||
|
||||
|
||||
def test_change_notifications():
|
||||
class Foobar(Markable):
|
||||
def _did_mark(self, o):
|
||||
@@ -77,13 +87,14 @@ def test_change_notifications():
|
||||
|
||||
f = Foobar()
|
||||
f.log = []
|
||||
f.mark('foo')
|
||||
f.mark('foo')
|
||||
f.mark_toggle('bar')
|
||||
f.unmark('foo')
|
||||
f.unmark('foo')
|
||||
f.mark_toggle('bar')
|
||||
eq_([(True, 'foo'), (True, 'bar'), (False, 'foo'), (False, 'bar')], f.log)
|
||||
f.mark("foo")
|
||||
f.mark("foo")
|
||||
f.mark_toggle("bar")
|
||||
f.unmark("foo")
|
||||
f.unmark("foo")
|
||||
f.mark_toggle("bar")
|
||||
eq_([(True, "foo"), (True, "bar"), (False, "foo"), (False, "bar")], f.log)
|
||||
|
||||
|
||||
def test_mark_count():
|
||||
ml = gen()
|
||||
@@ -93,6 +104,7 @@ def test_mark_count():
|
||||
ml.mark(11)
|
||||
eq_(1, ml.mark_count)
|
||||
|
||||
|
||||
def test_mark_none():
|
||||
log = []
|
||||
ml = gen()
|
||||
@@ -104,6 +116,7 @@ def test_mark_none():
|
||||
eq_(0, ml.mark_count)
|
||||
eq_([1, 2], log)
|
||||
|
||||
|
||||
def test_mark_all():
|
||||
ml = gen()
|
||||
eq_(0, ml.mark_count)
|
||||
@@ -111,6 +124,7 @@ def test_mark_all():
|
||||
eq_(10, ml.mark_count)
|
||||
assert ml.is_marked(1)
|
||||
|
||||
|
||||
def test_mark_invert():
|
||||
ml = gen()
|
||||
ml.mark(1)
|
||||
@@ -118,6 +132,7 @@ def test_mark_invert():
|
||||
assert not ml.is_marked(1)
|
||||
assert ml.is_marked(2)
|
||||
|
||||
|
||||
def test_mark_while_inverted():
|
||||
log = []
|
||||
ml = gen()
|
||||
@@ -134,6 +149,7 @@ def test_mark_while_inverted():
|
||||
eq_(7, ml.mark_count)
|
||||
eq_([(True, 1), (False, 1), (True, 2), (True, 1), (True, 3)], log)
|
||||
|
||||
|
||||
def test_remove_mark_flag():
|
||||
ml = gen()
|
||||
ml.mark(1)
|
||||
@@ -145,10 +161,12 @@ def test_remove_mark_flag():
|
||||
ml._remove_mark_flag(1)
|
||||
assert ml.is_marked(1)
|
||||
|
||||
|
||||
def test_is_marked_returns_false_if_object_not_markable():
|
||||
class MyMarkableList(MarkableList):
|
||||
def _is_markable(self, o):
|
||||
return o != 4
|
||||
|
||||
ml = MyMarkableList()
|
||||
ml.extend(list(range(10)))
|
||||
ml.mark_invert()
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2011/09/07
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
import os.path as op
|
||||
@@ -14,6 +14,7 @@ from ..engine import Group, Match
|
||||
|
||||
no = NamedObject
|
||||
|
||||
|
||||
def app_with_dupes(dupes):
|
||||
# Creates an app with specified dupes. dupes is a list of lists, each list in the list being
|
||||
# a dupe group. We cheat a little bit by creating dupe groups manually instead of running a
|
||||
@@ -29,57 +30,63 @@ def app_with_dupes(dupes):
|
||||
app.app._results_changed()
|
||||
return app
|
||||
|
||||
#---
|
||||
|
||||
# ---
|
||||
def app_normal_results():
|
||||
# Just some results, with different extensions and size, for good measure.
|
||||
dupes = [
|
||||
[
|
||||
no('foo1.ext1', size=1, folder='folder1'),
|
||||
no('foo2.ext2', size=2, folder='folder2')
|
||||
no("foo1.ext1", size=1, folder="folder1"),
|
||||
no("foo2.ext2", size=2, folder="folder2"),
|
||||
],
|
||||
]
|
||||
return app_with_dupes(dupes)
|
||||
|
||||
|
||||
@with_app(app_normal_results)
|
||||
def test_kind_subcrit(app):
|
||||
# The subcriteria of the "Kind" criteria is a list of extensions contained in the dupes.
|
||||
app.select_pri_criterion("Kind")
|
||||
eq_(app.pdialog.criteria_list[:], ['ext1', 'ext2'])
|
||||
eq_(app.pdialog.criteria_list[:], ["ext1", "ext2"])
|
||||
|
||||
|
||||
@with_app(app_normal_results)
|
||||
def test_kind_reprioritization(app):
|
||||
# Just a simple test of the system as a whole.
|
||||
# select a criterion, and perform re-prioritization and see if it worked.
|
||||
app.select_pri_criterion("Kind")
|
||||
app.pdialog.criteria_list.select([1]) # ext2
|
||||
app.pdialog.criteria_list.select([1]) # ext2
|
||||
app.pdialog.add_selected()
|
||||
app.pdialog.perform_reprioritization()
|
||||
eq_(app.rtable[0].data['name'], 'foo2.ext2')
|
||||
eq_(app.rtable[0].data["name"], "foo2.ext2")
|
||||
|
||||
|
||||
@with_app(app_normal_results)
|
||||
def test_folder_subcrit(app):
|
||||
app.select_pri_criterion("Folder")
|
||||
eq_(app.pdialog.criteria_list[:], ['folder1', 'folder2'])
|
||||
eq_(app.pdialog.criteria_list[:], ["folder1", "folder2"])
|
||||
|
||||
|
||||
@with_app(app_normal_results)
|
||||
def test_folder_reprioritization(app):
|
||||
app.select_pri_criterion("Folder")
|
||||
app.pdialog.criteria_list.select([1]) # folder2
|
||||
app.pdialog.criteria_list.select([1]) # folder2
|
||||
app.pdialog.add_selected()
|
||||
app.pdialog.perform_reprioritization()
|
||||
eq_(app.rtable[0].data['name'], 'foo2.ext2')
|
||||
eq_(app.rtable[0].data["name"], "foo2.ext2")
|
||||
|
||||
|
||||
@with_app(app_normal_results)
|
||||
def test_prilist_display(app):
|
||||
# The prioritization list displays selected criteria correctly.
|
||||
app.select_pri_criterion("Kind")
|
||||
app.pdialog.criteria_list.select([1]) # ext2
|
||||
app.pdialog.criteria_list.select([1]) # ext2
|
||||
app.pdialog.add_selected()
|
||||
app.select_pri_criterion("Folder")
|
||||
app.pdialog.criteria_list.select([1]) # folder2
|
||||
app.pdialog.criteria_list.select([1]) # folder2
|
||||
app.pdialog.add_selected()
|
||||
app.select_pri_criterion("Size")
|
||||
app.pdialog.criteria_list.select([1]) # Lowest
|
||||
app.pdialog.criteria_list.select([1]) # Lowest
|
||||
app.pdialog.add_selected()
|
||||
expected = [
|
||||
"Kind (ext2)",
|
||||
@@ -88,23 +95,26 @@ def test_prilist_display(app):
|
||||
]
|
||||
eq_(app.pdialog.prioritization_list[:], expected)
|
||||
|
||||
|
||||
@with_app(app_normal_results)
|
||||
def test_size_subcrit(app):
|
||||
app.select_pri_criterion("Size")
|
||||
eq_(app.pdialog.criteria_list[:], ['Highest', 'Lowest'])
|
||||
eq_(app.pdialog.criteria_list[:], ["Highest", "Lowest"])
|
||||
|
||||
|
||||
@with_app(app_normal_results)
|
||||
def test_size_reprioritization(app):
|
||||
app.select_pri_criterion("Size")
|
||||
app.pdialog.criteria_list.select([0]) # highest
|
||||
app.pdialog.criteria_list.select([0]) # highest
|
||||
app.pdialog.add_selected()
|
||||
app.pdialog.perform_reprioritization()
|
||||
eq_(app.rtable[0].data['name'], 'foo2.ext2')
|
||||
eq_(app.rtable[0].data["name"], "foo2.ext2")
|
||||
|
||||
|
||||
@with_app(app_normal_results)
|
||||
def test_reorder_prioritizations(app):
|
||||
app.add_pri_criterion("Kind", 0) # ext1
|
||||
app.add_pri_criterion("Kind", 1) # ext2
|
||||
app.add_pri_criterion("Kind", 0) # ext1
|
||||
app.add_pri_criterion("Kind", 1) # ext2
|
||||
app.pdialog.prioritization_list.move_indexes([1], 0)
|
||||
expected = [
|
||||
"Kind (ext2)",
|
||||
@@ -112,6 +122,7 @@ def test_reorder_prioritizations(app):
|
||||
]
|
||||
eq_(app.pdialog.prioritization_list[:], expected)
|
||||
|
||||
|
||||
@with_app(app_normal_results)
|
||||
def test_remove_crit_from_list(app):
|
||||
app.add_pri_criterion("Kind", 0)
|
||||
@@ -123,75 +134,72 @@ def test_remove_crit_from_list(app):
|
||||
]
|
||||
eq_(app.pdialog.prioritization_list[:], expected)
|
||||
|
||||
|
||||
@with_app(app_normal_results)
|
||||
def test_add_crit_without_selection(app):
|
||||
# Adding a criterion without having made a selection doesn't cause a crash.
|
||||
app.pdialog.add_selected() # no crash
|
||||
app.pdialog.add_selected() # no crash
|
||||
|
||||
#---
|
||||
|
||||
# ---
|
||||
def app_one_name_ends_with_number():
|
||||
dupes = [
|
||||
[
|
||||
no('foo.ext'),
|
||||
no('foo1.ext'),
|
||||
],
|
||||
[no("foo.ext"), no("foo1.ext")],
|
||||
]
|
||||
return app_with_dupes(dupes)
|
||||
|
||||
|
||||
@with_app(app_one_name_ends_with_number)
|
||||
def test_filename_reprioritization(app):
|
||||
app.add_pri_criterion("Filename", 0) # Ends with a number
|
||||
app.add_pri_criterion("Filename", 0) # Ends with a number
|
||||
app.pdialog.perform_reprioritization()
|
||||
eq_(app.rtable[0].data['name'], 'foo1.ext')
|
||||
eq_(app.rtable[0].data["name"], "foo1.ext")
|
||||
|
||||
#---
|
||||
|
||||
# ---
|
||||
def app_with_subfolders():
|
||||
dupes = [
|
||||
[
|
||||
no('foo1', folder='baz'),
|
||||
no('foo2', folder='foo/bar'),
|
||||
],
|
||||
[
|
||||
no('foo3', folder='baz'),
|
||||
no('foo4', folder='foo'),
|
||||
],
|
||||
[no("foo1", folder="baz"), no("foo2", folder="foo/bar")],
|
||||
[no("foo3", folder="baz"), no("foo4", folder="foo")],
|
||||
]
|
||||
return app_with_dupes(dupes)
|
||||
|
||||
|
||||
@with_app(app_with_subfolders)
|
||||
def test_folder_crit_is_sorted(app):
|
||||
# Folder subcriteria are sorted.
|
||||
app.select_pri_criterion("Folder")
|
||||
eq_(app.pdialog.criteria_list[:], ['baz', 'foo', op.join('foo', 'bar')])
|
||||
eq_(app.pdialog.criteria_list[:], ["baz", "foo", op.join("foo", "bar")])
|
||||
|
||||
|
||||
@with_app(app_with_subfolders)
|
||||
def test_folder_crit_includes_subfolders(app):
|
||||
# When selecting a folder crit, dupes in a subfolder are also considered as affected by that
|
||||
# crit.
|
||||
app.add_pri_criterion("Folder", 1) # foo
|
||||
app.add_pri_criterion("Folder", 1) # foo
|
||||
app.pdialog.perform_reprioritization()
|
||||
# Both foo and foo/bar dupes will be prioritized
|
||||
eq_(app.rtable[0].data['name'], 'foo2')
|
||||
eq_(app.rtable[2].data['name'], 'foo4')
|
||||
eq_(app.rtable[0].data["name"], "foo2")
|
||||
eq_(app.rtable[2].data["name"], "foo4")
|
||||
|
||||
|
||||
@with_app(app_with_subfolders)
|
||||
def test_display_something_on_empty_extensions(app):
|
||||
# When there's no extension, display "None" instead of nothing at all.
|
||||
app.select_pri_criterion("Kind")
|
||||
eq_(app.pdialog.criteria_list[:], ['None'])
|
||||
eq_(app.pdialog.criteria_list[:], ["None"])
|
||||
|
||||
#---
|
||||
|
||||
# ---
|
||||
def app_one_name_longer_than_the_other():
|
||||
dupes = [
|
||||
[
|
||||
no('shortest.ext'),
|
||||
no('loooongest.ext'),
|
||||
],
|
||||
[no("shortest.ext"), no("loooongest.ext")],
|
||||
]
|
||||
return app_with_dupes(dupes)
|
||||
|
||||
|
||||
@with_app(app_one_name_longer_than_the_other)
|
||||
def test_longest_filename_prioritization(app):
|
||||
app.add_pri_criterion("Filename", 2) # Longest
|
||||
app.add_pri_criterion("Filename", 2) # Longest
|
||||
app.pdialog.perform_reprioritization()
|
||||
eq_(app.rtable[0].data['name'], 'loooongest.ext')
|
||||
eq_(app.rtable[0].data["name"], "loooongest.ext")
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2013-07-28
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
from .base import TestApp, GetTestGroups
|
||||
|
||||
|
||||
def app_with_results():
|
||||
app = TestApp()
|
||||
objects, matches, groups = GetTestGroups()
|
||||
@@ -15,23 +16,26 @@ def app_with_results():
|
||||
app.rtable.refresh()
|
||||
return app
|
||||
|
||||
|
||||
def test_delta_flags_delta_mode_off():
|
||||
app = app_with_results()
|
||||
# When the delta mode is off, we never have delta values flags
|
||||
app.rtable.delta_values = False
|
||||
# Ref file, always false anyway
|
||||
assert not app.rtable[0].is_cell_delta('size')
|
||||
assert not app.rtable[0].is_cell_delta("size")
|
||||
# False because delta mode is off
|
||||
assert not app.rtable[1].is_cell_delta('size')
|
||||
|
||||
assert not app.rtable[1].is_cell_delta("size")
|
||||
|
||||
|
||||
def test_delta_flags_delta_mode_on_delta_columns():
|
||||
# When the delta mode is on, delta columns always have a delta flag, except for ref rows
|
||||
app = app_with_results()
|
||||
app.rtable.delta_values = True
|
||||
# Ref file, always false anyway
|
||||
assert not app.rtable[0].is_cell_delta('size')
|
||||
assert not app.rtable[0].is_cell_delta("size")
|
||||
# But for a dupe, the flag is on
|
||||
assert app.rtable[1].is_cell_delta('size')
|
||||
assert app.rtable[1].is_cell_delta("size")
|
||||
|
||||
|
||||
def test_delta_flags_delta_mode_on_non_delta_columns():
|
||||
# When the delta mode is on, non-delta columns have a delta flag if their value differs from
|
||||
@@ -39,11 +43,12 @@ def test_delta_flags_delta_mode_on_non_delta_columns():
|
||||
app = app_with_results()
|
||||
app.rtable.delta_values = True
|
||||
# "bar bleh" != "foo bar", flag on
|
||||
assert app.rtable[1].is_cell_delta('name')
|
||||
assert app.rtable[1].is_cell_delta("name")
|
||||
# "ibabtu" row, but it's a ref, flag off
|
||||
assert not app.rtable[3].is_cell_delta('name')
|
||||
assert not app.rtable[3].is_cell_delta("name")
|
||||
# "ibabtu" == "ibabtu", flag off
|
||||
assert not app.rtable[4].is_cell_delta('name')
|
||||
assert not app.rtable[4].is_cell_delta("name")
|
||||
|
||||
|
||||
def test_delta_flags_delta_mode_on_non_delta_columns_case_insensitive():
|
||||
# Comparison that occurs for non-numeric columns to check whether they're delta is case
|
||||
@@ -53,4 +58,4 @@ def test_delta_flags_delta_mode_on_non_delta_columns_case_insensitive():
|
||||
app.app.results.groups[1].dupes[0].name = "IBaBTU"
|
||||
app.rtable.delta_values = True
|
||||
# "ibAbtu" == "IBaBTU", flag off
|
||||
assert not app.rtable[4].is_cell_delta('name')
|
||||
assert not app.rtable[4].is_cell_delta("name")
|
||||
|
||||
@@ -17,6 +17,7 @@ from .. import engine
|
||||
from .base import NamedObject, GetTestGroups, DupeGuru
|
||||
from ..results import Results
|
||||
|
||||
|
||||
class TestCaseResultsEmpty:
|
||||
def setup_method(self, method):
|
||||
self.app = DupeGuru()
|
||||
@@ -24,8 +25,8 @@ class TestCaseResultsEmpty:
|
||||
|
||||
def test_apply_invalid_filter(self):
|
||||
# If the applied filter is an invalid regexp, just ignore the filter.
|
||||
self.results.apply_filter('[') # invalid
|
||||
self.test_stat_line() # make sure that the stats line isn't saying we applied a '[' filter
|
||||
self.results.apply_filter("[") # invalid
|
||||
self.test_stat_line() # make sure that the stats line isn't saying we applied a '[' filter
|
||||
|
||||
def test_stat_line(self):
|
||||
eq_("0 / 0 (0.00 B / 0.00 B) duplicates marked.", self.results.stat_line)
|
||||
@@ -34,7 +35,7 @@ class TestCaseResultsEmpty:
|
||||
eq_(0, len(self.results.groups))
|
||||
|
||||
def test_get_group_of_duplicate(self):
|
||||
assert self.results.get_group_of_duplicate('foo') is None
|
||||
assert self.results.get_group_of_duplicate("foo") is None
|
||||
|
||||
def test_save_to_xml(self):
|
||||
f = io.BytesIO()
|
||||
@@ -42,7 +43,7 @@ class TestCaseResultsEmpty:
|
||||
f.seek(0)
|
||||
doc = ET.parse(f)
|
||||
root = doc.getroot()
|
||||
eq_('results', root.tag)
|
||||
eq_("results", root.tag)
|
||||
|
||||
def test_is_modified(self):
|
||||
assert not self.results.is_modified
|
||||
@@ -59,10 +60,10 @@ class TestCaseResultsEmpty:
|
||||
# would have been some kind of feedback to the user, but the work involved for something
|
||||
# that simply never happens (I never received a report of this crash, I experienced it
|
||||
# while fooling around) is too much. Instead, use standard name conflict resolution.
|
||||
folderpath = tmpdir.join('foo')
|
||||
folderpath = tmpdir.join("foo")
|
||||
folderpath.mkdir()
|
||||
self.results.save_to_xml(str(folderpath)) # no crash
|
||||
assert tmpdir.join('[000] foo').check()
|
||||
self.results.save_to_xml(str(folderpath)) # no crash
|
||||
assert tmpdir.join("[000] foo").check()
|
||||
|
||||
|
||||
class TestCaseResultsWithSomeGroups:
|
||||
@@ -116,18 +117,22 @@ class TestCaseResultsWithSomeGroups:
|
||||
assert d is g.ref
|
||||
|
||||
def test_sort_groups(self):
|
||||
self.results.make_ref(self.objects[1]) #We want to make the 1024 sized object to go ref.
|
||||
self.results.make_ref(
|
||||
self.objects[1]
|
||||
) # We want to make the 1024 sized object to go ref.
|
||||
g1, g2 = self.groups
|
||||
self.results.sort_groups('size')
|
||||
self.results.sort_groups("size")
|
||||
assert self.results.groups[0] is g2
|
||||
assert self.results.groups[1] is g1
|
||||
self.results.sort_groups('size', False)
|
||||
self.results.sort_groups("size", False)
|
||||
assert self.results.groups[0] is g1
|
||||
assert self.results.groups[1] is g2
|
||||
|
||||
def test_set_groups_when_sorted(self):
|
||||
self.results.make_ref(self.objects[1]) #We want to make the 1024 sized object to go ref.
|
||||
self.results.sort_groups('size')
|
||||
self.results.make_ref(
|
||||
self.objects[1]
|
||||
) # We want to make the 1024 sized object to go ref.
|
||||
self.results.sort_groups("size")
|
||||
objects, matches, groups = GetTestGroups()
|
||||
g1, g2 = groups
|
||||
g1.switch_ref(objects[1])
|
||||
@@ -158,9 +163,9 @@ class TestCaseResultsWithSomeGroups:
|
||||
o3.size = 3
|
||||
o4.size = 2
|
||||
o5.size = 1
|
||||
self.results.sort_dupes('size')
|
||||
self.results.sort_dupes("size")
|
||||
eq_([o5, o3, o2], self.results.dupes)
|
||||
self.results.sort_dupes('size', False)
|
||||
self.results.sort_dupes("size", False)
|
||||
eq_([o2, o3, o5], self.results.dupes)
|
||||
|
||||
def test_dupe_list_remember_sort(self):
|
||||
@@ -170,25 +175,25 @@ class TestCaseResultsWithSomeGroups:
|
||||
o3.size = 3
|
||||
o4.size = 2
|
||||
o5.size = 1
|
||||
self.results.sort_dupes('size')
|
||||
self.results.sort_dupes("size")
|
||||
self.results.make_ref(o2)
|
||||
eq_([o5, o3, o1], self.results.dupes)
|
||||
|
||||
def test_dupe_list_sort_delta_values(self):
|
||||
o1, o2, o3, o4, o5 = self.objects
|
||||
o1.size = 10
|
||||
o2.size = 2 #-8
|
||||
o3.size = 3 #-7
|
||||
o2.size = 2 # -8
|
||||
o3.size = 3 # -7
|
||||
o4.size = 20
|
||||
o5.size = 1 #-19
|
||||
self.results.sort_dupes('size', delta=True)
|
||||
o5.size = 1 # -19
|
||||
self.results.sort_dupes("size", delta=True)
|
||||
eq_([o5, o2, o3], self.results.dupes)
|
||||
|
||||
def test_sort_empty_list(self):
|
||||
#There was an infinite loop when sorting an empty list.
|
||||
# There was an infinite loop when sorting an empty list.
|
||||
app = DupeGuru()
|
||||
r = app.results
|
||||
r.sort_dupes('name')
|
||||
r.sort_dupes("name")
|
||||
eq_([], r.dupes)
|
||||
|
||||
def test_dupe_list_update_on_remove_duplicates(self):
|
||||
@@ -209,7 +214,7 @@ class TestCaseResultsWithSomeGroups:
|
||||
f = io.BytesIO()
|
||||
self.results.save_to_xml(f)
|
||||
assert not self.results.is_modified
|
||||
self.results.groups = self.groups # sets the flag back
|
||||
self.results.groups = self.groups # sets the flag back
|
||||
f.seek(0)
|
||||
self.results.load_from_xml(f, get_file)
|
||||
assert not self.results.is_modified
|
||||
@@ -236,7 +241,7 @@ class TestCaseResultsWithSomeGroups:
|
||||
# "aaa" makes our dupe go first in alphabetical order, but since we have the same value as
|
||||
# ref, we're going last.
|
||||
g2r.name = g2d1.name = "aaa"
|
||||
self.results.sort_dupes('name', delta=True)
|
||||
self.results.sort_dupes("name", delta=True)
|
||||
eq_("aaa", self.results.dupes[2].name)
|
||||
|
||||
def test_dupe_list_sort_delta_values_nonnumeric_case_insensitive(self):
|
||||
@@ -244,9 +249,10 @@ class TestCaseResultsWithSomeGroups:
|
||||
g1r, g1d1, g1d2, g2r, g2d1 = self.objects
|
||||
g2r.name = "AaA"
|
||||
g2d1.name = "aAa"
|
||||
self.results.sort_dupes('name', delta=True)
|
||||
self.results.sort_dupes("name", delta=True)
|
||||
eq_("aAa", self.results.dupes[2].name)
|
||||
|
||||
|
||||
class TestCaseResultsWithSavedResults:
|
||||
def setup_method(self, method):
|
||||
self.app = DupeGuru()
|
||||
@@ -266,7 +272,7 @@ class TestCaseResultsWithSavedResults:
|
||||
def get_file(path):
|
||||
return [f for f in self.objects if str(f.path) == path][0]
|
||||
|
||||
self.results.groups = self.groups # sets the flag back
|
||||
self.results.groups = self.groups # sets the flag back
|
||||
self.results.load_from_xml(self.f, get_file)
|
||||
assert not self.results.is_modified
|
||||
|
||||
@@ -299,7 +305,7 @@ class TestCaseResultsMarkings:
|
||||
self.results.mark(self.objects[2])
|
||||
self.results.mark(self.objects[4])
|
||||
eq_("2 / 3 (2.00 B / 1.01 KB) duplicates marked.", self.results.stat_line)
|
||||
self.results.mark(self.objects[0]) #this is a ref, it can't be counted
|
||||
self.results.mark(self.objects[0]) # this is a ref, it can't be counted
|
||||
eq_("2 / 3 (2.00 B / 1.01 KB) duplicates marked.", self.results.stat_line)
|
||||
self.results.groups = self.groups
|
||||
eq_("0 / 3 (0.00 B / 1.01 KB) duplicates marked.", self.results.stat_line)
|
||||
@@ -335,7 +341,7 @@ class TestCaseResultsMarkings:
|
||||
def log_object(o):
|
||||
log.append(o)
|
||||
if o is self.objects[1]:
|
||||
raise EnvironmentError('foobar')
|
||||
raise EnvironmentError("foobar")
|
||||
|
||||
log = []
|
||||
self.results.mark_all()
|
||||
@@ -350,7 +356,7 @@ class TestCaseResultsMarkings:
|
||||
eq_(len(self.results.problems), 1)
|
||||
dupe, msg = self.results.problems[0]
|
||||
assert dupe is self.objects[1]
|
||||
eq_(msg, 'foobar')
|
||||
eq_(msg, "foobar")
|
||||
|
||||
def test_perform_on_marked_with_ref(self):
|
||||
def log_object(o):
|
||||
@@ -408,20 +414,20 @@ class TestCaseResultsMarkings:
|
||||
f.seek(0)
|
||||
doc = ET.parse(f)
|
||||
root = doc.getroot()
|
||||
g1, g2 = root.getiterator('group')
|
||||
d1, d2, d3 = g1.getiterator('file')
|
||||
eq_('n', d1.get('marked'))
|
||||
eq_('n', d2.get('marked'))
|
||||
eq_('y', d3.get('marked'))
|
||||
d1, d2 = g2.getiterator('file')
|
||||
eq_('n', d1.get('marked'))
|
||||
eq_('y', d2.get('marked'))
|
||||
g1, g2 = root.getiterator("group")
|
||||
d1, d2, d3 = g1.getiterator("file")
|
||||
eq_("n", d1.get("marked"))
|
||||
eq_("n", d2.get("marked"))
|
||||
eq_("y", d3.get("marked"))
|
||||
d1, d2 = g2.getiterator("file")
|
||||
eq_("n", d1.get("marked"))
|
||||
eq_("y", d2.get("marked"))
|
||||
|
||||
def test_LoadXML(self):
|
||||
def get_file(path):
|
||||
return [f for f in self.objects if str(f.path) == path][0]
|
||||
|
||||
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
|
||||
self.objects[4].name = "ibabtu 2" # we can't have 2 files with the same path
|
||||
self.results.mark(self.objects[1])
|
||||
self.results.mark_invert()
|
||||
f = io.BytesIO()
|
||||
@@ -444,51 +450,51 @@ class TestCaseResultsXML:
|
||||
self.objects, self.matches, self.groups = GetTestGroups()
|
||||
self.results.groups = self.groups
|
||||
|
||||
def get_file(self, path): # use this as a callback for load_from_xml
|
||||
def get_file(self, path): # use this as a callback for load_from_xml
|
||||
return [o for o in self.objects if o.path == path][0]
|
||||
|
||||
def test_save_to_xml(self):
|
||||
self.objects[0].is_ref = True
|
||||
self.objects[0].words = [['foo', 'bar']]
|
||||
self.objects[0].words = [["foo", "bar"]]
|
||||
f = io.BytesIO()
|
||||
self.results.save_to_xml(f)
|
||||
f.seek(0)
|
||||
doc = ET.parse(f)
|
||||
root = doc.getroot()
|
||||
eq_('results', root.tag)
|
||||
eq_("results", root.tag)
|
||||
eq_(2, len(root))
|
||||
eq_(2, len([c for c in root if c.tag == 'group']))
|
||||
eq_(2, len([c for c in root if c.tag == "group"]))
|
||||
g1, g2 = root
|
||||
eq_(6, len(g1))
|
||||
eq_(3, len([c for c in g1 if c.tag == 'file']))
|
||||
eq_(3, len([c for c in g1 if c.tag == 'match']))
|
||||
d1, d2, d3 = [c for c in g1 if c.tag == 'file']
|
||||
eq_(op.join('basepath', 'foo bar'), d1.get('path'))
|
||||
eq_(op.join('basepath', 'bar bleh'), d2.get('path'))
|
||||
eq_(op.join('basepath', 'foo bleh'), d3.get('path'))
|
||||
eq_('y', d1.get('is_ref'))
|
||||
eq_('n', d2.get('is_ref'))
|
||||
eq_('n', d3.get('is_ref'))
|
||||
eq_('foo,bar', d1.get('words'))
|
||||
eq_('bar,bleh', d2.get('words'))
|
||||
eq_('foo,bleh', d3.get('words'))
|
||||
eq_(3, len([c for c in g1 if c.tag == "file"]))
|
||||
eq_(3, len([c for c in g1 if c.tag == "match"]))
|
||||
d1, d2, d3 = [c for c in g1 if c.tag == "file"]
|
||||
eq_(op.join("basepath", "foo bar"), d1.get("path"))
|
||||
eq_(op.join("basepath", "bar bleh"), d2.get("path"))
|
||||
eq_(op.join("basepath", "foo bleh"), d3.get("path"))
|
||||
eq_("y", d1.get("is_ref"))
|
||||
eq_("n", d2.get("is_ref"))
|
||||
eq_("n", d3.get("is_ref"))
|
||||
eq_("foo,bar", d1.get("words"))
|
||||
eq_("bar,bleh", d2.get("words"))
|
||||
eq_("foo,bleh", d3.get("words"))
|
||||
eq_(3, len(g2))
|
||||
eq_(2, len([c for c in g2 if c.tag == 'file']))
|
||||
eq_(1, len([c for c in g2 if c.tag == 'match']))
|
||||
d1, d2 = [c for c in g2 if c.tag == 'file']
|
||||
eq_(op.join('basepath', 'ibabtu'), d1.get('path'))
|
||||
eq_(op.join('basepath', 'ibabtu'), d2.get('path'))
|
||||
eq_('n', d1.get('is_ref'))
|
||||
eq_('n', d2.get('is_ref'))
|
||||
eq_('ibabtu', d1.get('words'))
|
||||
eq_('ibabtu', d2.get('words'))
|
||||
eq_(2, len([c for c in g2 if c.tag == "file"]))
|
||||
eq_(1, len([c for c in g2 if c.tag == "match"]))
|
||||
d1, d2 = [c for c in g2 if c.tag == "file"]
|
||||
eq_(op.join("basepath", "ibabtu"), d1.get("path"))
|
||||
eq_(op.join("basepath", "ibabtu"), d2.get("path"))
|
||||
eq_("n", d1.get("is_ref"))
|
||||
eq_("n", d2.get("is_ref"))
|
||||
eq_("ibabtu", d1.get("words"))
|
||||
eq_("ibabtu", d2.get("words"))
|
||||
|
||||
def test_LoadXML(self):
|
||||
def get_file(path):
|
||||
return [f for f in self.objects if str(f.path) == path][0]
|
||||
|
||||
self.objects[0].is_ref = True
|
||||
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
|
||||
self.objects[4].name = "ibabtu 2" # we can't have 2 files with the same path
|
||||
f = io.BytesIO()
|
||||
self.results.save_to_xml(f)
|
||||
f.seek(0)
|
||||
@@ -504,23 +510,23 @@ class TestCaseResultsXML:
|
||||
assert g1[0] is self.objects[0]
|
||||
assert g1[1] is self.objects[1]
|
||||
assert g1[2] is self.objects[2]
|
||||
eq_(['foo', 'bar'], g1[0].words)
|
||||
eq_(['bar', 'bleh'], g1[1].words)
|
||||
eq_(['foo', 'bleh'], g1[2].words)
|
||||
eq_(["foo", "bar"], g1[0].words)
|
||||
eq_(["bar", "bleh"], g1[1].words)
|
||||
eq_(["foo", "bleh"], g1[2].words)
|
||||
eq_(2, len(g2))
|
||||
assert not g2[0].is_ref
|
||||
assert not g2[1].is_ref
|
||||
assert g2[0] is self.objects[3]
|
||||
assert g2[1] is self.objects[4]
|
||||
eq_(['ibabtu'], g2[0].words)
|
||||
eq_(['ibabtu'], g2[1].words)
|
||||
eq_(["ibabtu"], g2[0].words)
|
||||
eq_(["ibabtu"], g2[1].words)
|
||||
|
||||
def test_LoadXML_with_filename(self, tmpdir):
|
||||
def get_file(path):
|
||||
return [f for f in self.objects if str(f.path) == path][0]
|
||||
|
||||
filename = str(tmpdir.join('dupeguru_results.xml'))
|
||||
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
|
||||
filename = str(tmpdir.join("dupeguru_results.xml"))
|
||||
self.objects[4].name = "ibabtu 2" # we can't have 2 files with the same path
|
||||
self.results.save_to_xml(filename)
|
||||
app = DupeGuru()
|
||||
r = Results(app)
|
||||
@@ -529,11 +535,11 @@ class TestCaseResultsXML:
|
||||
|
||||
def test_LoadXML_with_some_files_that_dont_exist_anymore(self):
|
||||
def get_file(path):
|
||||
if path.endswith('ibabtu 2'):
|
||||
if path.endswith("ibabtu 2"):
|
||||
return None
|
||||
return [f for f in self.objects if str(f.path) == path][0]
|
||||
|
||||
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
|
||||
self.objects[4].name = "ibabtu 2" # we can't have 2 files with the same path
|
||||
f = io.BytesIO()
|
||||
self.results.save_to_xml(f)
|
||||
f.seek(0)
|
||||
@@ -547,36 +553,36 @@ class TestCaseResultsXML:
|
||||
def get_file(path):
|
||||
return [f for f in self.objects if str(f.path) == path][0]
|
||||
|
||||
root = ET.Element('foobar') #The root element shouldn't matter, really.
|
||||
group_node = ET.SubElement(root, 'group')
|
||||
dupe_node = ET.SubElement(group_node, 'file') #Perfectly correct file
|
||||
dupe_node.set('path', op.join('basepath', 'foo bar'))
|
||||
dupe_node.set('is_ref', 'y')
|
||||
dupe_node.set('words', 'foo, bar')
|
||||
dupe_node = ET.SubElement(group_node, 'file') #is_ref missing, default to 'n'
|
||||
dupe_node.set('path', op.join('basepath', 'foo bleh'))
|
||||
dupe_node.set('words', 'foo, bleh')
|
||||
dupe_node = ET.SubElement(group_node, 'file') #words are missing, valid.
|
||||
dupe_node.set('path', op.join('basepath', 'bar bleh'))
|
||||
dupe_node = ET.SubElement(group_node, 'file') #path is missing, invalid.
|
||||
dupe_node.set('words', 'foo, bleh')
|
||||
dupe_node = ET.SubElement(group_node, 'foobar') #Invalid element name
|
||||
dupe_node.set('path', op.join('basepath', 'bar bleh'))
|
||||
dupe_node.set('is_ref', 'y')
|
||||
dupe_node.set('words', 'bar, bleh')
|
||||
match_node = ET.SubElement(group_node, 'match') # match pointing to a bad index
|
||||
match_node.set('first', '42')
|
||||
match_node.set('second', '45')
|
||||
match_node = ET.SubElement(group_node, 'match') # match with missing attrs
|
||||
match_node = ET.SubElement(group_node, 'match') # match with non-int values
|
||||
match_node.set('first', 'foo')
|
||||
match_node.set('second', 'bar')
|
||||
match_node.set('percentage', 'baz')
|
||||
group_node = ET.SubElement(root, 'foobar') #invalid group
|
||||
group_node = ET.SubElement(root, 'group') #empty group
|
||||
root = ET.Element("foobar") # The root element shouldn't matter, really.
|
||||
group_node = ET.SubElement(root, "group")
|
||||
dupe_node = ET.SubElement(group_node, "file") # Perfectly correct file
|
||||
dupe_node.set("path", op.join("basepath", "foo bar"))
|
||||
dupe_node.set("is_ref", "y")
|
||||
dupe_node.set("words", "foo, bar")
|
||||
dupe_node = ET.SubElement(group_node, "file") # is_ref missing, default to 'n'
|
||||
dupe_node.set("path", op.join("basepath", "foo bleh"))
|
||||
dupe_node.set("words", "foo, bleh")
|
||||
dupe_node = ET.SubElement(group_node, "file") # words are missing, valid.
|
||||
dupe_node.set("path", op.join("basepath", "bar bleh"))
|
||||
dupe_node = ET.SubElement(group_node, "file") # path is missing, invalid.
|
||||
dupe_node.set("words", "foo, bleh")
|
||||
dupe_node = ET.SubElement(group_node, "foobar") # Invalid element name
|
||||
dupe_node.set("path", op.join("basepath", "bar bleh"))
|
||||
dupe_node.set("is_ref", "y")
|
||||
dupe_node.set("words", "bar, bleh")
|
||||
match_node = ET.SubElement(group_node, "match") # match pointing to a bad index
|
||||
match_node.set("first", "42")
|
||||
match_node.set("second", "45")
|
||||
match_node = ET.SubElement(group_node, "match") # match with missing attrs
|
||||
match_node = ET.SubElement(group_node, "match") # match with non-int values
|
||||
match_node.set("first", "foo")
|
||||
match_node.set("second", "bar")
|
||||
match_node.set("percentage", "baz")
|
||||
group_node = ET.SubElement(root, "foobar") # invalid group
|
||||
group_node = ET.SubElement(root, "group") # empty group
|
||||
f = io.BytesIO()
|
||||
tree = ET.ElementTree(root)
|
||||
tree.write(f, encoding='utf-8')
|
||||
tree.write(f, encoding="utf-8")
|
||||
f.seek(0)
|
||||
app = DupeGuru()
|
||||
r = Results(app)
|
||||
@@ -586,16 +592,18 @@ class TestCaseResultsXML:
|
||||
|
||||
def test_xml_non_ascii(self):
|
||||
def get_file(path):
|
||||
if path == op.join('basepath', '\xe9foo bar'):
|
||||
if path == op.join("basepath", "\xe9foo bar"):
|
||||
return objects[0]
|
||||
if path == op.join('basepath', 'bar bleh'):
|
||||
if path == op.join("basepath", "bar bleh"):
|
||||
return objects[1]
|
||||
|
||||
objects = [NamedObject("\xe9foo bar", True), NamedObject("bar bleh", True)]
|
||||
matches = engine.getmatches(objects) #we should have 5 matches
|
||||
groups = engine.get_groups(matches) #We should have 2 groups
|
||||
matches = engine.getmatches(objects) # we should have 5 matches
|
||||
groups = engine.get_groups(matches) # We should have 2 groups
|
||||
for g in groups:
|
||||
g.prioritize(lambda x: objects.index(x)) #We want the dupes to be in the same order as the list is
|
||||
g.prioritize(
|
||||
lambda x: objects.index(x)
|
||||
) # We want the dupes to be in the same order as the list is
|
||||
app = DupeGuru()
|
||||
results = Results(app)
|
||||
results.groups = groups
|
||||
@@ -607,11 +615,11 @@ class TestCaseResultsXML:
|
||||
r.load_from_xml(f, get_file)
|
||||
g = r.groups[0]
|
||||
eq_("\xe9foo bar", g[0].name)
|
||||
eq_(['efoo', 'bar'], g[0].words)
|
||||
eq_(["efoo", "bar"], g[0].words)
|
||||
|
||||
def test_load_invalid_xml(self):
|
||||
f = io.BytesIO()
|
||||
f.write(b'<this is invalid')
|
||||
f.write(b"<this is invalid")
|
||||
f.seek(0)
|
||||
app = DupeGuru()
|
||||
r = Results(app)
|
||||
@@ -623,7 +631,7 @@ class TestCaseResultsXML:
|
||||
app = DupeGuru()
|
||||
r = Results(app)
|
||||
with raises(IOError):
|
||||
r.load_from_xml('does_not_exist.xml', None)
|
||||
r.load_from_xml("does_not_exist.xml", None)
|
||||
eq_(0, len(r.groups))
|
||||
|
||||
def test_remember_match_percentage(self):
|
||||
@@ -643,12 +651,12 @@ class TestCaseResultsXML:
|
||||
results.load_from_xml(f, self.get_file)
|
||||
group = results.groups[0]
|
||||
d1, d2, d3 = group
|
||||
match = group.get_match_of(d2) #d1 - d2
|
||||
match = group.get_match_of(d2) # d1 - d2
|
||||
eq_(42, match[2])
|
||||
match = group.get_match_of(d3) #d1 - d3
|
||||
match = group.get_match_of(d3) # d1 - d3
|
||||
eq_(43, match[2])
|
||||
group.switch_ref(d2)
|
||||
match = group.get_match_of(d3) #d2 - d3
|
||||
match = group.get_match_of(d3) # d2 - d3
|
||||
eq_(46, match[2])
|
||||
|
||||
def test_save_and_load(self):
|
||||
@@ -661,13 +669,13 @@ class TestCaseResultsXML:
|
||||
|
||||
def test_apply_filter_works_on_paths(self):
|
||||
# apply_filter() searches on the whole path, not just on the filename.
|
||||
self.results.apply_filter('basepath')
|
||||
self.results.apply_filter("basepath")
|
||||
eq_(len(self.results.groups), 2)
|
||||
|
||||
def test_save_xml_with_invalid_characters(self):
|
||||
# Don't crash when saving files that have invalid xml characters in their path
|
||||
self.objects[0].name = 'foo\x19'
|
||||
self.results.save_to_xml(io.BytesIO()) # don't crash
|
||||
self.objects[0].name = "foo\x19"
|
||||
self.results.save_to_xml(io.BytesIO()) # don't crash
|
||||
|
||||
|
||||
class TestCaseResultsFilter:
|
||||
@@ -676,7 +684,7 @@ class TestCaseResultsFilter:
|
||||
self.results = self.app.results
|
||||
self.objects, self.matches, self.groups = GetTestGroups()
|
||||
self.results.groups = self.groups
|
||||
self.results.apply_filter(r'foo')
|
||||
self.results.apply_filter(r"foo")
|
||||
|
||||
def test_groups(self):
|
||||
eq_(1, len(self.results.groups))
|
||||
@@ -694,7 +702,7 @@ class TestCaseResultsFilter:
|
||||
|
||||
def test_dupes_reconstructed_filtered(self):
|
||||
# make_ref resets self.__dupes to None. When it's reconstructed, we want it filtered
|
||||
dupe = self.results.dupes[0] #3rd object
|
||||
dupe = self.results.dupes[0] # 3rd object
|
||||
self.results.make_ref(dupe)
|
||||
eq_(1, len(self.results.dupes))
|
||||
assert self.results.dupes[0] is self.objects[0]
|
||||
@@ -702,23 +710,23 @@ class TestCaseResultsFilter:
|
||||
def test_include_ref_dupes_in_filter(self):
|
||||
# When only the ref of a group match the filter, include it in the group
|
||||
self.results.apply_filter(None)
|
||||
self.results.apply_filter(r'foo bar')
|
||||
self.results.apply_filter(r"foo bar")
|
||||
eq_(1, len(self.results.groups))
|
||||
eq_(0, len(self.results.dupes))
|
||||
|
||||
def test_filters_build_on_one_another(self):
|
||||
self.results.apply_filter(r'bar')
|
||||
self.results.apply_filter(r"bar")
|
||||
eq_(1, len(self.results.groups))
|
||||
eq_(0, len(self.results.dupes))
|
||||
|
||||
def test_stat_line(self):
|
||||
expected = '0 / 1 (0.00 B / 1.00 B) duplicates marked. filter: foo'
|
||||
expected = "0 / 1 (0.00 B / 1.00 B) duplicates marked. filter: foo"
|
||||
eq_(expected, self.results.stat_line)
|
||||
self.results.apply_filter(r'bar')
|
||||
expected = '0 / 0 (0.00 B / 0.00 B) duplicates marked. filter: foo --> bar'
|
||||
self.results.apply_filter(r"bar")
|
||||
expected = "0 / 0 (0.00 B / 0.00 B) duplicates marked. filter: foo --> bar"
|
||||
eq_(expected, self.results.stat_line)
|
||||
self.results.apply_filter(None)
|
||||
expected = '0 / 3 (0.00 B / 1.01 KB) duplicates marked.'
|
||||
expected = "0 / 3 (0.00 B / 1.01 KB) duplicates marked."
|
||||
eq_(expected, self.results.stat_line)
|
||||
|
||||
def test_mark_count_is_filtered_as_well(self):
|
||||
@@ -726,8 +734,8 @@ class TestCaseResultsFilter:
|
||||
# We don't want to perform mark_all() because we want the mark list to contain objects
|
||||
for dupe in self.results.dupes:
|
||||
self.results.mark(dupe)
|
||||
self.results.apply_filter(r'foo')
|
||||
expected = '1 / 1 (1.00 B / 1.00 B) duplicates marked. filter: foo'
|
||||
self.results.apply_filter(r"foo")
|
||||
expected = "1 / 1 (1.00 B / 1.00 B) duplicates marked. filter: foo"
|
||||
eq_(expected, self.results.stat_line)
|
||||
|
||||
def test_mark_all_only_affects_filtered_items(self):
|
||||
@@ -739,22 +747,22 @@ class TestCaseResultsFilter:
|
||||
|
||||
def test_sort_groups(self):
|
||||
self.results.apply_filter(None)
|
||||
self.results.make_ref(self.objects[1]) # to have the 1024 b obkect as ref
|
||||
self.results.make_ref(self.objects[1]) # to have the 1024 b obkect as ref
|
||||
g1, g2 = self.groups
|
||||
self.results.apply_filter('a') # Matches both group
|
||||
self.results.sort_groups('size')
|
||||
self.results.apply_filter("a") # Matches both group
|
||||
self.results.sort_groups("size")
|
||||
assert self.results.groups[0] is g2
|
||||
assert self.results.groups[1] is g1
|
||||
self.results.apply_filter(None)
|
||||
assert self.results.groups[0] is g2
|
||||
assert self.results.groups[1] is g1
|
||||
self.results.sort_groups('size', False)
|
||||
self.results.apply_filter('a')
|
||||
self.results.sort_groups("size", False)
|
||||
self.results.apply_filter("a")
|
||||
assert self.results.groups[1] is g2
|
||||
assert self.results.groups[0] is g1
|
||||
|
||||
def test_set_group(self):
|
||||
#We want the new group to be filtered
|
||||
# We want the new group to be filtered
|
||||
self.objects, self.matches, self.groups = GetTestGroups()
|
||||
self.results.groups = self.groups
|
||||
eq_(1, len(self.results.groups))
|
||||
@@ -764,12 +772,12 @@ class TestCaseResultsFilter:
|
||||
def get_file(path):
|
||||
return [f for f in self.objects if str(f.path) == path][0]
|
||||
|
||||
filename = str(tmpdir.join('dupeguru_results.xml'))
|
||||
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
|
||||
filename = str(tmpdir.join("dupeguru_results.xml"))
|
||||
self.objects[4].name = "ibabtu 2" # we can't have 2 files with the same path
|
||||
self.results.save_to_xml(filename)
|
||||
app = DupeGuru()
|
||||
r = Results(app)
|
||||
r.apply_filter('foo')
|
||||
r.apply_filter("foo")
|
||||
r.load_from_xml(filename, get_file)
|
||||
eq_(2, len(r.groups))
|
||||
|
||||
@@ -778,7 +786,7 @@ class TestCaseResultsFilter:
|
||||
self.results.apply_filter(None)
|
||||
eq_(2, len(self.results.groups))
|
||||
eq_(2, len(self.results.dupes))
|
||||
self.results.apply_filter('ibabtu')
|
||||
self.results.apply_filter("ibabtu")
|
||||
self.results.remove_duplicates([self.results.dupes[0]])
|
||||
self.results.apply_filter(None)
|
||||
eq_(1, len(self.results.groups))
|
||||
@@ -786,7 +794,7 @@ class TestCaseResultsFilter:
|
||||
|
||||
def test_filter_is_case_insensitive(self):
|
||||
self.results.apply_filter(None)
|
||||
self.results.apply_filter('FOO')
|
||||
self.results.apply_filter("FOO")
|
||||
eq_(1, len(self.results.dupes))
|
||||
|
||||
def test_make_ref_on_filtered_out_doesnt_mess_stats(self):
|
||||
@@ -794,13 +802,15 @@ class TestCaseResultsFilter:
|
||||
# When calling make_ref on such a dupe, the total size and dupecount stats gets messed up
|
||||
# because they are *not* counted in the stats in the first place.
|
||||
g1, g2 = self.groups
|
||||
bar_bleh = g1[1] # The "bar bleh" dupe is filtered out
|
||||
bar_bleh = g1[1] # The "bar bleh" dupe is filtered out
|
||||
self.results.make_ref(bar_bleh)
|
||||
# Now the stats should display *2* markable dupes (instead of 1)
|
||||
expected = '0 / 2 (0.00 B / 2.00 B) duplicates marked. filter: foo'
|
||||
expected = "0 / 2 (0.00 B / 2.00 B) duplicates marked. filter: foo"
|
||||
eq_(expected, self.results.stat_line)
|
||||
self.results.apply_filter(None) # Now let's make sure our unfiltered results aren't fucked up
|
||||
expected = '0 / 3 (0.00 B / 3.00 B) duplicates marked.'
|
||||
self.results.apply_filter(
|
||||
None
|
||||
) # Now let's make sure our unfiltered results aren't fucked up
|
||||
expected = "0 / 3 (0.00 B / 3.00 B) duplicates marked."
|
||||
eq_(expected, self.results.stat_line)
|
||||
|
||||
|
||||
@@ -814,6 +824,5 @@ class TestCaseResultsRefFile:
|
||||
self.results.groups = self.groups
|
||||
|
||||
def test_stat_line(self):
|
||||
expected = '0 / 2 (0.00 B / 2.00 B) duplicates marked.'
|
||||
expected = "0 / 2 (0.00 B / 2.00 B) duplicates marked."
|
||||
eq_(expected, self.results.stat_line)
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ from ..ignore import IgnoreList
|
||||
from ..scanner import Scanner, ScanType
|
||||
from ..me.scanner import ScannerME
|
||||
|
||||
|
||||
class NamedObject:
|
||||
def __init__(self, name="foobar", size=1, path=None):
|
||||
if path is None:
|
||||
@@ -26,22 +27,25 @@ class NamedObject:
|
||||
self.words = getwords(name)
|
||||
|
||||
def __repr__(self):
|
||||
return '<NamedObject %r %r>' % (self.name, self.path)
|
||||
return "<NamedObject %r %r>" % (self.name, self.path)
|
||||
|
||||
|
||||
no = NamedObject
|
||||
|
||||
|
||||
def pytest_funcarg__fake_fileexists(request):
|
||||
# This is a hack to avoid invalidating all previous tests since the scanner started to test
|
||||
# for file existence before doing the match grouping.
|
||||
monkeypatch = request.getfuncargvalue('monkeypatch')
|
||||
monkeypatch.setattr(Path, 'exists', lambda _: True)
|
||||
monkeypatch = request.getfuncargvalue("monkeypatch")
|
||||
monkeypatch.setattr(Path, "exists", lambda _: True)
|
||||
|
||||
|
||||
def test_empty(fake_fileexists):
|
||||
s = Scanner()
|
||||
r = s.get_dupe_groups([])
|
||||
eq_(r, [])
|
||||
|
||||
|
||||
def test_default_settings(fake_fileexists):
|
||||
s = Scanner()
|
||||
eq_(s.min_match_percentage, 80)
|
||||
@@ -50,40 +54,54 @@ def test_default_settings(fake_fileexists):
|
||||
eq_(s.word_weighting, False)
|
||||
eq_(s.match_similar_words, False)
|
||||
|
||||
|
||||
def test_simple_with_default_settings(fake_fileexists):
|
||||
s = Scanner()
|
||||
f = [no('foo bar', path='p1'), no('foo bar', path='p2'), no('foo bleh')]
|
||||
f = [no("foo bar", path="p1"), no("foo bar", path="p2"), no("foo bleh")]
|
||||
r = s.get_dupe_groups(f)
|
||||
eq_(len(r), 1)
|
||||
g = r[0]
|
||||
#'foo bleh' cannot be in the group because the default min match % is 80
|
||||
# 'foo bleh' cannot be in the group because the default min match % is 80
|
||||
eq_(len(g), 2)
|
||||
assert g.ref in f[:2]
|
||||
assert g.dupes[0] in f[:2]
|
||||
|
||||
|
||||
def test_simple_with_lower_min_match(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.min_match_percentage = 50
|
||||
f = [no('foo bar', path='p1'), no('foo bar', path='p2'), no('foo bleh')]
|
||||
f = [no("foo bar", path="p1"), no("foo bar", path="p2"), no("foo bleh")]
|
||||
r = s.get_dupe_groups(f)
|
||||
eq_(len(r), 1)
|
||||
g = r[0]
|
||||
eq_(len(g), 3)
|
||||
|
||||
|
||||
def test_trim_all_ref_groups(fake_fileexists):
|
||||
# When all files of a group are ref, don't include that group in the results, but also don't
|
||||
# count the files from that group as discarded.
|
||||
s = Scanner()
|
||||
f = [no('foo', path='p1'), no('foo', path='p2'), no('bar', path='p1'), no('bar', path='p2')]
|
||||
f = [
|
||||
no("foo", path="p1"),
|
||||
no("foo", path="p2"),
|
||||
no("bar", path="p1"),
|
||||
no("bar", path="p2"),
|
||||
]
|
||||
f[2].is_ref = True
|
||||
f[3].is_ref = True
|
||||
r = s.get_dupe_groups(f)
|
||||
eq_(len(r), 1)
|
||||
eq_(s.discarded_file_count, 0)
|
||||
|
||||
|
||||
def test_priorize(fake_fileexists):
|
||||
s = Scanner()
|
||||
f = [no('foo', path='p1'), no('foo', path='p2'), no('bar', path='p1'), no('bar', path='p2')]
|
||||
f = [
|
||||
no("foo", path="p1"),
|
||||
no("foo", path="p2"),
|
||||
no("bar", path="p1"),
|
||||
no("bar", path="p2"),
|
||||
]
|
||||
f[1].size = 2
|
||||
f[2].size = 3
|
||||
f[3].is_ref = True
|
||||
@@ -94,17 +112,19 @@ def test_priorize(fake_fileexists):
|
||||
assert f[3] in (g1.ref, g2.ref)
|
||||
assert f[2] in (g1.dupes[0], g2.dupes[0])
|
||||
|
||||
|
||||
def test_content_scan(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.scan_type = ScanType.Contents
|
||||
f = [no('foo'), no('bar'), no('bleh')]
|
||||
f[0].md5 = f[0].md5partial = 'foobar'
|
||||
f[1].md5 = f[1].md5partial = 'foobar'
|
||||
f[2].md5 = f[2].md5partial = 'bleh'
|
||||
f = [no("foo"), no("bar"), no("bleh")]
|
||||
f[0].md5 = f[0].md5partial = "foobar"
|
||||
f[1].md5 = f[1].md5partial = "foobar"
|
||||
f[2].md5 = f[2].md5partial = "bleh"
|
||||
r = s.get_dupe_groups(f)
|
||||
eq_(len(r), 1)
|
||||
eq_(len(r[0]), 2)
|
||||
eq_(s.discarded_file_count, 0) # don't count the different md5 as discarded!
|
||||
eq_(s.discarded_file_count, 0) # don't count the different md5 as discarded!
|
||||
|
||||
|
||||
def test_content_scan_compare_sizes_first(fake_fileexists):
|
||||
class MyFile(no):
|
||||
@@ -114,16 +134,17 @@ def test_content_scan_compare_sizes_first(fake_fileexists):
|
||||
|
||||
s = Scanner()
|
||||
s.scan_type = ScanType.Contents
|
||||
f = [MyFile('foo', 1), MyFile('bar', 2)]
|
||||
f = [MyFile("foo", 1), MyFile("bar", 2)]
|
||||
eq_(len(s.get_dupe_groups(f)), 0)
|
||||
|
||||
|
||||
def test_min_match_perc_doesnt_matter_for_content_scan(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.scan_type = ScanType.Contents
|
||||
f = [no('foo'), no('bar'), no('bleh')]
|
||||
f[0].md5 = f[0].md5partial = 'foobar'
|
||||
f[1].md5 = f[1].md5partial = 'foobar'
|
||||
f[2].md5 = f[2].md5partial = 'bleh'
|
||||
f = [no("foo"), no("bar"), no("bleh")]
|
||||
f[0].md5 = f[0].md5partial = "foobar"
|
||||
f[1].md5 = f[1].md5partial = "foobar"
|
||||
f[2].md5 = f[2].md5partial = "bleh"
|
||||
s.min_match_percentage = 101
|
||||
r = s.get_dupe_groups(f)
|
||||
eq_(len(r), 1)
|
||||
@@ -133,157 +154,180 @@ def test_min_match_perc_doesnt_matter_for_content_scan(fake_fileexists):
|
||||
eq_(len(r), 1)
|
||||
eq_(len(r[0]), 2)
|
||||
|
||||
|
||||
def test_content_scan_doesnt_put_md5_in_words_at_the_end(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.scan_type = ScanType.Contents
|
||||
f = [no('foo'), no('bar')]
|
||||
f[0].md5 = f[0].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
|
||||
f[1].md5 = f[1].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
|
||||
f = [no("foo"), no("bar")]
|
||||
f[0].md5 = f[
|
||||
0
|
||||
].md5partial = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
|
||||
f[1].md5 = f[
|
||||
1
|
||||
].md5partial = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
|
||||
r = s.get_dupe_groups(f)
|
||||
r[0]
|
||||
|
||||
|
||||
def test_extension_is_not_counted_in_filename_scan(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.min_match_percentage = 100
|
||||
f = [no('foo.bar'), no('foo.bleh')]
|
||||
f = [no("foo.bar"), no("foo.bleh")]
|
||||
r = s.get_dupe_groups(f)
|
||||
eq_(len(r), 1)
|
||||
eq_(len(r[0]), 2)
|
||||
|
||||
|
||||
def test_job(fake_fileexists):
|
||||
def do_progress(progress, desc=''):
|
||||
def do_progress(progress, desc=""):
|
||||
log.append(progress)
|
||||
return True
|
||||
|
||||
s = Scanner()
|
||||
log = []
|
||||
f = [no('foo bar'), no('foo bar'), no('foo bleh')]
|
||||
f = [no("foo bar"), no("foo bar"), no("foo bleh")]
|
||||
s.get_dupe_groups(f, j=job.Job(1, do_progress))
|
||||
eq_(log[0], 0)
|
||||
eq_(log[-1], 100)
|
||||
|
||||
|
||||
def test_mix_file_kind(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.mix_file_kind = False
|
||||
f = [no('foo.1'), no('foo.2')]
|
||||
f = [no("foo.1"), no("foo.2")]
|
||||
r = s.get_dupe_groups(f)
|
||||
eq_(len(r), 0)
|
||||
|
||||
|
||||
def test_word_weighting(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.min_match_percentage = 75
|
||||
s.word_weighting = True
|
||||
f = [no('foo bar'), no('foo bar bleh')]
|
||||
f = [no("foo bar"), no("foo bar bleh")]
|
||||
r = s.get_dupe_groups(f)
|
||||
eq_(len(r), 1)
|
||||
g = r[0]
|
||||
m = g.get_match_of(g.dupes[0])
|
||||
eq_(m.percentage, 75) # 16 letters, 12 matching
|
||||
eq_(m.percentage, 75) # 16 letters, 12 matching
|
||||
|
||||
|
||||
def test_similar_words(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.match_similar_words = True
|
||||
f = [no('The White Stripes'), no('The Whites Stripe'), no('Limp Bizkit'), no('Limp Bizkitt')]
|
||||
f = [
|
||||
no("The White Stripes"),
|
||||
no("The Whites Stripe"),
|
||||
no("Limp Bizkit"),
|
||||
no("Limp Bizkitt"),
|
||||
]
|
||||
r = s.get_dupe_groups(f)
|
||||
eq_(len(r), 2)
|
||||
|
||||
|
||||
def test_fields(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.scan_type = ScanType.Fields
|
||||
f = [no('The White Stripes - Little Ghost'), no('The White Stripes - Little Acorn')]
|
||||
f = [no("The White Stripes - Little Ghost"), no("The White Stripes - Little Acorn")]
|
||||
r = s.get_dupe_groups(f)
|
||||
eq_(len(r), 0)
|
||||
|
||||
|
||||
def test_fields_no_order(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.scan_type = ScanType.FieldsNoOrder
|
||||
f = [no('The White Stripes - Little Ghost'), no('Little Ghost - The White Stripes')]
|
||||
f = [no("The White Stripes - Little Ghost"), no("Little Ghost - The White Stripes")]
|
||||
r = s.get_dupe_groups(f)
|
||||
eq_(len(r), 1)
|
||||
|
||||
|
||||
def test_tag_scan(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.scan_type = ScanType.Tag
|
||||
o1 = no('foo')
|
||||
o2 = no('bar')
|
||||
o1.artist = 'The White Stripes'
|
||||
o1.title = 'The Air Near My Fingers'
|
||||
o2.artist = 'The White Stripes'
|
||||
o2.title = 'The Air Near My Fingers'
|
||||
o1 = no("foo")
|
||||
o2 = no("bar")
|
||||
o1.artist = "The White Stripes"
|
||||
o1.title = "The Air Near My Fingers"
|
||||
o2.artist = "The White Stripes"
|
||||
o2.title = "The Air Near My Fingers"
|
||||
r = s.get_dupe_groups([o1, o2])
|
||||
eq_(len(r), 1)
|
||||
|
||||
|
||||
def test_tag_with_album_scan(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.scan_type = ScanType.Tag
|
||||
s.scanned_tags = set(['artist', 'album', 'title'])
|
||||
o1 = no('foo')
|
||||
o2 = no('bar')
|
||||
o3 = no('bleh')
|
||||
o1.artist = 'The White Stripes'
|
||||
o1.title = 'The Air Near My Fingers'
|
||||
o1.album = 'Elephant'
|
||||
o2.artist = 'The White Stripes'
|
||||
o2.title = 'The Air Near My Fingers'
|
||||
o2.album = 'Elephant'
|
||||
o3.artist = 'The White Stripes'
|
||||
o3.title = 'The Air Near My Fingers'
|
||||
o3.album = 'foobar'
|
||||
s.scanned_tags = set(["artist", "album", "title"])
|
||||
o1 = no("foo")
|
||||
o2 = no("bar")
|
||||
o3 = no("bleh")
|
||||
o1.artist = "The White Stripes"
|
||||
o1.title = "The Air Near My Fingers"
|
||||
o1.album = "Elephant"
|
||||
o2.artist = "The White Stripes"
|
||||
o2.title = "The Air Near My Fingers"
|
||||
o2.album = "Elephant"
|
||||
o3.artist = "The White Stripes"
|
||||
o3.title = "The Air Near My Fingers"
|
||||
o3.album = "foobar"
|
||||
r = s.get_dupe_groups([o1, o2, o3])
|
||||
eq_(len(r), 1)
|
||||
|
||||
|
||||
def test_that_dash_in_tags_dont_create_new_fields(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.scan_type = ScanType.Tag
|
||||
s.scanned_tags = set(['artist', 'album', 'title'])
|
||||
s.scanned_tags = set(["artist", "album", "title"])
|
||||
s.min_match_percentage = 50
|
||||
o1 = no('foo')
|
||||
o2 = no('bar')
|
||||
o1.artist = 'The White Stripes - a'
|
||||
o1.title = 'The Air Near My Fingers - a'
|
||||
o1.album = 'Elephant - a'
|
||||
o2.artist = 'The White Stripes - b'
|
||||
o2.title = 'The Air Near My Fingers - b'
|
||||
o2.album = 'Elephant - b'
|
||||
o1 = no("foo")
|
||||
o2 = no("bar")
|
||||
o1.artist = "The White Stripes - a"
|
||||
o1.title = "The Air Near My Fingers - a"
|
||||
o1.album = "Elephant - a"
|
||||
o2.artist = "The White Stripes - b"
|
||||
o2.title = "The Air Near My Fingers - b"
|
||||
o2.album = "Elephant - b"
|
||||
r = s.get_dupe_groups([o1, o2])
|
||||
eq_(len(r), 1)
|
||||
|
||||
|
||||
def test_tag_scan_with_different_scanned(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.scan_type = ScanType.Tag
|
||||
s.scanned_tags = set(['track', 'year'])
|
||||
o1 = no('foo')
|
||||
o2 = no('bar')
|
||||
o1.artist = 'The White Stripes'
|
||||
o1.title = 'some title'
|
||||
o1.track = 'foo'
|
||||
o1.year = 'bar'
|
||||
o2.artist = 'The White Stripes'
|
||||
o2.title = 'another title'
|
||||
o2.track = 'foo'
|
||||
o2.year = 'bar'
|
||||
s.scanned_tags = set(["track", "year"])
|
||||
o1 = no("foo")
|
||||
o2 = no("bar")
|
||||
o1.artist = "The White Stripes"
|
||||
o1.title = "some title"
|
||||
o1.track = "foo"
|
||||
o1.year = "bar"
|
||||
o2.artist = "The White Stripes"
|
||||
o2.title = "another title"
|
||||
o2.track = "foo"
|
||||
o2.year = "bar"
|
||||
r = s.get_dupe_groups([o1, o2])
|
||||
eq_(len(r), 1)
|
||||
|
||||
|
||||
def test_tag_scan_only_scans_existing_tags(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.scan_type = ScanType.Tag
|
||||
s.scanned_tags = set(['artist', 'foo'])
|
||||
o1 = no('foo')
|
||||
o2 = no('bar')
|
||||
o1.artist = 'The White Stripes'
|
||||
o1.foo = 'foo'
|
||||
o2.artist = 'The White Stripes'
|
||||
o2.foo = 'bar'
|
||||
s.scanned_tags = set(["artist", "foo"])
|
||||
o1 = no("foo")
|
||||
o2 = no("bar")
|
||||
o1.artist = "The White Stripes"
|
||||
o1.foo = "foo"
|
||||
o2.artist = "The White Stripes"
|
||||
o2.foo = "bar"
|
||||
r = s.get_dupe_groups([o1, o2])
|
||||
eq_(len(r), 1) # Because 'foo' is not scanned, they match
|
||||
eq_(len(r), 1) # Because 'foo' is not scanned, they match
|
||||
|
||||
|
||||
def test_tag_scan_converts_to_str(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.scan_type = ScanType.Tag
|
||||
s.scanned_tags = set(['track'])
|
||||
o1 = no('foo')
|
||||
o2 = no('bar')
|
||||
s.scanned_tags = set(["track"])
|
||||
o1 = no("foo")
|
||||
o2 = no("bar")
|
||||
o1.track = 42
|
||||
o2.track = 42
|
||||
try:
|
||||
@@ -292,28 +336,30 @@ def test_tag_scan_converts_to_str(fake_fileexists):
|
||||
raise AssertionError()
|
||||
eq_(len(r), 1)
|
||||
|
||||
|
||||
def test_tag_scan_non_ascii(fake_fileexists):
|
||||
s = Scanner()
|
||||
s.scan_type = ScanType.Tag
|
||||
s.scanned_tags = set(['title'])
|
||||
o1 = no('foo')
|
||||
o2 = no('bar')
|
||||
o1.title = 'foobar\u00e9'
|
||||
o2.title = 'foobar\u00e9'
|
||||
s.scanned_tags = set(["title"])
|
||||
o1 = no("foo")
|
||||
o2 = no("bar")
|
||||
o1.title = "foobar\u00e9"
|
||||
o2.title = "foobar\u00e9"
|
||||
try:
|
||||
r = s.get_dupe_groups([o1, o2])
|
||||
except UnicodeEncodeError:
|
||||
raise AssertionError()
|
||||
eq_(len(r), 1)
|
||||
|
||||
|
||||
def test_ignore_list(fake_fileexists):
|
||||
s = Scanner()
|
||||
f1 = no('foobar')
|
||||
f2 = no('foobar')
|
||||
f3 = no('foobar')
|
||||
f1.path = Path('dir1/foobar')
|
||||
f2.path = Path('dir2/foobar')
|
||||
f3.path = Path('dir3/foobar')
|
||||
f1 = no("foobar")
|
||||
f2 = no("foobar")
|
||||
f3 = no("foobar")
|
||||
f1.path = Path("dir1/foobar")
|
||||
f2.path = Path("dir2/foobar")
|
||||
f3.path = Path("dir3/foobar")
|
||||
ignore_list = IgnoreList()
|
||||
ignore_list.Ignore(str(f1.path), str(f2.path))
|
||||
ignore_list.Ignore(str(f1.path), str(f3.path))
|
||||
@@ -327,16 +373,17 @@ def test_ignore_list(fake_fileexists):
|
||||
# Ignored matches are not counted as discarded
|
||||
eq_(s.discarded_file_count, 0)
|
||||
|
||||
|
||||
def test_ignore_list_checks_for_unicode(fake_fileexists):
|
||||
#scanner was calling path_str for ignore list checks. Since the Path changes, it must
|
||||
#be unicode(path)
|
||||
# scanner was calling path_str for ignore list checks. Since the Path changes, it must
|
||||
# be unicode(path)
|
||||
s = Scanner()
|
||||
f1 = no('foobar')
|
||||
f2 = no('foobar')
|
||||
f3 = no('foobar')
|
||||
f1.path = Path('foo1\u00e9')
|
||||
f2.path = Path('foo2\u00e9')
|
||||
f3.path = Path('foo3\u00e9')
|
||||
f1 = no("foobar")
|
||||
f2 = no("foobar")
|
||||
f3 = no("foobar")
|
||||
f1.path = Path("foo1\u00e9")
|
||||
f2.path = Path("foo2\u00e9")
|
||||
f3.path = Path("foo3\u00e9")
|
||||
ignore_list = IgnoreList()
|
||||
ignore_list.Ignore(str(f1.path), str(f2.path))
|
||||
ignore_list.Ignore(str(f1.path), str(f3.path))
|
||||
@@ -348,6 +395,7 @@ def test_ignore_list_checks_for_unicode(fake_fileexists):
|
||||
assert f2 in g
|
||||
assert f3 in g
|
||||
|
||||
|
||||
def test_file_evaluates_to_false(fake_fileexists):
|
||||
# A very wrong way to use any() was added at some point, causing resulting group list
|
||||
# to be empty.
|
||||
@@ -355,19 +403,19 @@ def test_file_evaluates_to_false(fake_fileexists):
|
||||
def __bool__(self):
|
||||
return False
|
||||
|
||||
|
||||
s = Scanner()
|
||||
f1 = FalseNamedObject('foobar', path='p1')
|
||||
f2 = FalseNamedObject('foobar', path='p2')
|
||||
f1 = FalseNamedObject("foobar", path="p1")
|
||||
f2 = FalseNamedObject("foobar", path="p2")
|
||||
r = s.get_dupe_groups([f1, f2])
|
||||
eq_(len(r), 1)
|
||||
|
||||
|
||||
def test_size_threshold(fake_fileexists):
|
||||
# Only file equal or higher than the size_threshold in size are scanned
|
||||
s = Scanner()
|
||||
f1 = no('foo', 1, path='p1')
|
||||
f2 = no('foo', 2, path='p2')
|
||||
f3 = no('foo', 3, path='p3')
|
||||
f1 = no("foo", 1, path="p1")
|
||||
f2 = no("foo", 2, path="p2")
|
||||
f3 = no("foo", 3, path="p3")
|
||||
s.size_threshold = 2
|
||||
groups = s.get_dupe_groups([f1, f2, f3])
|
||||
eq_(len(groups), 1)
|
||||
@@ -377,48 +425,52 @@ def test_size_threshold(fake_fileexists):
|
||||
assert f2 in group
|
||||
assert f3 in group
|
||||
|
||||
|
||||
def test_tie_breaker_path_deepness(fake_fileexists):
|
||||
# If there is a tie in prioritization, path deepness is used as a tie breaker
|
||||
s = Scanner()
|
||||
o1, o2 = no('foo'), no('foo')
|
||||
o1.path = Path('foo')
|
||||
o2.path = Path('foo/bar')
|
||||
o1, o2 = no("foo"), no("foo")
|
||||
o1.path = Path("foo")
|
||||
o2.path = Path("foo/bar")
|
||||
[group] = s.get_dupe_groups([o1, o2])
|
||||
assert group.ref is o2
|
||||
|
||||
|
||||
def test_tie_breaker_copy(fake_fileexists):
|
||||
# if copy is in the words used (even if it has a deeper path), it becomes a dupe
|
||||
s = Scanner()
|
||||
o1, o2 = no('foo bar Copy'), no('foo bar')
|
||||
o1.path = Path('deeper/path')
|
||||
o2.path = Path('foo')
|
||||
o1, o2 = no("foo bar Copy"), no("foo bar")
|
||||
o1.path = Path("deeper/path")
|
||||
o2.path = Path("foo")
|
||||
[group] = s.get_dupe_groups([o1, o2])
|
||||
assert group.ref is o2
|
||||
|
||||
|
||||
def test_tie_breaker_same_name_plus_digit(fake_fileexists):
|
||||
# if ref has the same words as dupe, but has some just one extra word which is a digit, it
|
||||
# becomes a dupe
|
||||
s = Scanner()
|
||||
o1 = no('foo bar 42')
|
||||
o2 = no('foo bar [42]')
|
||||
o3 = no('foo bar (42)')
|
||||
o4 = no('foo bar {42}')
|
||||
o5 = no('foo bar')
|
||||
o1 = no("foo bar 42")
|
||||
o2 = no("foo bar [42]")
|
||||
o3 = no("foo bar (42)")
|
||||
o4 = no("foo bar {42}")
|
||||
o5 = no("foo bar")
|
||||
# all numbered names have deeper paths, so they'll end up ref if the digits aren't correctly
|
||||
# used as tie breakers
|
||||
o1.path = Path('deeper/path')
|
||||
o2.path = Path('deeper/path')
|
||||
o3.path = Path('deeper/path')
|
||||
o4.path = Path('deeper/path')
|
||||
o5.path = Path('foo')
|
||||
o1.path = Path("deeper/path")
|
||||
o2.path = Path("deeper/path")
|
||||
o3.path = Path("deeper/path")
|
||||
o4.path = Path("deeper/path")
|
||||
o5.path = Path("foo")
|
||||
[group] = s.get_dupe_groups([o1, o2, o3, o4, o5])
|
||||
assert group.ref is o5
|
||||
|
||||
|
||||
def test_partial_group_match(fake_fileexists):
|
||||
# Count the number of discarded matches (when a file doesn't match all other dupes of the
|
||||
# group) in Scanner.discarded_file_count
|
||||
s = Scanner()
|
||||
o1, o2, o3 = no('a b'), no('a'), no('b')
|
||||
o1, o2, o3 = no("a b"), no("a"), no("b")
|
||||
s.min_match_percentage = 50
|
||||
[group] = s.get_dupe_groups([o1, o2, o3])
|
||||
eq_(len(group), 2)
|
||||
@@ -431,6 +483,7 @@ def test_partial_group_match(fake_fileexists):
|
||||
assert o3 in group
|
||||
eq_(s.discarded_file_count, 1)
|
||||
|
||||
|
||||
def test_dont_group_files_that_dont_exist(tmpdir):
|
||||
# when creating groups, check that files exist first. It's possible that these files have
|
||||
# been moved during the scan by the user.
|
||||
@@ -439,8 +492,8 @@ def test_dont_group_files_that_dont_exist(tmpdir):
|
||||
s = Scanner()
|
||||
s.scan_type = ScanType.Contents
|
||||
p = Path(str(tmpdir))
|
||||
p['file1'].open('w').write('foo')
|
||||
p['file2'].open('w').write('foo')
|
||||
p["file1"].open("w").write("foo")
|
||||
p["file2"].open("w").write("foo")
|
||||
file1, file2 = fs.get_files(p)
|
||||
|
||||
def getmatches(*args, **kw):
|
||||
@@ -451,6 +504,7 @@ def test_dont_group_files_that_dont_exist(tmpdir):
|
||||
|
||||
assert not s.get_dupe_groups([file1, file2])
|
||||
|
||||
|
||||
def test_folder_scan_exclude_subfolder_matches(fake_fileexists):
|
||||
# when doing a Folders scan type, don't include matches for folders whose parent folder already
|
||||
# match.
|
||||
@@ -458,31 +512,33 @@ def test_folder_scan_exclude_subfolder_matches(fake_fileexists):
|
||||
s.scan_type = ScanType.Folders
|
||||
topf1 = no("top folder 1", size=42)
|
||||
topf1.md5 = topf1.md5partial = b"some_md5_1"
|
||||
topf1.path = Path('/topf1')
|
||||
topf1.path = Path("/topf1")
|
||||
topf2 = no("top folder 2", size=42)
|
||||
topf2.md5 = topf2.md5partial = b"some_md5_1"
|
||||
topf2.path = Path('/topf2')
|
||||
topf2.path = Path("/topf2")
|
||||
subf1 = no("sub folder 1", size=41)
|
||||
subf1.md5 = subf1.md5partial = b"some_md5_2"
|
||||
subf1.path = Path('/topf1/sub')
|
||||
subf1.path = Path("/topf1/sub")
|
||||
subf2 = no("sub folder 2", size=41)
|
||||
subf2.md5 = subf2.md5partial = b"some_md5_2"
|
||||
subf2.path = Path('/topf2/sub')
|
||||
eq_(len(s.get_dupe_groups([topf1, topf2, subf1, subf2])), 1) # only top folders
|
||||
subf2.path = Path("/topf2/sub")
|
||||
eq_(len(s.get_dupe_groups([topf1, topf2, subf1, subf2])), 1) # only top folders
|
||||
# however, if another folder matches a subfolder, keep in in the matches
|
||||
otherf = no("other folder", size=41)
|
||||
otherf.md5 = otherf.md5partial = b"some_md5_2"
|
||||
otherf.path = Path('/otherfolder')
|
||||
otherf.path = Path("/otherfolder")
|
||||
eq_(len(s.get_dupe_groups([topf1, topf2, subf1, subf2, otherf])), 2)
|
||||
|
||||
|
||||
def test_ignore_files_with_same_path(fake_fileexists):
|
||||
# It's possible that the scanner is fed with two file instances pointing to the same path. One
|
||||
# of these files has to be ignored
|
||||
s = Scanner()
|
||||
f1 = no('foobar', path='path1/foobar')
|
||||
f2 = no('foobar', path='path1/foobar')
|
||||
f1 = no("foobar", path="path1/foobar")
|
||||
f2 = no("foobar", path="path1/foobar")
|
||||
eq_(s.get_dupe_groups([f1, f2]), [])
|
||||
|
||||
|
||||
def test_dont_count_ref_files_as_discarded(fake_fileexists):
|
||||
# To speed up the scan, we don't bother comparing contents of files that are both ref files.
|
||||
# However, this causes problems in "discarded" counting and we make sure here that we don't
|
||||
@@ -492,20 +548,20 @@ def test_dont_count_ref_files_as_discarded(fake_fileexists):
|
||||
o1 = no("foo", path="p1")
|
||||
o2 = no("foo", path="p2")
|
||||
o3 = no("foo", path="p3")
|
||||
o1.md5 = o1.md5partial = 'foobar'
|
||||
o2.md5 = o2.md5partial = 'foobar'
|
||||
o3.md5 = o3.md5partial = 'foobar'
|
||||
o1.md5 = o1.md5partial = "foobar"
|
||||
o2.md5 = o2.md5partial = "foobar"
|
||||
o3.md5 = o3.md5partial = "foobar"
|
||||
o1.is_ref = True
|
||||
o2.is_ref = True
|
||||
eq_(len(s.get_dupe_groups([o1, o2, o3])), 1)
|
||||
eq_(s.discarded_file_count, 0)
|
||||
|
||||
|
||||
def test_priorize_me(fake_fileexists):
|
||||
# in ScannerME, bitrate goes first (right after is_ref) in priorization
|
||||
s = ScannerME()
|
||||
o1, o2 = no('foo', path='p1'), no('foo', path='p2')
|
||||
o1, o2 = no("foo", path="p1"), no("foo", path="p2")
|
||||
o1.bitrate = 1
|
||||
o2.bitrate = 2
|
||||
[group] = s.get_dupe_groups([o1, o2])
|
||||
assert group.ref is o2
|
||||
|
||||
|
||||
26
core/util.py
26
core/util.py
@@ -8,35 +8,41 @@ import time
|
||||
|
||||
from hscommon.util import format_time_decimal
|
||||
|
||||
|
||||
def format_timestamp(t, delta):
|
||||
if delta:
|
||||
return format_time_decimal(t)
|
||||
else:
|
||||
if t > 0:
|
||||
return time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(t))
|
||||
return time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(t))
|
||||
else:
|
||||
return '---'
|
||||
return "---"
|
||||
|
||||
|
||||
def format_words(w):
|
||||
def do_format(w):
|
||||
if isinstance(w, list):
|
||||
return '(%s)' % ', '.join(do_format(item) for item in w)
|
||||
return "(%s)" % ", ".join(do_format(item) for item in w)
|
||||
else:
|
||||
return w.replace('\n', ' ')
|
||||
return w.replace("\n", " ")
|
||||
|
||||
return ", ".join(do_format(item) for item in w)
|
||||
|
||||
return ', '.join(do_format(item) for item in w)
|
||||
|
||||
def format_perc(p):
|
||||
return "%0.0f" % p
|
||||
|
||||
|
||||
def format_dupe_count(c):
|
||||
return str(c) if c else '---'
|
||||
return str(c) if c else "---"
|
||||
|
||||
|
||||
def cmp_value(dupe, attrname):
|
||||
value = getattr(dupe, attrname, '')
|
||||
value = getattr(dupe, attrname, "")
|
||||
return value.lower() if isinstance(value, str) else value
|
||||
|
||||
def fix_surrogate_encoding(s, encoding='utf-8'):
|
||||
|
||||
def fix_surrogate_encoding(s, encoding="utf-8"):
|
||||
# ref #210. It's possible to end up with file paths that, while correct unicode strings, are
|
||||
# decoded with the 'surrogateescape' option, which make the string unencodable to utf-8. We fix
|
||||
# these strings here by trying to encode them and, if it fails, we do an encode/decode dance
|
||||
@@ -49,8 +55,6 @@ def fix_surrogate_encoding(s, encoding='utf-8'):
|
||||
try:
|
||||
s.encode(encoding)
|
||||
except UnicodeEncodeError:
|
||||
return s.encode(encoding, 'replace').decode(encoding)
|
||||
return s.encode(encoding, "replace").decode(encoding)
|
||||
else:
|
||||
return s
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user