Integrated the jobprogress library into hscommon

I have a fix to make in it and it's really silly to pretend that this
lib is of any use to anybody outside HS apps. Bringing it back here will
make things simpler.
This commit is contained in:
Virgil Dupras 2014-10-05 16:31:16 -04:00
parent 87c2fa2573
commit ac32305532
21 changed files with 775 additions and 487 deletions

View File

@ -1,9 +1,9 @@
# Created By: Virgil Dupras
# Created On: 2009-12-30
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
import sys
@ -110,8 +110,9 @@ def build_cocoa(edition, dev):
'me': ['core_me'] + appscript_pkgs + ['hsaudiotag'],
'pe': ['core_pe'] + appscript_pkgs,
}[edition]
tocopy = ['core', 'hscommon', 'cocoa/inter', 'cocoalib/cocoa', 'jobprogress', 'objp',
'send2trash'] + specific_packages
tocopy = [
'core', 'hscommon', 'cocoa/inter', 'cocoalib/cocoa', 'objp', 'send2trash'
] + specific_packages
copy_packages(tocopy, pydep_folder, create_links=dev)
sys.path.insert(0, 'build')
extra_deps = None

View File

@ -2,8 +2,8 @@
# Created On: 2007-10-06
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
import logging
@ -26,7 +26,7 @@ def autoreleasepool(func):
def as_fetch(as_list, as_type, step_size=1000):
"""When fetching items from a very big list through applescript, the connection with the app
will timeout. This function is to circumvent that. 'as_type' is the type of the items in the
will timeout. This function is to circumvent that. 'as_type' is the type of the items in the
list (found in appscript.k). If we don't pass it to the 'each' arg of 'count()', it doesn't work.
applescript is rather stupid..."""
result = []
@ -66,7 +66,7 @@ def extract_tb_noline(tb):
def safe_format_exception(type, value, tb):
"""Format exception from type, value and tb and fallback if there's a problem.
In some cases in threaded exceptions under Cocoa, I get tracebacks targeting pyc files instead
of py files, which results in traceback.format_exception() trying to print lines from pyc files
and then crashing when trying to interpret that binary data as utf-8. We want a fallback in
@ -113,5 +113,6 @@ def patch_threaded_job_performer():
# _async_run, under cocoa, has to be run within an autorelease pool to prevent leaks.
# You only need this patch if you use one of CocoaProxy's functions (which allocate objc
# structures) inside a threaded job.
from jobprogress.performer import ThreadedJobPerformer
from hscommon.jobprogress.performer import ThreadedJobPerformer
ThreadedJobPerformer._async_run = autoreleasepool(ThreadedJobPerformer._async_run)

View File

@ -1,9 +1,9 @@
# Created By: Virgil Dupras
# Created On: 2006/11/11
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
import os
@ -15,7 +15,7 @@ import time
import shutil
from send2trash import send2trash
from jobprogress import job
from hscommon.jobprogress import job
from hscommon.notify import Broadcaster
from hscommon.path import Path
from hscommon.conflict import smart_move, smart_copy
@ -78,7 +78,7 @@ def format_words(w):
return '(%s)' % ', '.join(do_format(item) for item in w)
else:
return w.replace('\n', ' ')
return ', '.join(do_format(item) for item in w)
def format_perc(p):
@ -110,33 +110,33 @@ def fix_surrogate_encoding(s, encoding='utf-8'):
class DupeGuru(Broadcaster):
"""Holds everything together.
Instantiated once per running application, it holds a reference to every high-level object
whose reference needs to be held: :class:`~core.results.Results`, :class:`Scanner`,
:class:`~core.directories.Directories`, :mod:`core.gui` instances, etc..
It also hosts high level methods and acts as a coordinator for all those elements. This is why
some of its methods seem a bit shallow, like for example :meth:`mark_all` and
:meth:`remove_duplicates`. These methods are just proxies for a method in :attr:`results`, but
they are also followed by a notification call which is very important if we want GUI elements
to be correctly notified of a change in the data they're presenting.
.. attribute:: directories
Instance of :class:`~core.directories.Directories`. It holds the current folder selection.
.. attribute:: results
Instance of :class:`core.results.Results`. Holds the results of the latest scan.
.. attribute:: selected_dupes
List of currently selected dupes from our :attr:`results`. Whenever the user changes its
selection at the UI level, :attr:`result_table` takes care of updating this attribute, so
you can trust that it's always up-to-date.
.. attribute:: result_table
Instance of :mod:`meta-gui <core.gui>` table listing the results from :attr:`results`
"""
#--- View interface
@ -154,7 +154,7 @@ class DupeGuru(Broadcaster):
PROMPT_NAME = "dupeGuru"
SCANNER_CLASS = scanner.Scanner
def __init__(self, view):
if view.get_default(DEBUG_MODE_PREFERENCE):
logging.getLogger().setLevel(logging.DEBUG)
@ -185,14 +185,14 @@ class DupeGuru(Broadcaster):
children = [self.result_table, self.directory_tree, self.stats_label, self.details_panel]
for child in children:
child.connect()
#--- Virtual
def _prioritization_categories(self):
raise NotImplementedError()
def _create_result_table(self):
raise NotImplementedError()
#--- Private
def _get_dupe_sort_key(self, dupe, get_group, key, delta):
if key == 'marked':
@ -212,7 +212,7 @@ class DupeGuru(Broadcaster):
same = cmp_value(dupe, key) == refval
result = (same, result)
return result
def _get_group_sort_key(self, group, key):
if key == 'percentage':
return group.percentage
@ -221,15 +221,15 @@ class DupeGuru(Broadcaster):
if key == 'marked':
return len([dupe for dupe in group.dupes if self.results.is_marked(dupe)])
return cmp_value(group.ref, key)
def _do_delete(self, j, link_deleted, use_hardlinks, direct_deletion):
def op(dupe):
j.add_progress()
return self._do_delete_dupe(dupe, link_deleted, use_hardlinks, direct_deletion)
j.start_job(self.results.mark_count)
self.results.perform_on_marked(op, True)
def _do_delete_dupe(self, dupe, link_deleted, use_hardlinks, direct_deletion):
if not dupe.path.exists():
return
@ -248,11 +248,11 @@ class DupeGuru(Broadcaster):
linkfunc = os.link if use_hardlinks else os.symlink
linkfunc(str(ref.path), str_path)
self.clean_empty_dirs(dupe.path.parent())
def _create_file(self, path):
# We add fs.Folder to fileclasses in case the file we're loading contains folder paths.
return fs.get_file(path, self.directories.fileclasses + [fs.Folder])
def _get_file(self, str_path):
path = Path(str_path)
f = self._create_file(path)
@ -263,7 +263,7 @@ class DupeGuru(Broadcaster):
return f
except EnvironmentError:
return None
def _get_export_data(self):
columns = [col for col in self.result_table.columns.ordered_columns
if col.visible and col.name != 'marked']
@ -276,20 +276,20 @@ class DupeGuru(Broadcaster):
row.insert(0, group_id)
rows.append(row)
return colnames, rows
def _results_changed(self):
self.selected_dupes = [d for d in self.selected_dupes
if self.results.get_group_of_duplicate(d) is not None]
self.notify('results_changed')
def _start_job(self, jobid, func, args=()):
title = JOBID2TITLE[jobid]
try:
self.progress_window.run(jobid, title, func, args=args)
self.progress_window.run(jobid, title, func, args=args)
except job.JobInProgressError:
msg = tr("A previous action is still hanging in there. You can't start a new one yet. Wait a few seconds, then try again.")
self.view.show_message(msg)
def _job_completed(self, jobid):
if jobid == JobType.Scan:
self._results_changed()
@ -312,7 +312,7 @@ class DupeGuru(Broadcaster):
JobType.Delete: tr("All marked files were successfully sent to Trash."),
}[jobid]
self.view.show_message(msg)
@staticmethod
def _remove_hardlink_dupes(files):
seen_inodes = set()
@ -327,19 +327,19 @@ class DupeGuru(Broadcaster):
seen_inodes.add(inode)
result.append(file)
return result
def _select_dupes(self, dupes):
if dupes == self.selected_dupes:
return
self.selected_dupes = dupes
self.notify('dupes_selected')
#--- Public
def add_directory(self, d):
"""Adds folder ``d`` to :attr:`directories`.
Shows an error message dialog if something bad happens.
:param str d: path of folder to add
"""
try:
@ -349,7 +349,7 @@ class DupeGuru(Broadcaster):
self.view.show_message(tr("'{}' already is in the list.").format(d))
except directories.InvalidPathError:
self.view.show_message(tr("'{}' does not exist.").format(d))
def add_selected_to_ignore_list(self):
"""Adds :attr:`selected_dupes` to :attr:`scanner`'s ignore list.
"""
@ -367,10 +367,10 @@ class DupeGuru(Broadcaster):
self.scanner.ignore_list.Ignore(str(other.path), str(dupe.path))
self.remove_duplicates(dupes)
self.ignore_list_dialog.refresh()
def apply_filter(self, filter):
"""Apply a filter ``filter`` to the results so that it shows only dupe groups that match it.
:param str filter: filter to apply
"""
self.results.apply_filter(None)
@ -379,12 +379,12 @@ class DupeGuru(Broadcaster):
filter = escape(filter, '*', '.')
self.results.apply_filter(filter)
self._results_changed()
def clean_empty_dirs(self, path):
if self.options['clean_empty_dirs']:
while delete_if_empty(path, ['.DS_Store']):
path = path.parent()
def copy_or_move(self, dupe, copy: bool, destination: str, dest_type: DestType):
source_path = dupe.path
location_path = first(p for p in self.directories if dupe.path in p)
@ -406,20 +406,20 @@ class DupeGuru(Broadcaster):
else:
smart_move(source_path, dest_path)
self.clean_empty_dirs(source_path.parent())
def copy_or_move_marked(self, copy):
"""Start an async move (or copy) job on marked duplicates.
:param bool copy: If True, duplicates will be copied instead of moved
"""
def do(j):
def op(dupe):
j.add_progress()
self.copy_or_move(dupe, copy, destination, desttype)
j.start_job(self.results.mark_count)
self.results.perform_on_marked(op, not copy)
if not self.results.mark_count:
self.view.show_message(MSG_NO_MARKED_DUPES)
return
@ -430,7 +430,7 @@ class DupeGuru(Broadcaster):
desttype = self.options['copymove_dest_type']
jobid = JobType.Copy if copy else JobType.Move
self._start_job(jobid, do)
def delete_marked(self):
"""Start an async job to send marked duplicates to the trash.
"""
@ -443,10 +443,10 @@ class DupeGuru(Broadcaster):
self.deletion_options.direct]
logging.debug("Starting deletion job with args %r", args)
self._start_job(JobType.Delete, self._do_delete, args=args)
def export_to_xhtml(self):
"""Export current results to XHTML.
The configuration of the :attr:`result_table` (columns order and visibility) is used to
determine how the data is presented in the export. In other words, the exported table in
the resulting XHTML will look just like the results table.
@ -454,10 +454,10 @@ class DupeGuru(Broadcaster):
colnames, rows = self._get_export_data()
export_path = export.export_to_xhtml(colnames, rows)
desktop.open_path(export_path)
def export_to_csv(self):
"""Export current results to CSV.
The columns and their order in the resulting CSV file are determined in the same way as in
:meth:`export_to_xhtml`.
"""
@ -465,7 +465,7 @@ class DupeGuru(Broadcaster):
if dest_file:
colnames, rows = self._get_export_data()
export.export_to_csv(dest_file, colnames, rows)
def get_display_info(self, dupe, group, delta=False):
def empty_data():
return {c.name: '---' for c in self.result_table.COLUMNS[1:]}
@ -476,10 +476,10 @@ class DupeGuru(Broadcaster):
except Exception as e:
logging.warning("Exception on GetDisplayInfo for %s: %s", str(dupe.path), str(e))
return empty_data()
def invoke_custom_command(self):
"""Calls command in ``CustomCommand`` pref with ``%d`` and ``%r`` placeholders replaced.
Using the current selection, ``%d`` is replaced with the currently selected dupe and ``%r``
is replaced with that dupe's ref file. If there's no selection, the command is not invoked.
If the dupe is a ref, ``%d`` and ``%r`` will be the same.
@ -506,10 +506,10 @@ class DupeGuru(Broadcaster):
subprocess.Popen(exename + args, shell=True, cwd=path)
else:
subprocess.Popen(cmd, shell=True)
def load(self):
"""Load directory selection and ignore list from files in appdata.
This method is called during startup so that directory selection and ignore list, which
is persistent data, is the same as when the last session was closed (when :meth:`save` was
called).
@ -519,19 +519,19 @@ class DupeGuru(Broadcaster):
p = op.join(self.appdata, 'ignore_list.xml')
self.scanner.ignore_list.load_from_xml(p)
self.ignore_list_dialog.refresh()
def load_from(self, filename):
"""Start an async job to load results from ``filename``.
:param str filename: path of the XML file (created with :meth:`save_as`) to load
"""
def do(j):
self.results.load_from_xml(filename, self._get_file, j)
self._start_job(JobType.Load, do)
def make_selected_reference(self):
"""Promote :attr:`selected_dupes` to reference position within their respective groups.
Each selected dupe will become the :attr:`~core.engine.Group.ref` of its group. If there's
more than one dupe selected for the same group, only the first (in the order currently shown
in :attr:`result_table`) dupe will be promoted.
@ -560,28 +560,28 @@ class DupeGuru(Broadcaster):
# do is to keep our selection index-wise (different dupe selection, but same index
# selection).
self.notify('results_changed_but_keep_selection')
def mark_all(self):
"""Set all dupes in the results as marked.
"""
self.results.mark_all()
self.notify('marking_changed')
def mark_none(self):
"""Set all dupes in the results as unmarked.
"""
self.results.mark_none()
self.notify('marking_changed')
def mark_invert(self):
"""Invert the marked state of all dupes in the results.
"""
self.results.mark_invert()
self.notify('marking_changed')
def mark_dupe(self, dupe, marked):
"""Change marked status of ``dupe``.
:param dupe: dupe to mark/unmark
:type dupe: :class:`~core.fs.File`
:param bool marked: True = mark, False = unmark
@ -591,7 +591,7 @@ class DupeGuru(Broadcaster):
else:
self.results.unmark(dupe)
self.notify('marking_changed')
def open_selected(self):
"""Open :attr:`selected_dupes` with their associated application.
"""
@ -600,16 +600,16 @@ class DupeGuru(Broadcaster):
return
for dupe in self.selected_dupes:
desktop.open_path(dupe.path)
def purge_ignore_list(self):
"""Remove files that don't exist from :attr:`ignore_list`.
"""
self.scanner.ignore_list.Filter(lambda f,s:op.exists(f) and op.exists(s))
self.ignore_list_dialog.refresh()
def remove_directories(self, indexes):
"""Remove root directories at ``indexes`` from :attr:`directories`.
:param indexes: Indexes of the directories to remove.
:type indexes: list of int
"""
@ -620,30 +620,30 @@ class DupeGuru(Broadcaster):
self.notify('directories_changed')
except IndexError:
pass
def remove_duplicates(self, duplicates):
"""Remove ``duplicates`` from :attr:`results`.
Calls :meth:`~core.results.Results.remove_duplicates` and send appropriate notifications.
:param duplicates: duplicates to remove.
:type duplicates: list of :class:`~core.fs.File`
"""
self.results.remove_duplicates(self.without_ref(duplicates))
self.notify('results_changed_but_keep_selection')
def remove_marked(self):
"""Remove marked duplicates from the results (without touching the files themselves).
"""
if not self.results.mark_count:
self.view.show_message(MSG_NO_MARKED_DUPES)
return
msg = tr("You are about to remove %d files from results. Continue?")
msg = tr("You are about to remove %d files from results. Continue?")
if not self.view.ask_yes_no(msg % self.results.mark_count):
return
self.results.perform_on_marked(lambda x:None, True)
self._results_changed()
def remove_selected(self):
"""Remove :attr:`selected_dupes` from the results (without touching the files themselves).
"""
@ -651,16 +651,16 @@ class DupeGuru(Broadcaster):
if not dupes:
self.view.show_message(MSG_NO_SELECTED_DUPES)
return
msg = tr("You are about to remove %d files from results. Continue?")
msg = tr("You are about to remove %d files from results. Continue?")
if not self.view.ask_yes_no(msg % len(dupes)):
return
self.remove_duplicates(dupes)
def rename_selected(self, newname):
"""Renames the selected dupe's file to ``newname``.
If more than one dupe is selected, the first one is used.
:param str newname: The filename to rename the dupe's file to.
"""
try:
@ -670,13 +670,13 @@ class DupeGuru(Broadcaster):
except (IndexError, fs.FSError) as e:
logging.warning("dupeGuru Warning: %s" % str(e))
return False
def reprioritize_groups(self, sort_key):
"""Sort dupes in each group (in :attr:`results`) according to ``sort_key``.
Called by the re-prioritize dialog. Calls :meth:`~core.engine.Group.prioritize` and, once
the sorting is done, show a message that confirms the action.
:param sort_key: The key being sent to :meth:`~core.engine.Group.prioritize`
:type sort_key: f(dupe)
"""
@ -687,11 +687,11 @@ class DupeGuru(Broadcaster):
self._results_changed()
msg = tr("{} duplicate groups were changed by the re-prioritization.").format(count)
self.view.show_message(msg)
def reveal_selected(self):
if self.selected_dupes:
desktop.reveal_path(self.selected_dupes[0].path)
def save(self):
if not op.exists(self.appdata):
os.makedirs(self.appdata)
@ -699,17 +699,17 @@ class DupeGuru(Broadcaster):
p = op.join(self.appdata, 'ignore_list.xml')
self.scanner.ignore_list.save_to_xml(p)
self.notify('save_session')
def save_as(self, filename):
"""Save results in ``filename``.
:param str filename: path of the file to save results (as XML) to.
"""
self.results.save_to_xml(filename)
def start_scanning(self):
"""Starts an async job to scan for duplicates.
Scans folders selected in :attr:`directories` and puts the results in :attr:`results`
"""
def do(j):
@ -722,14 +722,14 @@ class DupeGuru(Broadcaster):
files = self._remove_hardlink_dupes(files)
logging.info('Scanning %d files' % len(files))
self.results.groups = self.scanner.get_dupe_groups(files, j)
if not self.directories.has_any_file():
self.view.show_message(tr("The selected directories contain no scannable file."))
return
self.results.groups = []
self._results_changed()
self._start_job(JobType.Scan, do)
def toggle_selected_mark_state(self):
selected = self.without_ref(self.selected_dupes)
if not selected:
@ -741,12 +741,12 @@ class DupeGuru(Broadcaster):
for dupe in selected:
markfunc(dupe)
self.notify('marking_changed')
def without_ref(self, dupes):
"""Returns ``dupes`` with all reference elements removed.
"""
return [dupe for dupe in dupes if self.results.get_group_of_duplicate(dupe).ref is not dupe]
def get_default(self, key, fallback_value=None):
result = nonone(self.view.get_default(key), fallback_value)
if fallback_value is not None and not isinstance(result, type(fallback_value)):
@ -756,10 +756,10 @@ class DupeGuru(Broadcaster):
except Exception:
result = fallback_value
return result
def set_default(self, key, value):
self.view.set_default(key, value)
#--- Properties
@property
def stat_line(self):
@ -767,4 +767,4 @@ class DupeGuru(Broadcaster):
if self.scanner.discarded_file_count:
result = tr("%s (%d discarded)") % (result, self.scanner.discarded_file_count)
return result

View File

@ -1,15 +1,15 @@
# Created By: Virgil Dupras
# Created On: 2006/02/27
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
from xml.etree import ElementTree as ET
import logging
from jobprogress import job
from hscommon.jobprogress import job
from hscommon.path import Path
from hscommon.util import FileOrPath
@ -24,7 +24,7 @@ __all__ = [
class DirectoryState:
"""Enum describing how a folder should be considered.
* DirectoryState.Normal: Scan all files normally
* DirectoryState.Reference: Scan files, but make sure never to delete any of them
* DirectoryState.Excluded: Don't scan this folder
@ -41,10 +41,10 @@ class InvalidPathError(Exception):
class Directories:
"""Holds user folder selection.
Manages the selection that the user makes through the folder selection dialog. It also manages
folder states, and how recursion applies to them.
Then, when the user starts the scan, :meth:`get_files` is called to retrieve all files (wrapped
in :mod:`core.fs`) that have to be scanned according to the chosen folders/states.
"""
@ -55,28 +55,28 @@ class Directories:
self.states = {}
self.fileclasses = fileclasses
self.folderclass = fs.Folder
def __contains__(self, path):
for p in self._dirs:
if path in p:
return True
return False
def __delitem__(self,key):
self._dirs.__delitem__(key)
def __getitem__(self,key):
return self._dirs.__getitem__(key)
def __len__(self):
return len(self._dirs)
#---Private
def _default_state_for_path(self, path):
# Override this in subclasses to specify the state of some special folders.
if path.name.startswith('.'): # hidden
return DirectoryState.Excluded
def _get_files(self, from_path, j):
j.check_if_cancelled()
state = self.get_state(from_path)
@ -102,7 +102,7 @@ class Directories:
yield file
except (EnvironmentError, fs.InvalidPath):
pass
def _get_folders(self, from_folder, j):
j.check_if_cancelled()
try:
@ -116,16 +116,16 @@ class Directories:
yield from_folder
except (EnvironmentError, fs.InvalidPath):
pass
#---Public
def add_path(self, path):
"""Adds ``path`` to self, if not already there.
Raises :exc:`AlreadyThereError` if ``path`` is already in self. If path is a directory
containing some of the directories already present in self, ``path`` will be added, but all
directories under it will be removed. Can also raise :exc:`InvalidPathError` if ``path``
does not exist.
:param Path path: path to add
"""
if path in self:
@ -134,11 +134,11 @@ class Directories:
raise InvalidPathError()
self._dirs = [p for p in self._dirs if p not in path]
self._dirs.append(path)
@staticmethod
def get_subfolders(path):
"""Returns a sorted list of paths corresponding to subfolders in ``path``.
:param Path path: get subfolders from there
:rtype: list of Path
"""
@ -148,29 +148,29 @@ class Directories:
return subpaths
except EnvironmentError:
return []
def get_files(self, j=job.nulljob):
"""Yields all files that are not excluded.
Returned files also have their ``is_ref`` attr set if applicable.
"""
for path in self._dirs:
for file in self._get_files(path, j):
yield file
def get_folders(self, j=job.nulljob):
"""Yields all folders that are not excluded.
Returned folders also have their ``is_ref`` attr set if applicable.
"""
for path in self._dirs:
from_folder = self.folderclass(path)
for folder in self._get_folders(from_folder, j):
yield folder
def get_state(self, path):
"""Returns the state of ``path``.
:rtype: :class:`DirectoryState`
"""
if path in self.states:
@ -183,12 +183,12 @@ class Directories:
return self.get_state(parent)
else:
return DirectoryState.Normal
def has_any_file(self):
"""Returns whether selected folders contain any file.
Because it stops at the first file it finds, it's much faster than get_files().
:rtype: bool
"""
try:
@ -196,10 +196,10 @@ class Directories:
return True
except StopIteration:
return False
def load_from_file(self, infile):
"""Load folder selection from ``infile``.
:param file infile: path or file pointer to XML generated through :meth:`save_to_file`
"""
try:
@ -222,10 +222,10 @@ class Directories:
path = attrib['path']
state = attrib['value']
self.states[Path(path)] = int(state)
def save_to_file(self, outfile):
"""Save folder selection as XML to ``outfile``.
:param file outfile: path or file pointer to XML file to save to.
"""
with FileOrPath(outfile, 'wb') as fp:
@ -239,10 +239,10 @@ class Directories:
state_node.set('value', str(state))
tree = ET.ElementTree(root)
tree.write(fp, encoding='utf-8')
def set_state(self, path, state):
"""Set the state of folder at ``path``.
:param Path path: path of the target folder
:param state: state to set folder to
:type state: :class:`DirectoryState`
@ -253,4 +253,4 @@ class Directories:
if path.is_parent_of(iter_path):
del self.states[iter_path]
self.states[path] = state

View File

@ -1,9 +1,9 @@
# Created By: Virgil Dupras
# Created On: 2006/01/29
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
import difflib
@ -15,7 +15,7 @@ from unicodedata import normalize
from hscommon.util import flatten, multi_replace
from hscommon.trans import tr
from jobprogress import job
from hscommon.jobprogress import job
(WEIGHT_WORDS,
MATCH_SIMILAR_WORDS,
@ -45,7 +45,7 @@ def unpack_fields(fields):
def compare(first, second, flags=()):
"""Returns the % of words that match between ``first`` and ``second``
The result is an ``int`` in the range 0..100.
``first`` and ``second`` can be either a string or a list (of words).
"""
@ -53,7 +53,7 @@ def compare(first, second, flags=()):
return 0
if any(isinstance(element, list) for element in first):
return compare_fields(first, second, flags)
second = second[:] #We must use a copy of second because we remove items from it
second = second[:] #We must use a copy of second because we remove items from it
match_similar = MATCH_SIMILAR_WORDS in flags
weight_words = WEIGHT_WORDS in flags
joined = first + second
@ -77,9 +77,9 @@ def compare(first, second, flags=()):
def compare_fields(first, second, flags=()):
"""Returns the score for the lowest matching :ref:`fields`.
``first`` and ``second`` must be lists of lists of strings. Each sub-list is then compared with
:func:`compare`.
:func:`compare`.
"""
if len(first) != len(second):
return 0
@ -104,10 +104,10 @@ def compare_fields(first, second, flags=()):
def build_word_dict(objects, j=job.nulljob):
"""Returns a dict of objects mapped by their words.
objects must have a ``words`` attribute being a list of strings or a list of lists of strings
(:ref:`fields`).
The result will be a dict with words as keys, lists of objects as values.
"""
result = defaultdict(set)
@ -118,7 +118,7 @@ def build_word_dict(objects, j=job.nulljob):
def merge_similar_words(word_dict):
"""Take all keys in ``word_dict`` that are similar, and merge them together.
``word_dict`` has been built with :func:`build_word_dict`. Similarity is computed with Python's
``difflib.get_close_matches()``, which computes the number of edits that are necessary to make
a word equal to the other.
@ -138,9 +138,9 @@ def merge_similar_words(word_dict):
def reduce_common_words(word_dict, threshold):
"""Remove all objects from ``word_dict`` values where the object count >= ``threshold``
``word_dict`` has been built with :func:`build_word_dict`.
The exception to this removal are the objects where all the words of the object are common.
Because if we remove them, we will miss some duplicates!
"""
@ -181,17 +181,17 @@ class Match(namedtuple('Match', 'first second percentage')):
exact scan methods, such as Contents scans, this will always be 100.
"""
__slots__ = ()
def get_match(first, second, flags=()):
#it is assumed here that first and second both have a "words" attribute
percentage = compare(first.words, second.words, flags)
return Match(first, second, percentage)
def getmatches(
objects, min_match_percentage=0, match_similar_words=False, weight_words=False,
objects, min_match_percentage=0, match_similar_words=False, weight_words=False,
no_field_order=False, j=job.nulljob):
"""Returns a list of :class:`Match` within ``objects`` after fuzzily matching their words.
:param objects: List of :class:`~core.fs.File` to match.
:param int min_match_percentage: minimum % of words that have to match.
:param bool match_similar_words: make similar words (see :func:`merge_similar_words`) match.
@ -246,7 +246,7 @@ def getmatches(
def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob):
"""Returns a list of :class:`Match` within ``files`` if their contents is the same.
:param str sizeattr: attribute name of the :class:`~core.fs.File` that returns the size of the
file to use for comparison.
:param bool partial: if true, will use the "md5partial" attribute instead of "md5" to compute
@ -278,44 +278,44 @@ class Group:
This manages match pairs into groups and ensures that all files in the group match to each
other.
.. attribute:: ref
The "reference" file, which is the file among the group that isn't going to be deleted.
.. attribute:: ordered
Ordered list of duplicates in the group (including the :attr:`ref`).
.. attribute:: unordered
Set of duplicates in the group (including the :attr:`ref`).
.. attribute:: dupes
An ordered list of the group's duplicates, without :attr:`ref`. Equivalent to
``ordered[1:]``
.. attribute:: percentage
Average match percentage of match pairs containing :attr:`ref`.
"""
#---Override
def __init__(self):
self._clear()
def __contains__(self, item):
return item in self.unordered
def __getitem__(self, key):
return self.ordered.__getitem__(key)
def __iter__(self):
return iter(self.ordered)
def __len__(self):
return len(self.ordered)
#---Private
def _clear(self):
self._percentage = None
@ -324,22 +324,22 @@ class Group:
self.candidates = defaultdict(set)
self.ordered = []
self.unordered = set()
def _get_matches_for_ref(self):
if self._matches_for_ref is None:
ref = self.ref
self._matches_for_ref = [match for match in self.matches if ref in match]
return self._matches_for_ref
#---Public
def add_match(self, match):
"""Adds ``match`` to internal match list and possibly add duplicates to the group.
A duplicate can only be considered as such if it matches all other duplicates in the group.
This method registers that pair (A, B) represented in ``match`` as possible candidates and,
if A and/or B end up matching every other duplicate in the group, adds these duplicates to
the group.
:param tuple match: pair of :class:`~core.fs.File` to add
"""
def add_candidate(item, match):
@ -348,7 +348,7 @@ class Group:
if self.unordered <= matches:
self.ordered.append(item)
self.unordered.add(item)
if match in self.matches:
return
self.matches.add(match)
@ -359,17 +359,17 @@ class Group:
add_candidate(second, first)
self._percentage = None
self._matches_for_ref = None
def discard_matches(self):
"""Remove all recorded matches that didn't result in a duplicate being added to the group.
You can call this after the duplicate scanning process to free a bit of memory.
"""
discarded = set(m for m in self.matches if not all(obj in self.unordered for obj in [m.first, m.second]))
self.matches -= discarded
self.candidates = defaultdict(set)
return discarded
def get_match_of(self, item):
"""Returns the match pair between ``item`` and :attr:`ref`.
"""
@ -378,10 +378,10 @@ class Group:
for m in self._get_matches_for_ref():
if item in m:
return m
def prioritize(self, key_func, tie_breaker=None):
"""Reorders :attr:`ordered` according to ``key_func``.
:param key_func: Key (f(x)) to be used for sorting
:param tie_breaker: function to be used to select the reference position in case the top
duplicates have the same key_func() result.
@ -405,7 +405,7 @@ class Group:
self.switch_ref(ref)
return True
return changed
def remove_dupe(self, item, discard_matches=True):
try:
self.ordered.remove(item)
@ -419,7 +419,7 @@ class Group:
self._clear()
except ValueError:
pass
def switch_ref(self, with_dupe):
"""Make the :attr:`ref` dupe of the group switch position with ``with_dupe``.
"""
@ -433,9 +433,9 @@ class Group:
return True
except ValueError:
return False
@property
def dupes(self):
    """Every member of the group except the first one (``self[1:]``)."""
    return self[1:]
@property
def percentage(self):
if self._percentage is None:
@ -445,16 +445,16 @@ class Group:
else:
self._percentage = 0
return self._percentage
@property
def ref(self):
    """The first member of the group (``self[0]``), or ``None`` when the group is empty."""
    return self[0] if self else None
def get_groups(matches, j=job.nulljob):
"""Returns a list of :class:`Group` from ``matches``.
Create groups out of match pairs in the smartest way possible.
"""
matches.sort(key=lambda match: -match.percentage)

View File

@ -1,9 +1,9 @@
# Created By: Virgil Dupras
# Created On: 2006/02/23
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
import logging
@ -12,7 +12,7 @@ import os
import os.path as op
from xml.etree import ElementTree as ET
from jobprogress.job import nulljob
from hscommon.jobprogress.job import nulljob
from hscommon.conflict import get_conflicted_name
from hscommon.util import flatten, nonone, FileOrPath, format_size
from hscommon.trans import tr
@ -22,15 +22,15 @@ from .markable import Markable
class Results(Markable):
"""Manages a collection of duplicate :class:`~core.engine.Group`.
This class takes care of marking, sorting and filtering duplicate groups.
.. attribute:: groups
The list of :class:`~core.engine.Group` managed by this instance.
.. attribute:: dupes
A list of all duplicates (:class:`~core.fs.File` instances), without ref, contained in the
currently managed :attr:`groups`.
"""
@ -50,16 +50,16 @@ class Results(Markable):
self.app = app
self.problems = [] # (dupe, error_msg)
self.is_modified = False
def _did_mark(self, dupe):
    # Adds ``dupe.size`` to the running total of marked bytes.
    # NOTE(review): presumably a hook called by Markable once ``dupe`` is marked -- confirm.
    self.__marked_size += dupe.size
def _did_unmark(self, dupe):
    # Subtracts ``dupe.size`` from the running total of marked bytes.
    # NOTE(review): presumably a hook called by Markable once ``dupe`` is unmarked -- confirm.
    self.__marked_size -= dupe.size
def _get_markable_count(self):
    # Returns the count of markable dupes tallied by ``__recalculate_stats``.
    # NOTE(review): presumably overrides a Markable hook -- confirm.
    return self.__total_count
def _is_markable(self, dupe):
if dupe.is_ref:
return False
@ -71,25 +71,25 @@ class Results(Markable):
if self.__filtered_dupes and dupe not in self.__filtered_dupes:
return False
return True
def mark_all(self):
    """Mark all dupes; when a filter is active, mark only the filtered dupes."""
    if not self.__filters:
        # No active filter: defer to the base class implementation.
        Markable.mark_all(self)
        return
    self.mark_multiple(self.__filtered_dupes)
def mark_invert(self):
    """Invert the mark state of all dupes; when a filter is active, only of the filtered dupes."""
    if not self.__filters:
        # No active filter: defer to the base class implementation.
        Markable.mark_invert(self)
        return
    self.mark_toggle_multiple(self.__filtered_dupes)
def mark_none(self):
    """Unmark all dupes; when a filter is active, unmark only the filtered dupes."""
    if not self.__filters:
        # No active filter: defer to the base class implementation.
        Markable.mark_none(self)
        return
    self.unmark_multiple(self.__filtered_dupes)
#---Private
def __get_dupe_list(self):
if self.__dupes is None:
@ -103,13 +103,13 @@ class Results(Markable):
if sd:
self.sort_dupes(sd[0], sd[1], sd[2])
return self.__dupes
def __get_groups(self):
    # Returns the filtered group list when one exists (i.e. is not None, even if empty),
    # otherwise the full group list.
    filtered = self.__filtered_groups
    return self.__groups if filtered is None else filtered
def __get_stat_line(self):
if self.__filtered_dupes is None:
mark_count = self.mark_count
@ -132,7 +132,7 @@ class Results(Markable):
if self.__filters:
result += tr(" filter: %s") % ' --> '.join(self.__filters)
return result
def __recalculate_stats(self):
self.__total_size = 0
self.__total_count = 0
@ -140,7 +140,7 @@ class Results(Markable):
markable = [dupe for dupe in group.dupes if self._is_markable(dupe)]
self.__total_count += len(markable)
self.__total_size += sum(dupe.size for dupe in markable)
def __set_groups(self, new_groups):
self.mark_none()
self.__groups = new_groups
@ -155,18 +155,18 @@ class Results(Markable):
self.apply_filter(None)
for filter_str in old_filters:
self.apply_filter(filter_str)
#---Public
def apply_filter(self, filter_str):
"""Applies a filter ``filter_str`` to :attr:`groups`
When you apply the filter, only dupes with the filename matching ``filter_str`` will be in
in the results. To cancel the filter, just call apply_filter with ``filter_str`` to None,
in the results. To cancel the filter, just call apply_filter with ``filter_str`` to None,
and the results will go back to normal.
If call apply_filter on a filtered results, the filter will be applied
If call apply_filter on a filtered results, the filter will be applied
*on the filtered results*.