mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-25 16:11:39 +00:00

Compare commits

...

10 Commits

Author SHA1 Message Date
Virgil Dupras
350b2c64e0 Fixed nasty crash during PE's Cocoa block scanning
Using PyUnicode_GET_SIZE was obviously wrong, but I'm guessing that the str changes in py3.3 made that wrongness significant...
2013-08-26 07:17:02 -04:00
Virgil Dupras
dcc57a7afb Ah crap, another Cocoa fatal mistake 2013-08-25 17:10:26 -04:00
Virgil Dupras
8b510994ad pe v2.8.0 2013-08-25 10:53:08 -04:00
Virgil Dupras
4a4d1bbfcd Eased "Clear Picture Cache" triggering under Qt
Added a keybinding and added the action to the directories dialog's menu
(it was previously only in the results window's menu). Fixes #230.
2013-08-25 10:47:10 -04:00
Virgil Dupras
78c3c8ec2d Improved dev docs 2013-08-20 22:52:43 -04:00
Virgil Dupras
e99e2b18e0 Call sphinx-build from within Python instead of a subprocess 2013-08-19 17:43:32 -04:00
Virgil Dupras
ae1283f2e1 se v3.7.1 2013-08-19 16:48:07 -04:00
Virgil Dupras
cc76f3ca87 Fixed SE folder scanning under Cocoa 2013-08-18 21:07:33 -04:00
Virgil Dupras
be8efea081 Fixed folder scanning in SE, which was completely broken
Oops
2013-08-18 20:50:31 -04:00
Virgil Dupras
7e8f9036d8 Began serious code documentation effort
Enabled the autodoc Sphinx extension and started adding docstrings to
classes, methods, etc.. It's quickly becoming quite interesting...
2013-08-18 18:36:09 -04:00
28 changed files with 492 additions and 76 deletions

View File

@@ -259,8 +259,8 @@ class DupeGuruPE(DupeGuruBase):
return self.path2aperture.get(path)
return DupeGuruBase._create_file(self, path)
def _job_completed(self, jobid, exc):
DupeGuruBase._job_completed(self, jobid, exc)
def _job_completed(self, jobid):
DupeGuruBase._job_completed(self, jobid)
if jobid == JobType.Load:
if hasattr(self, 'path2iphoto'):
del self.path2iphoto

View File

@@ -14,10 +14,9 @@ from hscommon.path import Path
from cocoa import proxy
from core.scanner import ScanType
from core import fs
from core.directories import Directories as DirectoriesBase, DirectoryState
from core_se.app import DupeGuru as DupeGuruBase
from core_se.fs import File
from core_se import fs
from .app import PyDupeGuruBase
def is_bundle(str_path):
@@ -36,7 +35,8 @@ class Directories(DirectoriesBase):
ROOT_PATH_TO_EXCLUDE = list(map(Path, ['/Library', '/Volumes', '/System', '/bin', '/sbin', '/opt', '/private', '/dev']))
HOME_PATH_TO_EXCLUDE = [Path('Library')]
def __init__(self):
DirectoriesBase.__init__(self, fileclasses=[Bundle, File])
DirectoriesBase.__init__(self, fileclasses=[Bundle, fs.File])
self.folderclass = fs.Folder
def _default_state_for_path(self, path):
result = DirectoriesBase._default_state_for_path(self, path)

View File

@@ -96,6 +96,32 @@ def cmp_value(dupe, attrname):
return value.lower() if isinstance(value, str) else value
class DupeGuru(RegistrableApplication, Broadcaster):
"""Holds everything together.
Instantiated once per running application, it holds a reference to every high-level object
whose reference needs to be held: :class:`Results`, :class:`Scanner`,
:class:`~core.directories.Directories`, :mod:`core.gui` instances, etc..
It also hosts high level methods and acts as a coordinator for all those elements.
.. attribute:: directories
Instance of :class:`~core.directories.Directories`. It holds the current folder selection.
.. attribute:: results
Instance of :class:`core.results.Results`. Holds the results of the latest scan.
.. attribute:: selected_dupes
List of currently selected dupes from our :attr:`results`. Whenever the user changes the
selection at the UI level, :attr:`result_table` takes care of updating this attribute, so
you can trust that it's always up-to-date.
.. attribute:: result_table
Instance of :mod:`meta-gui <core.gui>` table listing the results from :attr:`results`
"""
#--- View interface
# open_path(path)
# reveal_path(path)
@@ -299,6 +325,12 @@ class DupeGuru(RegistrableApplication, Broadcaster):
#--- Public
def add_directory(self, d):
"""Adds folder ``d`` to :attr:`directories`.
Shows an error message dialog if something bad happens.
:param str d: path of folder to add
"""
try:
self.directories.add_path(Path(d))
self.notify('directories_changed')
@@ -308,6 +340,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.view.show_message(tr("'{}' does not exist.").format(d))
def add_selected_to_ignore_list(self):
"""Adds :attr:`selected_dupes` to :attr:`scanner`'s ignore list.
"""
dupes = self.without_ref(self.selected_dupes)
if not dupes:
self.view.show_message(MSG_NO_SELECTED_DUPES)
@@ -324,6 +358,10 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.ignore_list_dialog.refresh()
def apply_filter(self, filter):
"""Apply a filter ``filter`` to the results so that it shows only dupe groups that match it.
:param str filter: filter to apply
"""
self.results.apply_filter(None)
if self.options['escape_filter_regexp']:
filter = escape(filter, set('()[]\\.|+?^'))
@@ -359,6 +397,10 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.clean_empty_dirs(source_path[:-1])
def copy_or_move_marked(self, copy):
"""Start an async move (or copy) job on marked duplicates.
:param bool copy: If True, duplicates will be copied instead of moved
"""
def do(j):
def op(dupe):
j.add_progress()
@@ -381,6 +423,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self._start_job(jobid, do)
def delete_marked(self):
"""Start an async job to send marked duplicates to the trash.
"""
if not self._check_demo():
return
if not self.results.mark_count:
@@ -416,11 +460,11 @@ class DupeGuru(RegistrableApplication, Broadcaster):
return empty_data()
def invoke_custom_command(self):
"""Calls command in 'CustomCommand' pref with %d and %r placeholders replaced.
"""Calls command in ``CustomCommand`` pref with ``%d`` and ``%r`` placeholders replaced.
Using the current selection, %d is replaced with the currently selected dupe and %r is
replaced with that dupe's ref file. If there's no selection, the command is not invoked.
If the dupe is a ref, %d and %r will be the same.
Using the current selection, ``%d`` is replaced with the currently selected dupe and ``%r``
is replaced with that dupe's ref file. If there's no selection, the command is not invoked.
If the dupe is a ref, ``%d`` and ``%r`` will be the same.
"""
cmd = self.view.get_default('CustomCommand')
if not cmd:
@@ -453,6 +497,10 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.ignore_list_dialog.refresh()
def load_from(self, filename):
"""Start an async job to load results from ``filename``.
:param str filename: path of the XML file (created with :meth:`save_as`) to load
"""
def do(j):
self.results.load_from_xml(filename, self._get_file, j)
self._start_job(JobType.Load, do)
@@ -503,6 +551,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.notify('marking_changed')
def open_selected(self):
"""Open :attr:`selected_dupes` with their associated application.
"""
if len(self.selected_dupes) > 10:
if not self.view.ask_yes_no(MSG_MANY_FILES_TO_OPEN):
return
@@ -527,6 +577,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.notify('results_changed_but_keep_selection')
def remove_marked(self):
"""Removed marked duplicates from the results (without touching the files themselves).
"""
if not self.results.mark_count:
self.view.show_message(MSG_NO_MARKED_DUPES)
return
@@ -537,6 +589,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self._results_changed()
def remove_selected(self):
"""Removed :attr:`selected_dupes` from the results (without touching the files themselves).
"""
dupes = self.without_ref(self.selected_dupes)
if not dupes:
self.view.show_message(MSG_NO_SELECTED_DUPES)
@@ -577,9 +631,17 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.notify('save_session')
def save_as(self, filename):
"""Save results in ``filename``.
:param str filename: path of the file to save results (as XML) to.
"""
self.results.save_to_xml(filename)
def start_scanning(self):
"""Starts an async job to scan for duplicates.
Scans folders selected in :attr:`directories` and puts the results in :attr:`results`.
"""
def do(j):
j.set_progress(0, tr("Collecting files to scan"))
if self.scanner.scan_type == scanner.ScanType.Folders:
@@ -611,6 +673,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.notify('marking_changed')
def without_ref(self, dupes):
"""Returns ``dupes`` with all reference elements removed.
"""
return [dupe for dupe in dupes if self.results.get_group_of_duplicate(dupe).ref is not dupe]
def get_default(self, key, fallback_value=None):
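
To make the ``CustomCommand`` placeholder substitution documented above concrete, here is a minimal sketch of the kind of replacement ``invoke_custom_command()`` performs; the helper name and the sample paths are hypothetical, not part of dupeGuru's API.

# Hypothetical sketch: %d becomes the selected dupe's path, %r its group's
# ref path (both are the same when the selected dupe is itself the ref).
def fill_custom_command(cmd, dupe_path, ref_path):
    return cmd.replace('%d', dupe_path).replace('%r', ref_path)

cmd = 'diff "%d" "%r"'
print(fill_custom_command(cmd, '/photos/copy/img.jpg', '/photos/img.jpg'))
# diff "/photos/copy/img.jpg" "/photos/img.jpg"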

View File

@@ -15,7 +15,20 @@ from hscommon.util import FileOrPath
from . import fs
__all__ = [
'Directories',
'DirectoryState',
'AlreadyThereError',
'InvalidPathError',
]
class DirectoryState:
"""Enum describing how a folder should be considered.
* DirectoryState.Normal: Scan all files normally
* DirectoryState.Reference: Scan files, but make sure never to delete any of them
* DirectoryState.Excluded: Don't scan this folder
"""
Normal = 0
Reference = 1
Excluded = 2
@@ -27,11 +40,20 @@ class InvalidPathError(Exception):
"""The path being added is invalid"""
class Directories:
"""Holds user folder selection.
Manages the selection that the user makes through the folder selection dialog. It also manages
folder states, and how recursion applies to them.
Then, when the user starts the scan, :meth:`get_files` is called to retrieve all files (wrapped
in :mod:`core.fs`) that have to be scanned according to the chosen folders/states.
"""
#---Override
def __init__(self, fileclasses=[fs.File]):
self._dirs = []
self.states = {}
self.fileclasses = fileclasses
self.folderclass = fs.Folder
def __contains__(self, path):
for p in self._dirs:
@@ -97,11 +119,14 @@ class Directories:
#---Public
def add_path(self, path):
"""Adds 'path' to self, if not already there.
"""Adds ``path`` to self, if not already there.
Raises AlreadyThereError if 'path' is already in self. If path is a directory containing
some of the directories already present in self, 'path' will be added, but all directories
under it will be removed. Can also raise InvalidPathError if 'path' does not exist.
Raises :exc:`AlreadyThereError` if ``path`` is already in self. If path is a directory
containing some of the directories already present in self, ``path`` will be added, but all
directories under it will be removed. Can also raise :exc:`InvalidPathError` if ``path``
does not exist.
:param Path path: path to add
"""
if path in self:
raise AlreadyThereError()
@@ -112,7 +137,11 @@ class Directories:
@staticmethod
def get_subfolders(path):
"""returns a sorted list of paths corresponding to subfolders in `path`"""
"""Returns a sorted list of paths corresponding to subfolders in ``path``.
:param Path path: get subfolders from there
:rtype: list of Path
"""
try:
names = [name for name in path.listdir() if (path + name).isdir()]
names.sort(key=lambda x:x.lower())
@@ -123,7 +152,7 @@ class Directories:
def get_files(self, j=job.nulljob):
"""Returns a list of all files that are not excluded.
Returned files also have their 'is_ref' attr set.
Returned files also have their ``is_ref`` attr set if applicable.
"""
for path in self._dirs:
for file in self._get_files(path, j):
@@ -132,15 +161,17 @@ class Directories:
def get_folders(self, j=job.nulljob):
"""Returns a list of all folders that are not excluded.
Returned folders also have their 'is_ref' attr set.
Returned folders also have their ``is_ref`` attr set if applicable.
"""
for path in self._dirs:
from_folder = fs.Folder(path)
from_folder = self.folderclass(path)
for folder in self._get_folders(from_folder, j):
yield folder
def get_state(self, path):
"""Returns the state of 'path' (One of the STATE_* const.)
"""Returns the state of ``path``.
:rtype: :class:`DirectoryState`
"""
if path in self.states:
return self.states[path]
@@ -154,6 +185,12 @@ class Directories:
return DirectoryState.Normal
def has_any_file(self):
"""Returns whether selected folders contain any file.
Because it stops at the first file it finds, it's much faster than get_files().
:rtype: bool
"""
try:
next(self.get_files())
return True
@@ -161,6 +198,10 @@ class Directories:
return False
def load_from_file(self, infile):
"""Load folder selection from ``infile``.
:param file infile: path or file pointer to XML generated through :meth:`save_to_file`
"""
try:
root = ET.parse(infile).getroot()
except Exception:
@@ -183,6 +224,10 @@ class Directories:
self.set_state(Path(path), int(state))
def save_to_file(self, outfile):
"""Save folder selection as XML to ``outfile``.
:param file outfile: path or file pointer to XML file to save to.
"""
with FileOrPath(outfile, 'wb') as fp:
root = ET.Element('directories')
for root_path in self:
@@ -196,6 +241,12 @@ class Directories:
tree.write(fp, encoding='utf-8')
def set_state(self, path, state):
"""Set the state of folder at ``path``.
:param Path path: path of the target folder
:param state: state to set folder to
:type state: :class:`DirectoryState`
"""
if self.get_state(path) == state:
return
# we don't want to needlessly fill self.states. if get_state returns the same thing
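
As a quick illustration of the folder-selection API documented above, here is a hedged usage sketch; the folder paths are made up and error handling is omitted.

# Minimal sketch of the Directories flow: add a root folder, exclude a
# subfolder, then iterate the files that would be scanned.
from hscommon.path import Path
from core.directories import Directories, DirectoryState

dirs = Directories()
dirs.add_path(Path('/Users/me/Pictures'))   # may raise AlreadyThereError or InvalidPathError
dirs.set_state(Path('/Users/me/Pictures/Cache'), DirectoryState.Excluded)
files = list(dirs.get_files())              # fs.File wrappers, with is_ref set where applicable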

View File

@@ -44,10 +44,10 @@ def unpack_fields(fields):
return result
def compare(first, second, flags=()):
"""Returns the % of words that match between first and second
"""Returns the % of words that match between ``first`` and ``second``
The result is a int in the range 0..100.
First and second can be either a string or a list.
The result is an ``int`` in the range 0..100.
``first`` and ``second`` can be either a string or a list (of words).
"""
if not (first and second):
return 0
@@ -76,9 +76,10 @@ def compare(first, second, flags=()):
return result
def compare_fields(first, second, flags=()):
"""Returns the score for the lowest matching fields.
"""Returns the score for the lowest matching :ref:`fields`.
first and second must be lists of lists of string.
``first`` and ``second`` must be lists of lists of strings. Each sub-list is then compared with
:func:`compare`.
"""
if len(first) != len(second):
return 0
@@ -98,13 +99,14 @@ def compare_fields(first, second, flags=()):
if matched_field:
second.remove(matched_field)
else:
results = [compare(word1, word2, flags) for word1, word2 in zip(first, second)]
results = [compare(field1, field2, flags) for field1, field2 in zip(first, second)]
return min(results) if results else 0
def build_word_dict(objects, j=job.nulljob):
"""Returns a dict of objects mapped by their words.
objects must have a 'words' attribute being a list of strings or a list of lists of strings.
objects must have a ``words`` attribute being a list of strings or a list of lists of strings
(:ref:`fields`).
The result will be a dict with words as keys, lists of objects as values.
"""
@@ -115,7 +117,11 @@ def build_word_dict(objects, j=job.nulljob):
return result
def merge_similar_words(word_dict):
"""Take all keys in word_dict that are similar, and merge them together.
"""Take all keys in ``word_dict`` that are similar, and merge them together.
``word_dict`` has been built with :func:`build_word_dict`. Similarity is computed with Python's
``difflib.get_close_matches()``, which computes the number of edits that are necessary to make
a word equal to the other.
"""
keys = list(word_dict.keys())
keys.sort(key=len)# we want the shortest word to stay
@@ -131,7 +137,9 @@ def merge_similar_words(word_dict):
keys.remove(similar)
def reduce_common_words(word_dict, threshold):
"""Remove all objects from word_dict values where the object count >= threshold
"""Remove all objects from ``word_dict`` values where the object count >= ``threshold``
``word_dict`` has been built with :func:`build_word_dict`.
The exception to this removal is objects whose words are all common. If we removed them,
we would miss some duplicates!
@@ -150,13 +158,42 @@ def reduce_common_words(word_dict, threshold):
del word_dict[word]
Match = namedtuple('Match', 'first second percentage')
Match.__doc__ = """Represents a match between two :class:`~core.fs.File`.
Regardless of the matching method, when two files are determined to match, a Match pair is created,
which holds, of course, the two matched files, but also their match "level".
.. attribute:: first
first file of the pair.
.. attribute:: second
second file of the pair.
.. attribute:: percentage
their match level according to the scan method which found the match. int from 1 to 100. For
exact scan methods, such as Contents scans, this will always be 100.
"""
def get_match(first, second, flags=()):
#it is assumed here that first and second both have a "words" attribute
percentage = compare(first.words, second.words, flags)
return Match(first, second, percentage)
def getmatches(objects, min_match_percentage=0, match_similar_words=False, weight_words=False,
no_field_order=False, j=job.nulljob):
def getmatches(
objects, min_match_percentage=0, match_similar_words=False, weight_words=False,
no_field_order=False, j=job.nulljob):
"""Returns a list of :class:`Match` within ``objects`` after fuzzily matching their words.
:param objects: List of :class:`~core.fs.File` to match.
:param int min_match_percentage: minimum % of words that have to match.
:param bool match_similar_words: make similar words (see :func:`merge_similar_words`) match.
:param bool weight_words: longer words are worth more in match % computations.
:param bool no_field_order: match :ref:`fields` regardless of their order.
:param j: A :ref:`job progress instance <jobs>`.
"""
COMMON_WORD_THRESHOLD = 50
LIMIT = 5000000
j = j.start_subjob(2)
@@ -203,6 +240,14 @@ def getmatches(objects, min_match_percentage=0, match_similar_words=False, weigh
return result
def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob):
"""Returns a list of :class:`Match` within ``files`` if their contents is the same.
:param str sizeattr: attibute name of the :class:`~core.fs.file` that returns the size of the
file to use for comparison.
:param bool partial: if true, will use the "md5partial" attribute instead of "md5" to compute
contents hash.
:param j: A :ref:`job progress instance <jobs>`.
"""
j = j.start_subjob([2, 8])
size2files = defaultdict(set)
for file in j.iter_with_progress(files, tr("Read size of %d/%d files")):
@@ -224,6 +269,32 @@ def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob)
return result
class Group:
"""A group of :class:`~core.fs.File` that match together.
This builds groups out of match pairs and ensures that all files in the group match each
other.
.. attribute:: ref
The "reference" file, which is the file among the group that isn't going to be deleted.
.. attribute:: ordered
Ordered list of duplicates in the group (including the :attr:`ref`).
.. attribute:: unordered
Set of duplicates in the group (including the :attr:`ref`).
.. attribute:: dupes
An ordered list of the group's duplicates, without :attr:`ref`. Equivalent to
``ordered[1:]``
.. attribute:: percentage
Average match percentage of match pairs containing :attr:`ref`.
"""
#---Override
def __init__(self):
self._clear()
@@ -257,6 +328,15 @@ class Group:
#---Public
def add_match(self, match):
"""Adds ``match`` to internal match list and possibly add duplicates to the group.
A duplicate can only be considered as such if it matches all other duplicates in the group.
This method registers the pair (A, B) represented in ``match`` as possible candidates and,
if A and/or B end up matching every other duplicate in the group, adds those duplicates to
the group.
:param tuple match: pair of :class:`~core.fs.File` to add
"""
def add_candidate(item, match):
matches = self.candidates[item]
matches.add(match)
@@ -276,12 +356,18 @@ class Group:
self._matches_for_ref = None
def discard_matches(self):
"""Remove all recorded matches that didn't result in a duplicate being added to the group.
You can call this after the duplicate scanning process to free a bit of memory.
"""
discarded = set(m for m in self.matches if not all(obj in self.unordered for obj in [m.first, m.second]))
self.matches -= discarded
self.candidates = defaultdict(set)
return discarded
def get_match_of(self, item):
"""Returns the match pair between ``item`` and :attr:`ref`.
"""
if item is self.ref:
return
for m in self._get_matches_for_ref():
@@ -289,6 +375,12 @@ class Group:
return m
def prioritize(self, key_func, tie_breaker=None):
"""Reorders :attr:`ordered` according to ``key_func``.
:param key_func: Key (f(x)) to be used for sorting
:param tie_breaker: function to be used to select the reference position in case the top
duplicates have the same key_func() result.
"""
# tie_breaker(ref, dupe) --> True if dupe should be ref
# Returns True if anything changed during prioritization.
master_key_func = lambda x: (-x.is_ref, key_func(x))
@@ -324,6 +416,8 @@ class Group:
pass
def switch_ref(self, with_dupe):
"""Make the :attr:`ref` dupe of the group switch position with ``with_dupe``.
"""
if self.ref.is_ref:
return False
try:
@@ -354,6 +448,10 @@ class Group:
def get_groups(matches, j=job.nulljob):
"""Returns a list of :class:`Group` from ``matches``.
Create groups out of match pairs in the smartest way possible.
"""
matches.sort(key=lambda match: -match.percentage)
dupe2group = {}
groups = []
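
To show how the functions documented in this module fit together, here is a hedged sketch of the word-matching pipeline; the ``NamedThing`` stand-in is hypothetical (real scans use :class:`core.fs.File` objects), and the exact scores depend on the scoring details.

# Sketch: compare() scores two word lists (0..100), getmatches() builds
# Match pairs from objects exposing a `words` attribute, and get_groups()
# folds those pairs into Group instances.
from core import engine

score = engine.compare(['my', 'long', 'title'], ['my', 'long', 'song'])  # int in 0..100

class NamedThing:  # hypothetical stand-in for an object with a `words` attribute
    def __init__(self, words):
        self.words = words

objects = [NamedThing(['holiday', 'photo']), NamedThing(['holiday', 'photo']),
           NamedThing(['holiday', 'movie'])]
matches = engine.getmatches(objects, min_match_percentage=50)
groups = engine.get_groups(matches)   # list of Group, each with a ref and its dupes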

View File

@@ -16,6 +16,18 @@ import logging
from hscommon.util import nonone, get_file_ext
__all__ = [
'File',
'Folder',
'get_file',
'get_files',
'FSError',
'AlreadyExistsError',
'InvalidPath',
'InvalidDestinationError',
'OperationError',
]
NOT_SET = object()
class FSError(Exception):
@@ -50,6 +62,8 @@ class OperationError(FSError):
cls_message = "Operation on '{name}' failed."
class File:
"""Represents a file and holds metadata to be used for scanning.
"""
INITIAL_INFO = {
'size': 0,
'mtime': 0,
@@ -129,6 +143,8 @@ class File:
#--- Public
@classmethod
def can_handle(cls, path):
"""Returns whether this file wrapper class can handle ``path``.
"""
return not path.islink() and path.isfile()
def rename(self, newname):
@@ -205,7 +221,7 @@ class Folder(File):
if self._subfolders is None:
subpaths = [self.path + name for name in self.path.listdir()]
subfolders = [p for p in subpaths if not p.islink() and p.isdir()]
self._subfolders = [Folder(p) for p in subfolders]
self._subfolders = [self.__class__(p) for p in subfolders]
return self._subfolders
@classmethod
@@ -214,11 +230,23 @@ class Folder(File):
def get_file(path, fileclasses=[File]):
"""Wraps ``path`` around its appropriate :class:`File` class.
Whether a class is "appropriate" is decided by :meth:`File.can_handle`
:param Path path: path to wrap
:param fileclasses: List of candidate :class:`File` classes
"""
for fileclass in fileclasses:
if fileclass.can_handle(path):
return fileclass(path)
def get_files(path, fileclasses=[File]):
"""Returns a list of :class:`File` for each file contained in ``path``.
:param Path path: path to scan
:param fileclasses: List of candidate :class:`File` classes
"""
assert all(issubclass(fileclass, File) for fileclass in fileclasses)
def combine_paths(p1, p2):
try:
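
To make the ``fileclasses`` mechanism concrete, here is a short hedged sketch of how paths get wrapped; the picture paths are hypothetical.

# get_file() returns an instance of the first fileclass whose can_handle()
# accepts the path (or None); get_files() does this for every file in a folder.
from hscommon.path import Path
from core import fs

one = fs.get_file(Path('/Users/me/Pictures/img.jpg'))
many = fs.get_files(Path('/Users/me/Pictures'), fileclasses=[fs.File])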

View File

@@ -0,0 +1,15 @@
"""
Meta GUI elements in dupeGuru
-----------------------------
dupeGuru is designed with a `cross-toolkit`_ approach in mind. It means that its core code
(which doesn't depend on any GUI toolkit) has elements which preformat core information in a way
that makes it easy for a UI layer to consume.
For example, we have :class:`~core.gui.ResultTable` which takes information from
:class:`~core.results.Results` and mashes it in rows and columns which are ready to be fetched by
either Cocoa's ``NSTableView`` or Qt's ``QTableView``. It tells them which cell is supposed to be
blue, which is supposed to be orange, does the sorting logic, holds selection, etc..
.. _cross-toolkit: http://www.hardcoded.net/articles/cross-toolkit-software
"""

View File

@@ -21,6 +21,19 @@ from . import engine
from .markable import Markable
class Results(Markable):
"""Manages a collection of duplicate :class:`~core.engine.Group`.
This class takes care of marking, sorting and filtering duplicate groups.
.. attribute:: groups
The list of :class:`~core.engine.Group` managed by this instance.
.. attribute:: dupes
A list of all duplicates (:class:`~core.fs.File` instances), without ref, contained in the
currently managed :attr:`groups`.
"""
#---Override
def __init__(self, app):
Markable.__init__(self)
@@ -145,17 +158,17 @@ class Results(Markable):
#---Public
def apply_filter(self, filter_str):
''' Applies a filter 'filter_str' to self.groups
"""Applies a filter ``filter_str`` to :attr:`groups`
When you apply the filter, only dupes with the filename matching 'filter_str' will be in
in the results. To cancel the filter, just call apply_filter with 'filter_str' to None,
and the results will go back to normal.
When you apply the filter, only dupes with a filename matching ``filter_str`` will be in
the results. To cancel the filter, just call apply_filter with ``filter_str`` set to None,
and the results will go back to normal.
If call apply_filter on a filtered results, the filter will be applied
*on the filtered results*.
If you call apply_filter on already filtered results, the filter will be applied
*on the filtered results*.
'filter_str' is a string containing a regexp to filter dupes with.
'''
:param str filter_str: a string containing a regexp to filter dupes with.
"""
if not filter_str:
self.__filtered_dupes = None
self.__filtered_groups = None
@@ -182,6 +195,8 @@ class Results(Markable):
self.__dupes = None
def get_group_of_duplicate(self, dupe):
"""Returns :class:`~core.engine.Group` in which ``dupe`` belongs.
"""
try:
return self.__group_of_duplicate[dupe]
except (TypeError, KeyError):
@@ -190,6 +205,12 @@ class Results(Markable):
is_markable = _is_markable
def load_from_xml(self, infile, get_file, j=nulljob):
"""Load results from ``infile``.
:param infile: a file or path pointing to an XML file created with :meth:`save_to_xml`.
:param get_file: a function f(path) returning a :class:`~core.fs.File` wrapping the path.
:param j: A :ref:`job progress instance <jobs>`.
"""
def do_match(ref_file, other_files, group):
if not other_files:
return
@@ -242,6 +263,8 @@ class Results(Markable):
self.is_modified = False
def make_ref(self, dupe):
"""Make ``dupe`` take the :attr:`~core.engine.Group.ref` position of its group.
"""
g = self.get_group_of_duplicate(dupe)
r = g.ref
if not g.switch_ref(dupe):
@@ -258,8 +281,14 @@ class Results(Markable):
return True
def perform_on_marked(self, func, remove_from_results):
# Performs `func` on all marked dupes. If an EnvironmentError is raised during the call,
# the problematic dupe is added to self.problems.
"""Performs ``func`` on all marked dupes.
If an ``EnvironmentError`` is raised during the call, the problematic dupe is added to
self.problems.
:param bool remove_from_results: If true, dupes which had ``func`` applied without causing
any problem are removed from the results.
"""
self.problems = []
to_remove = []
marked = (dupe for dupe in self.dupes if self.is_marked(dupe))
@@ -276,8 +305,10 @@ class Results(Markable):
self.mark(dupe)
def remove_duplicates(self, dupes):
'''Remove 'dupes' from their respective group, and remove the group is it ends up empty.
'''
"""Remove ``dupes`` from their respective :class:`~core.engine.Group`.
Also, remove the group from :attr:`groups` if it ends up empty.
"""
affected_groups = set()
for dupe in dupes:
group = self.get_group_of_duplicate(dupe)
@@ -302,9 +333,12 @@ class Results(Markable):
self.is_modified = bool(self.__groups)
def save_to_xml(self, outfile):
"""Save results to ``outfile`` in XML.
:param outfile: file object or path.
"""
self.apply_filter(None)
root = ET.Element('results')
# writer = XMLGenerator(outfile, 'utf-8')
for g in self.groups:
group_elem = ET.SubElement(root, 'group')
dupe2index = {}
@@ -349,13 +383,26 @@ class Results(Markable):
self.is_modified = False
def sort_dupes(self, key, asc=True, delta=False):
"""Sort :attr:`dupes` according to ``key``.
:param str key: key attribute name to sort with.
:param bool asc: If false, sorting is reversed.
:param bool delta: If true, sorting occurs using :ref:`delta values <deltavalues>`.
"""
if not self.__dupes:
self.__get_dupe_list()
keyfunc = lambda d: self.app._get_dupe_sort_key(d, lambda: self.get_group_of_duplicate(d), key, delta)
self.__dupes.sort(key=keyfunc, reverse=not asc)
self.__dupes_sort_descriptor = (key,asc,delta)
def sort_groups(self,key,asc=True):
def sort_groups(self, key, asc=True):
"""Sort :attr:`groups` according to ``key``.
The :attr:`~core.engine.Group.ref` of each group is used to extract values for sorting.
:param str key: key attribute name to sort with.
:param bool asc: If false, sorting is reversed.
"""
keyfunc = lambda g: self.app._get_group_sort_key(g, key)
self.groups.sort(key=keyfunc, reverse=not asc)
self.__groups_sort_descriptor = (key,asc)
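
A brief hedged sketch of the Results calls documented above, assuming an existing ``app`` with a finished scan and a ``some_dupe`` picked from ``app.results.dupes``; the output file name is made up.

# Typical post-scan operations on a Results instance.
results = app.results
results.apply_filter(r'\.jpg$')        # keep only dupes whose filename matches the regexp
results.sort_dupes('size', asc=False)  # biggest files first
results.make_ref(some_dupe)            # promote this dupe to its group's ref position
results.apply_filter(None)             # cancel the filter
results.save_to_xml('results.xml')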

View File

@@ -1,2 +1,2 @@
__version__ = '2.7.1'
__version__ = '2.8.0'
__appname__ = 'dupeGuru Picture Edition'

View File

@@ -32,7 +32,7 @@ pystring2cfstring(PyObject *pystring)
}
s = (UInt8*)PyBytes_AS_STRING(encoded);
size = PyUnicode_GET_SIZE(encoded);
size = PyBytes_GET_SIZE(encoded);
result = CFStringCreateWithBytes(NULL, s, size, kCFStringEncodingUTF8, FALSE);
Py_DECREF(encoded);
return result;
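
The fix above swaps PyUnicode_GET_SIZE (a character count) for PyBytes_GET_SIZE (a byte count) on the UTF-8-encoded buffer. Here is a short Python sketch of why the two differ as soon as a file name contains non-ASCII characters; the sample name is made up.

# Character count vs. UTF-8 byte count: using the former as the buffer
# length handed to CFStringCreateWithBytes mis-sizes the string.
name = 'café.jpg'
encoded = name.encode('utf-8')
print(len(name))      # 8 characters
print(len(encoded))   # 9 bytes: 'é' encodes to two bytes in UTF-8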

View File

@@ -1,2 +1,2 @@
__version__ = '3.7.0'
__version__ = '3.7.1'
__appname__ = 'dupeGuru'

View File

@@ -17,6 +17,7 @@ class DupeGuru(DupeGuruBase):
def __init__(self, view, appdata):
DupeGuruBase.__init__(self, view, appdata)
self.directories.fileclasses = [fs.File]
self.directories.folderclass = fs.Folder
def _prioritization_categories(self):
return prioritize.all_categories()

View File

@@ -11,29 +11,37 @@ from hscommon.util import format_size
from core import fs
from core.app import format_timestamp, format_perc, format_words, format_dupe_count
def get_display_info(dupe, group, delta):
size = dupe.size
mtime = dupe.mtime
m = group.get_match_of(dupe)
if m:
percentage = m.percentage
dupe_count = 0
if delta:
r = group.ref
size -= r.size
mtime -= r.mtime
else:
percentage = group.percentage
dupe_count = len(group.dupes)
return {
'name': dupe.name,
'folder_path': str(dupe.folder_path),
'size': format_size(size, 0, 1, False),
'extension': dupe.extension,
'mtime': format_timestamp(mtime, delta and m),
'percentage': format_perc(percentage),
'words': format_words(dupe.words) if hasattr(dupe, 'words') else '',
'dupe_count': format_dupe_count(dupe_count),
}
class File(fs.File):
def get_display_info(self, group, delta):
size = self.size
mtime = self.mtime
m = group.get_match_of(self)
if m:
percentage = m.percentage
dupe_count = 0
if delta:
r = group.ref
size -= r.size
mtime -= r.mtime
else:
percentage = group.percentage
dupe_count = len(group.dupes)
return {
'name': self.name,
'folder_path': str(self.folder_path),
'size': format_size(size, 0, 1, False),
'extension': self.extension,
'mtime': format_timestamp(mtime, delta and m),
'percentage': format_perc(percentage),
'words': format_words(self.words) if hasattr(self, 'words') else '',
'dupe_count': format_dupe_count(dupe_count),
}
return get_display_info(self, group, delta)
class Folder(fs.Folder):
def get_display_info(self, group, delta):
return get_display_info(self, group, delta)

View File

@@ -1,3 +1,12 @@
=== 2.8.0 (2013-08-25)
* Improved delta values to support non-numerical values. (#213)
* Improved the Re-Prioritize dialog's UI. (#224)
* Added hardlink/symlink support on Windows Vista+. (#220)
* Added keybinding for the "Clear Picture Cache" action. [Linux, Windows] (#230)
* Dropped 32bit support on Mac OS X.
* Added Vietnamese localization by Phan Anh.
=== 2.7.1 (2013-05-05)
* Fixed false matching bug in EXIF matching. (#219)

View File

@@ -1,3 +1,7 @@
=== 3.7.1 (2013-08-19)
* Fixed folder scan type, which was broken in v3.7.0.
=== 3.7.0 (2013-08-17)
* Improved delta values to support non-numerical values. (#213)

View File

@@ -16,7 +16,9 @@ import sys, os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
# for autodocs
sys.path.insert(0, os.path.abspath(os.path.join('..', '..')))
# -- General configuration -----------------------------------------------------
@@ -25,7 +27,7 @@ import sys, os
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.todo']
extensions = ['sphinx.ext.todo', 'sphinx.ext.autodoc']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

View File

@@ -0,0 +1,5 @@
core.app
========
.. automodule:: core.app
:members:

View File

@@ -0,0 +1,5 @@
core.directories
================
.. automodule:: core.directories
:members:

View File

@@ -0,0 +1,36 @@
core.engine
===========
.. automodule:: core.engine
.. autoclass:: Match
.. autoclass:: Group
:members:
.. autofunction:: build_word_dict
.. autofunction:: compare
.. autofunction:: compare_fields
.. autofunction:: getmatches
.. autofunction:: getmatches_by_contents
.. autofunction:: get_groups
.. autofunction:: merge_similar_words
.. autofunction:: reduce_common_words
.. _fields:
Fields
------
Fields are groups of words which each represent a significant part of the whole name. This concept
is significant in music file names, where we often have names like "My Artist - a very long title
with many many words".
This title has 10 words. If you run a scan with a bit of tolerance, let's say 90%, you'll be able
to find a dupe that has only one "many" in the song title. However, you would also get false
duplicates from a title like "My Giraffe - a very long title with many many words", which is of
course a very different song and it doesn't make sense to match them.
When matching by fields, each field (separated by "-") is considered as a separate string to match
independently. After all fields are matched, the lowest result is kept. In the "Giraffe" example we
gave, the result would be 50% instead of 90% in normal mode.
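
A hedged sketch of the field comparison described above, feeding the two example titles to compare_fields() as pre-split word lists; treat the percentage in the comment as approximate since it depends on the scoring details.

# Each "-"-separated field is scored independently; the lowest field score wins.
from core.engine import compare_fields

first = [['my', 'artist'], ['a', 'very', 'long', 'title', 'with', 'many', 'many', 'words']]
second = [['my', 'giraffe'], ['a', 'very', 'long', 'title', 'with', 'many', 'many', 'words']]
print(compare_fields(first, second))   # ~50: the artist field only half-matches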

View File

@@ -0,0 +1,5 @@
core.fs
=======
.. automodule:: core.fs
:members:

View File

@@ -0,0 +1,5 @@
core.gui
========
.. automodule:: core.gui
:members:

View File

@@ -0,0 +1,5 @@
core.results
============
.. automodule:: core.results
:members:

View File

@@ -23,6 +23,8 @@ codebase. For example, when performing "Remove Selected From Results",
``base.app.DupeGuru.remove_duplicates()`` on the PyQt side, are respectively called to perform the
thing. All of this is quite ugly, I know (see the "Refactoring" section below).
.. _jobs:
Jobs
----
@@ -44,3 +46,16 @@ a list of matches and returns a list of ``Group`` instances (a ``Group`` is basi
When a scan is over, the final result (the list of groups from ``get_groups()``) is placed into
``app.DupeGuru.results``, which is a ``results.Results`` instance. The ``Results`` instance is where
all the dupe marking, sorting, removing, power marking, etc. takes place.
API
---
.. toctree::
:maxdepth: 2
core/app
core/fs
core/engine
core/directories
core/results
core/gui

View File

@@ -54,6 +54,6 @@ Contents:
results
reprioritize
faq
developer
developer/index
changelog
credits

View File

@@ -45,6 +45,8 @@ The dupeGuru results, when in normal mode, are sorted according to duplicate gro
* Hold Shift and click on it.
* Press Space to mark all selected duplicates.
.. _deltavalues:
Delta Values
------------

View File

@@ -9,7 +9,9 @@
import os.path as op
import re
from .build import print_and_do, read_changelog_file, filereplace
from pkg_resources import load_entry_point
from .build import read_changelog_file, filereplace
CHANGELOG_FORMAT = """
{version} ({date})
@@ -58,5 +60,10 @@ def gen(basepath, destpath, changelogpath, tixurl, confrepl=None, confpath=None,
filereplace(changelogtmpl, changelog_out, changelog='\n'.join(rendered_logs))
conf_out = op.join(basepath, 'conf.py')
filereplace(confpath, conf_out, **confrepl)
cmd = 'sphinx-build "{}" "{}"'.format(basepath, destpath)
print_and_do(cmd)
# We used to call sphinx-build with print_and_do(), but the problem was that the virtualenv
# of the calling python wasn't correctly considered and caused problems with documentation
relying on autodoc (which tries to import the module to auto-document, but fails because of
# missing dependencies which are in the virtualenv). Here, we do exactly what is done when
# calling the command from bash.
cmd = load_entry_point('Sphinx', 'console_scripts', 'sphinx-build')
cmd(['sphinx-build', basepath, destpath])

View File

@@ -80,6 +80,9 @@ class DupeGuru(DupeGuruBase):
def _setup(self):
self.model.directories.fileclasses = [File]
DupeGuruBase._setup(self)
self.directories_dialog.menuFile.insertAction(
self.directories_dialog.actionLoadResults, self.resultWindow.actionClearPictureCache
)
def _update_options(self):
DupeGuruBase._update_options(self)

View File

@@ -17,6 +17,7 @@ class ResultWindow(ResultWindowBase):
def _setupMenu(self):
ResultWindowBase._setupMenu(self)
self.actionClearPictureCache = QAction(tr("Clear Picture Cache"), self)
self.actionClearPictureCache.setShortcut('Ctrl+Shift+P')
self.menuFile.insertAction(self.actionSaveResults, self.actionClearPictureCache)
self.actionClearPictureCache.triggered.connect(self.clearPictureCacheTriggered)