mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-25 16:11:39 +00:00

Compare commits

...

10 Commits

Author SHA1 Message Date
Virgil Dupras
350b2c64e0 Fixed nasty crash during PE's Cocoa block scanning
Using PyUnicode_GET_SIZE was obviously wrong, but I'm guessing that the str changes in py3.3 made that wrongness significant...
2013-08-26 07:17:02 -04:00
Virgil Dupras
dcc57a7afb Ah crap, another Cocoa fatal mistake 2013-08-25 17:10:26 -04:00
Virgil Dupras
8b510994ad pe v2.8.0 2013-08-25 10:53:08 -04:00
Virgil Dupras
4a4d1bbfcd Eased "Clear Picture Cache" triggering under Qt
Added a keybinding and added the action to the directories dialog's menu
(it was previously only in the results window's menu). Fixes #230.
2013-08-25 10:47:10 -04:00
Virgil Dupras
78c3c8ec2d Improved dev docs 2013-08-20 22:52:43 -04:00
Virgil Dupras
e99e2b18e0 Call sphinx-build from within Python instead of a subprocess 2013-08-19 17:43:32 -04:00
Virgil Dupras
ae1283f2e1 se v3.7.1 2013-08-19 16:48:07 -04:00
Virgil Dupras
cc76f3ca87 Fixed SE folder scanning under Cocoa 2013-08-18 21:07:33 -04:00
Virgil Dupras
be8efea081 Fixed folder scanning in SE, which was completely broken
Oops
2013-08-18 20:50:31 -04:00
Virgil Dupras
7e8f9036d8 Began serious code documentation effort
Enabled the autodoc Sphinx extension and started adding docstrings to
classes, methods, etc.. It's quickly becoming quite interesting...
2013-08-18 18:36:09 -04:00
28 changed files with 492 additions and 76 deletions

View File

@@ -259,8 +259,8 @@ class DupeGuruPE(DupeGuruBase):
return self.path2aperture.get(path)
return DupeGuruBase._create_file(self, path)
def _job_completed(self, jobid, exc):
DupeGuruBase._job_completed(self, jobid, exc)
def _job_completed(self, jobid):
DupeGuruBase._job_completed(self, jobid)
if jobid == JobType.Load:
if hasattr(self, 'path2iphoto'):
del self.path2iphoto

View File

@@ -14,10 +14,9 @@ from hscommon.path import Path
from cocoa import proxy
from core.scanner import ScanType
from core import fs
from core.directories import Directories as DirectoriesBase, DirectoryState
from core_se.app import DupeGuru as DupeGuruBase
from core_se.fs import File
from core_se import fs
from .app import PyDupeGuruBase
def is_bundle(str_path):
@@ -36,7 +35,8 @@ class Directories(DirectoriesBase):
ROOT_PATH_TO_EXCLUDE = list(map(Path, ['/Library', '/Volumes', '/System', '/bin', '/sbin', '/opt', '/private', '/dev']))
HOME_PATH_TO_EXCLUDE = [Path('Library')]
def __init__(self):
DirectoriesBase.__init__(self, fileclasses=[Bundle, File])
DirectoriesBase.__init__(self, fileclasses=[Bundle, fs.File])
self.folderclass = fs.Folder
def _default_state_for_path(self, path):
result = DirectoriesBase._default_state_for_path(self, path)

View File

@@ -96,6 +96,32 @@ def cmp_value(dupe, attrname):
return value.lower() if isinstance(value, str) else value
class DupeGuru(RegistrableApplication, Broadcaster):
"""Holds everything together.
Instantiated once per running application, it holds a reference to every high-level object
whose reference needs to be held: :class:`Results`, :class:`Scanner`,
:class:`~core.directories.Directories`, :mod:`core.gui` instances, etc..
It also hosts high level methods and acts as a coordinator for all those elements.
.. attribute:: directories
Instance of :class:`~core.directories.Directories`. It holds the current folder selection.
.. attribute:: results
Instance of :class:`core.results.Results`. Holds the results of the latest scan.
.. attribute:: selected_dupes
List of currently selected dupes from our :attr:`results`. Whenever the user changes the
selection at the UI level, :attr:`result_table` takes care of updating this attribute, so
you can trust that it's always up-to-date.
.. attribute:: result_table
Instance of :mod:`meta-gui <core.gui>` table listing the results from :attr:`results`
"""
#--- View interface
# open_path(path)
# reveal_path(path)
@@ -299,6 +325,12 @@ class DupeGuru(RegistrableApplication, Broadcaster):
#--- Public
def add_directory(self, d):
"""Adds folder ``d`` to :attr:`directories`.
Shows an error message dialog if something bad happens.
:param str d: path of folder to add
"""
try:
self.directories.add_path(Path(d))
self.notify('directories_changed')
@@ -308,6 +340,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.view.show_message(tr("'{}' does not exist.").format(d))
def add_selected_to_ignore_list(self):
"""Adds :attr:`selected_dupes` to :attr:`scanner`'s ignore list.
"""
dupes = self.without_ref(self.selected_dupes)
if not dupes:
self.view.show_message(MSG_NO_SELECTED_DUPES)
@@ -324,6 +358,10 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.ignore_list_dialog.refresh()
def apply_filter(self, filter):
"""Apply a filter ``filter`` to the results so that it shows only dupe groups that match it.
:param str filter: filter to apply
"""
self.results.apply_filter(None)
if self.options['escape_filter_regexp']:
filter = escape(filter, set('()[]\\.|+?^'))
@@ -359,6 +397,10 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.clean_empty_dirs(source_path[:-1])
def copy_or_move_marked(self, copy):
"""Start an async move (or copy) job on marked duplicates.
:param bool copy: If True, duplicates will be copied instead of moved
"""
def do(j):
def op(dupe):
j.add_progress()
@@ -381,6 +423,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self._start_job(jobid, do)
def delete_marked(self):
"""Start an async job to send marked duplicates to the trash.
"""
if not self._check_demo():
return
if not self.results.mark_count:
@@ -416,11 +460,11 @@ class DupeGuru(RegistrableApplication, Broadcaster):
return empty_data()
def invoke_custom_command(self):
"""Calls command in 'CustomCommand' pref with %d and %r placeholders replaced.
"""Calls command in ``CustomCommand`` pref with ``%d`` and ``%r`` placeholders replaced.
Using the current selection, %d is replaced with the currently selected dupe and %r is
replaced with that dupe's ref file. If there's no selection, the command is not invoked.
If the dupe is a ref, %d and %r will be the same.
Using the current selection, ``%d`` is replaced with the currently selected dupe and ``%r``
is replaced with that dupe's ref file. If there's no selection, the command is not invoked.
If the dupe is a ref, ``%d`` and ``%r`` will be the same.
"""
cmd = self.view.get_default('CustomCommand')
if not cmd:
@@ -453,6 +497,10 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.ignore_list_dialog.refresh()
def load_from(self, filename):
"""Start an async job to load results from ``filename``.
:param str filename: path of the XML file (created with :meth:`save_as`) to load
"""
def do(j):
self.results.load_from_xml(filename, self._get_file, j)
self._start_job(JobType.Load, do)
@@ -503,6 +551,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.notify('marking_changed')
def open_selected(self):
"""Open :attr:`selected_dupes` with their associated application.
"""
if len(self.selected_dupes) > 10:
if not self.view.ask_yes_no(MSG_MANY_FILES_TO_OPEN):
return
@@ -527,6 +577,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.notify('results_changed_but_keep_selection')
def remove_marked(self):
"""Removed marked duplicates from the results (without touching the files themselves).
"""
if not self.results.mark_count:
self.view.show_message(MSG_NO_MARKED_DUPES)
return
@@ -537,6 +589,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self._results_changed()
def remove_selected(self):
"""Removed :attr:`selected_dupes` from the results (without touching the files themselves).
"""
dupes = self.without_ref(self.selected_dupes)
if not dupes:
self.view.show_message(MSG_NO_SELECTED_DUPES)
@@ -577,9 +631,17 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.notify('save_session')
def save_as(self, filename):
"""Save results in ``filename``.
:param str filename: path of the file to save results (as XML) to.
"""
self.results.save_to_xml(filename)
def start_scanning(self):
"""Starts an async job to scan for duplicates.
Scans folders selected in :attr:`directories` and puts the results in :attr:`results`.
"""
def do(j):
j.set_progress(0, tr("Collecting files to scan"))
if self.scanner.scan_type == scanner.ScanType.Folders:
@@ -611,6 +673,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.notify('marking_changed')
def without_ref(self, dupes):
"""Returns ``dupes`` with all reference elements removed.
"""
return [dupe for dupe in dupes if self.results.get_group_of_duplicate(dupe).ref is not dupe]
def get_default(self, key, fallback_value=None):
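
To make the ``CustomCommand`` placeholder substitution documented above concrete, here is a minimal sketch of the kind of replacement ``invoke_custom_command()`` performs; the helper name and the sample paths are hypothetical, not part of dupeGuru's API.

# Hypothetical sketch: %d becomes the selected dupe's path, %r its group's
# ref path (both are the same when the selected dupe is itself the ref).
def fill_custom_command(cmd, dupe_path, ref_path):
    return cmd.replace('%d', dupe_path).replace('%r', ref_path)

cmd = 'diff "%d" "%r"'
print(fill_custom_command(cmd, '/photos/copy/img.jpg', '/photos/img.jpg'))
# diff "/photos/copy/img.jpg" "/photos/img.jpg"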

View File

@@ -15,7 +15,20 @@ from hscommon.util import FileOrPath
from . import fs
__all__ = [
'Directories',
'DirectoryState',
'AlreadyThereError',
'InvalidPathError',
]
class DirectoryState:
"""Enum describing how a folder should be considered.
* DirectoryState.Normal: Scan all files normally
* DirectoryState.Reference: Scan files, but make sure never to delete any of them
* DirectoryState.Excluded: Don't scan this folder
"""
Normal = 0
Reference = 1
Excluded = 2
@@ -27,11 +40,20 @@ class InvalidPathError(Exception):
"""The path being added is invalid"""
class Directories:
"""Holds user folder selection.
Manages the selection that the user makes through the folder selection dialog. It also manages
folder states, and how recursion applies to them.
Then, when the user starts the scan, :meth:`get_files` is called to retrieve all files (wrapped
in :mod:`core.fs`) that have to be scanned according to the chosen folders/states.
"""
#---Override
def __init__(self, fileclasses=[fs.File]):
self._dirs = []
self.states = {}
self.fileclasses = fileclasses
self.folderclass = fs.Folder
def __contains__(self, path):
for p in self._dirs:
@@ -97,11 +119,14 @@ class Directories:
#---Public
def add_path(self, path):
"""Adds 'path' to self, if not already there.
"""Adds ``path`` to self, if not already there.
Raises AlreadyThereError if 'path' is already in self. If path is a directory containing
some of the directories already present in self, 'path' will be added, but all directories
under it will be removed. Can also raise InvalidPathError if 'path' does not exist.
Raises :exc:`AlreadyThereError` if ``path`` is already in self. If path is a directory
containing some of the directories already present in self, ``path`` will be added, but all
directories under it will be removed. Can also raise :exc:`InvalidPathError` if ``path``
does not exist.
:param Path path: path to add
"""
if path in self:
raise AlreadyThereError()
@@ -112,7 +137,11 @@ class Directories:
@staticmethod
def get_subfolders(path):
"""returns a sorted list of paths corresponding to subfolders in `path`"""
"""Returns a sorted list of paths corresponding to subfolders in ``path``.
:param Path path: get subfolders from there
:rtype: list of Path
"""
try:
names = [name for name in path.listdir() if (path + name).isdir()]
names.sort(key=lambda x:x.lower())
@@ -123,7 +152,7 @@ class Directories:
def get_files(self, j=job.nulljob):
"""Returns a list of all files that are not excluded.
Returned files also have their 'is_ref' attr set.
Returned files also have their ``is_ref`` attr set if applicable.
"""
for path in self._dirs:
for file in self._get_files(path, j):
@@ -132,15 +161,17 @@ class Directories:
def get_folders(self, j=job.nulljob):
"""Returns a list of all folders that are not excluded.
Returned folders also have their 'is_ref' attr set.
Returned folders also have their ``is_ref`` attr set if applicable.
"""
for path in self._dirs:
from_folder = fs.Folder(path)
from_folder = self.folderclass(path)
for folder in self._get_folders(from_folder, j):
yield folder
def get_state(self, path):
"""Returns the state of 'path' (One of the STATE_* const.)
"""Returns the state of ``path``.
:rtype: :class:`DirectoryState`
"""
if path in self.states:
return self.states[path]
@@ -154,6 +185,12 @@ class Directories:
return DirectoryState.Normal
def has_any_file(self):
"""Returns whether selected folders contain any file.
Because it stops at the first file it finds, it's much faster than get_files().
:rtype: bool
"""
try:
next(self.get_files())
return True
@@ -161,6 +198,10 @@ class Directories:
return False
def load_from_file(self, infile):
"""Load folder selection from ``infile``.
:param file infile: path or file pointer to XML generated through :meth:`save_to_file`
"""
try:
root = ET.parse(infile).getroot()
except Exception:
@@ -183,6 +224,10 @@ class Directories:
self.set_state(Path(path), int(state))
def save_to_file(self, outfile):
"""Save folder selection as XML to ``outfile``.
:param file outfile: path or file pointer to XML file to save to.
"""
with FileOrPath(outfile, 'wb') as fp:
root = ET.Element('directories')
for root_path in self:
@@ -196,6 +241,12 @@ class Directories:
tree.write(fp, encoding='utf-8')
def set_state(self, path, state):
"""Set the state of folder at ``path``.
:param Path path: path of the target folder
:param state: state to set folder to
:type state: :class:`DirectoryState`
"""
if self.get_state(path) == state:
return
# we don't want to needlessly fill self.states. if get_state returns the same thing
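
As a quick illustration of the folder-selection API documented above, here is a hedged usage sketch; the folder paths are made up and error handling is omitted.

# Minimal sketch of the Directories flow: add a root folder, exclude a
# subfolder, then iterate the files that would be scanned.
from hscommon.path import Path
from core.directories import Directories, DirectoryState

dirs = Directories()
dirs.add_path(Path('/Users/me/Pictures'))   # may raise AlreadyThereError or InvalidPathError
dirs.set_state(Path('/Users/me/Pictures/Cache'), DirectoryState.Excluded)
files = list(dirs.get_files())              # fs.File wrappers, with is_ref set where applicable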

View File

@@ -44,10 +44,10 @@ def unpack_fields(fields):
return result
def compare(first, second, flags=()):
"""Returns the % of words that match between first and second
"""Returns the % of words that match between ``first`` and ``second``
The result is a int in the range 0..100.
First and second can be either a string or a list.
The result is an ``int`` in the range 0..100.
``first`` and ``second`` can be either a string or a list (of words).
"""
if not (first and second):
return 0
@@ -76,9 +76,10 @@ def compare(first, second, flags=()):
return result
def compare_fields(first, second, flags=()):
"""Returns the score for the lowest matching fields.
"""Returns the score for the lowest matching :ref:`fields`.
first and second must be lists of lists of string.
``first`` and ``second`` must be lists of lists of strings. Each sub-list is then compared with
:func:`compare`.
"""
if len(first) != len(second):
return 0
@@ -98,13 +99,14 @@ def compare_fields(first, second, flags=()):
if matched_field:
second.remove(matched_field)
else:
results = [compare(word1, word2, flags) for word1, word2 in zip(first, second)]
results = [compare(field1, field2, flags) for field1, field2 in zip(first, second)]
return min(results) if results else 0
def build_word_dict(objects, j=job.nulljob):
"""Returns a dict of objects mapped by their words.
objects must have a 'words' attribute being a list of strings or a list of lists of strings.
objects must have a ``words`` attribute being a list of strings or a list of lists of strings
(:ref:`fields`).
The result will be a dict with words as keys, lists of objects as values.
"""
@@ -115,7 +117,11 @@ def build_word_dict(objects, j=job.nulljob):
return result
def merge_similar_words(word_dict):
"""Take all keys in word_dict that are similar, and merge them together.
"""Take all keys in ``word_dict`` that are similar, and merge them together.
``word_dict`` has been built with :func:`build_word_dict`. Similarity is computed with Python's
``difflib.get_close_matches()``, which computes the number of edits that are necessary to make
a word equal to the other.
"""
keys = list(word_dict.keys())
keys.sort(key=len)# we want the shortest word to stay
@@ -131,7 +137,9 @@ def merge_similar_words(word_dict):
keys.remove(similar)
def reduce_common_words(word_dict, threshold):
"""Remove all objects from word_dict values where the object count >= threshold
"""Remove all objects from ``word_dict`` values where the object count >= ``threshold``
``word_dict`` has been built with :func:`build_word_dict`.
The exception to this removal is objects whose words are all common. If we removed them,
we would miss some duplicates!
@@ -150,13 +158,42 @@ def reduce_common_words(word_dict, threshold):
del word_dict[word]
Match = namedtuple('Match', 'first second percentage')
Match.__doc__ = """Represents a match between two :class:`~core.fs.File`.
Regardless of the matching method, when two files are determined to match, a Match pair is created,
which holds, of course, the two matched files, but also their match "level".
.. attribute:: first
first file of the pair.
.. attribute:: second
second file of the pair.
.. attribute:: percentage
their match level according to the scan method which found the match. int from 1 to 100. For
exact scan methods, such as Contents scans, this will always be 100.
"""
def get_match(first, second, flags=()):
#it is assumed here that first and second both have a "words" attribute
percentage = compare(first.words, second.words, flags)
return Match(first, second, percentage)
def getmatches(objects, min_match_percentage=0, match_similar_words=False, weight_words=False,
no_field_order=False, j=job.nulljob):
def getmatches(
objects, min_match_percentage=0, match_similar_words=False, weight_words=False,
no_field_order=False, j=job.nulljob):
"""Returns a list of :class:`Match` within ``objects`` after fuzzily matching their words.
:param objects: List of :class:`~core.fs.File` to match.
:param int min_match_percentage: minimum % of words that have to match.
:param bool match_similar_words: make similar words (see :func:`merge_similar_words`) match.
:param bool weight_words: longer words are worth more in match % computations.
:param bool no_field_order: match :ref:`fields` regardless of their order.
:param j: A :ref:`job progress instance <jobs>`.
"""
COMMON_WORD_THRESHOLD = 50
LIMIT = 5000000
j = j.start_subjob(2)
@@ -203,6 +240,14 @@ def getmatches(objects, min_match_percentage=0, match_similar_words=False, weigh
return result
def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob):
"""Returns a list of :class:`Match` within ``files`` if their contents is the same.
:param str sizeattr: attibute name of the :class:`~core.fs.file` that returns the size of the
file to use for comparison.
:param bool partial: if true, will use the "md5partial" attribute instead of "md5" to compute
contents hash.
:param j: A :ref:`job progress instance <jobs>`.
"""
j = j.start_subjob([2, 8])
size2files = defaultdict(set)
for file in j.iter_with_progress(files, tr("Read size of %d/%d files")):
@@ -224,6 +269,32 @@ def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob)
return result
class Group:
"""A group of :class:`~core.fs.File` that match together.
This builds groups out of match pairs and ensures that all files in the group match each
other.
.. attribute:: ref
The "reference" file, which is the file among the group that isn't going to be deleted.
.. attribute:: ordered
Ordered list of duplicates in the group (including the :attr:`ref`).
.. attribute:: unordered
Set of duplicates in the group (including the :attr:`ref`).
.. attribute:: dupes
An ordered list of the group's duplicates, without :attr:`ref`. Equivalent to
``ordered[1:]``
.. attribute:: percentage
Average match percentage of match pairs containing :attr:`ref`.
"""
#---Override
def __init__(self):
self._clear()
@@ -257,6 +328,15 @@ class Group:
#---Public
def add_match(self, match):
"""Adds ``match`` to internal match list and possibly add duplicates to the group.
A duplicate can only be considered as such if it matches all other duplicates in the group.
This method registers the pair (A, B) represented in ``match`` as possible candidates and,
if A and/or B end up matching every other duplicate in the group, adds those duplicates to
the group.
:param tuple match: pair of :class:`~core.fs.File` to add
"""
def add_candidate(item, match):
matches = self.candidates[item]
matches.add(match)
@@ -276,12 +356,18 @@ class Group:
self._matches_for_ref = None
def discard_matches(self):
"""Remove all recorded matches that didn't result in a duplicate being added to the group.
You can call this after the duplicate scanning process to free a bit of memory.
"""
discarded = set(m for m in self.matches if not all(obj in self.unordered for obj in [m.first, m.second]))
self.matches -= discarded
self.candidates = defaultdict(set)
return discarded
def get_match_of(self, item):
"""Returns the match pair between ``item`` and :attr:`ref`.
"""
if item is self.ref:
return
for m in self._get_matches_for_ref():
@@ -289,6 +375,12 @@ class Group:
return m
def prioritize(self, key_func, tie_breaker=None):
"""Reorders :attr:`ordered` according to ``key_func``.
:param key_func: Key (f(x)) to be used for sorting
:param tie_breaker: function to be used to select the reference position in case the top
duplicates have the same key_func() result.
"""
# tie_breaker(ref, dupe) --> True if dupe should be ref
# Returns True if anything changed during prioritization.
master_key_func = lambda x: (-x.is_ref, key_func(x))
@@ -324,6 +416,8 @@ class Group:
pass
def switch_ref(self, with_dupe):
"""Make the :attr:`ref` dupe of the group switch position with ``with_dupe``.
"""
if self.ref.is_ref:
return False
try:
@@ -354,6 +448,10 @@ class Group:
def get_groups(matches, j=job.nulljob):
"""Returns a list of :class:`Group` from ``matches``.
Create groups out of match pairs in the smartest way possible.
"""
matches.sort(key=lambda match: -match.percentage)
dupe2group = {}
groups = []
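
To show how the functions documented in this module fit together, here is a hedged sketch of the word-matching pipeline; the ``NamedThing`` stand-in is hypothetical (real scans use :class:`core.fs.File` objects), and the exact scores depend on the scoring details.

# Sketch: compare() scores two word lists (0..100), getmatches() builds
# Match pairs from objects exposing a `words` attribute, and get_groups()
# folds those pairs into Group instances.
from core import engine

score = engine.compare(['my', 'long', 'title'], ['my', 'long', 'song'])  # int in 0..100

class NamedThing:  # hypothetical stand-in for an object with a `words` attribute
    def __init__(self, words):
        self.words = words

objects = [NamedThing(['holiday', 'photo']), NamedThing(['holiday', 'photo']),
           NamedThing(['holiday', 'movie'])]
matches = engine.getmatches(objects, min_match_percentage=50)
groups = engine.get_groups(matches)   # list of Group, each with a ref and its dupes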

View File

@@ -16,6 +16,18 @@ import logging
from hscommon.util import nonone, get_file_ext
__all__ = [
'File',
'Folder',
'get_file',
'get_files',
'FSError',
'AlreadyExistsError',
'InvalidPath',
'InvalidDestinationError',
'OperationError',
]
NOT_SET = object()
class FSError(Exception):
@@ -50,6 +62,8 @@ class OperationError(FSError):
cls_message = "Operation on '{name}' failed."
class File:
"""Represents a file and holds metadata to be used for scanning.
"""
INITIAL_INFO = {
'size': 0,
'mtime': 0,
@@ -129,6 +143,8 @@ class File:
#--- Public
@classmethod
def can_handle(cls, path):
"""Returns whether this file wrapper class can handle ``path``.
"""
return not path.islink() and path.isfile()
def rename(self, newname):
@@ -205,7 +221,7 @@ class Folder(File):
if self._subfolders is None:
subpaths = [self.path + name for name in self.path.listdir()]
subfolders = [p for p in subpaths if not p.islink() and p.isdir()]
self._subfolders = [Folder(p) for p in subfolders]
self._subfolders = [self.__class__(p) for p in subfolders]
return self._subfolders
@classmethod
@@ -214,11 +230,23 @@ class Folder(File):
def get_file(path, fileclasses=[File]):
"""Wraps ``path`` around its appropriate :class:`File` class.
Whether a class is "appropriate" is decided by :meth:`File.can_handle`
:param Path path: path to wrap
:param fileclasses: List of candidate :class:`File` classes
"""
for fileclass in fileclasses:
if fileclass.can_handle(path):
return fileclass(path)
def get_files(path, fileclasses=[File]):
"""Returns a list of :class:`File` for each file contained in ``path``.
:param Path path: path to scan
:param fileclasses: List of candidate :class:`File` classes
"""
assert all(issubclass(fileclass, File) for fileclass in fileclasses)
def combine_paths(p1, p2):
try:
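
To make the ``fileclasses`` mechanism concrete, here is a short hedged sketch of how paths get wrapped; the picture paths are hypothetical.

# get_file() returns an instance of the first fileclass whose can_handle()
# accepts the path (or None); get_files() does this for every file in a folder.
from hscommon.path import Path
from core import fs

one = fs.get_file(Path('/Users/me/Pictures/img.jpg'))
many = fs.get_files(Path('/Users/me/Pictures'), fileclasses=[fs.File])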

View File

@@ -0,0 +1,15 @@
"""
Meta GUI elements in dupeGuru
-----------------------------
dupeGuru is designed with a `cross-toolkit`_ approach in mind. It means that its core code
(which doesn't depend on any GUI toolkit) has elements which preformat core information in a way
that makes it easy for a UI layer to consume.
For example, we have :class:`~core.gui.ResultTable` which takes information from
:class:`~core.results.Results` and mashes it in rows and columns which are ready to be fetched by
either Cocoa's ``NSTableView`` or Qt's ``QTableView``. It tells them which cell is supposed to be
blue, which is supposed to be orange, does the sorting logic, holds selection, etc..
.. _cross-toolkit: http://www.hardcoded.net/articles/cross-toolkit-software
"""

View File

@@ -21,6 +21,19 @@ from . import engine
from .markable import Markable
class Results(Markable):
"""Manages a collection of duplicate :class:`~core.engine.Group`.
This class takes care of marking, sorting and filtering duplicate groups.
.. attribute:: groups
The list of :class:`~core.engine.Group` managed by this instance.
.. attribute:: dupes
A list of all duplicates (:class:`~core.fs.File` instances), without ref, contained in the
currently managed :attr:`groups`.
"""
#---Override
def __init__(self, app):
Markable.__init__(self)
@@ -145,17 +158,17 @@ class Results(Markable):
#---Public
def apply_filter(self, filter_str):
''' Applies a filter 'filter_str' to self.groups
"""Applies a filter ``filter_str`` to :attr:`groups`
When you apply the filter, only dupes with the filename matching 'filter_str' will be in
in the results. To cancel the filter, just call apply_filter with 'filter_str' to None,
and the results will go back to normal.
When you apply the filter, only dupes with a filename matching ``filter_str`` will be in
the results. To cancel the filter, just call apply_filter with ``filter_str`` set to None,
and the results will go back to normal.
If call apply_filter on a filtered results, the filter will be applied
*on the filtered results*.
If you call apply_filter on already filtered results, the filter will be applied
*on the filtered results*.
'filter_str' is a string containing a regexp to filter dupes with.
'''
:param str filter_str: a string containing a regexp to filter dupes with.
"""
if not filter_str:
self.__filtered_dupes = None
self.__filtered_groups = None
@@ -182,6 +195,8 @@ class Results(Markable):
self.__dupes = None
def get_group_of_duplicate(self, dupe):
"""Returns :class:`~core.engine.Group` in which ``dupe`` belongs.
"""
try:
return self.__group_of_duplicate[dupe]
except (TypeError, KeyError):
@@ -190,6 +205,12 @@ class Results(Markable):
is_markable = _is_markable
def load_from_xml(self, infile, get_file, j=nulljob):
"""Load results from ``infile``.
:param infile: a file or path pointing to an XML file created with :meth:`save_to_xml`.
:param get_file: a function f(path) returning a :class:`~core.fs.File` wrapping the path.
:param j: A :ref:`job progress instance <jobs>`.
"""
def do_match(ref_file, other_files, group):
if not other_files:
return
@@ -242,6 +263,8 @@ class Results(Markable):
self.is_modified = False
def make_ref(self, dupe):
"""Make ``dupe`` take the :attr:`~core.engine.Group.ref` position of its group.
"""
g = self.get_group_of_duplicate(dupe)
r = g.ref
if not g.switch_ref(dupe):
@@ -258,8 +281,14 @@ class Results(Markable):
return True
def perform_on_marked(self, func, remove_from_results):
# Performs `func` on all marked dupes. If an EnvironmentError is raised during the call,
# the problematic dupe is added to self.problems.
"""Performs ``func`` on all marked dupes.
If an ``EnvironmentError`` is raised during the call, the problematic dupe is added to
self.problems.
:param bool remove_from_results: If true, dupes which had ``func`` applied without causing
any problem are removed from the results.
"""
self.problems = []
to_remove = []
marked = (dupe for dupe in self.dupes if self.is_marked(dupe))
@@ -276,8 +305,10 @@ class Results(Markable):
self.mark(dupe)
def remove_duplicates(self, dupes):
'''Remove 'dupes' from their respective group, and remove the group is it ends up empty.
'''
"""Remove ``dupes`` from their respective :class:`~core.engine.Group`.
Also, remove the group from :attr:`groups` if it ends up empty.
"""
affected_groups = set()
for dupe in dupes:
group = self.get_group_of_duplicate(dupe)
@@ -302,9 +333,12 @@ class Results(Markable):
self.is_modified = bool(self.__groups)
def save_to_xml(self, outfile):
"""Save results to ``outfile`` in XML.
:param outfile: file object or path.
"""
self.apply_filter(None)
root = ET.Element('results')
# writer = XMLGenerator(outfile, 'utf-8')
for g in self.groups:
group_elem = ET.SubElement(root, 'group')
dupe2index = {}
@@ -349,13 +383,26 @@ class Results(Markable):
self.is_modified = False
def sort_dupes(self, key, asc=True, delta=False):
"""Sort :attr:`dupes` according to ``key``.
:param str key: key attribute name to sort with.
:param bool asc: If false, sorting is reversed.
:param bool delta: If true, sorting occurs using :ref:`delta values <deltavalues>`.
"""
if not self.__dupes:
self.__get_dupe_list()
keyfunc = lambda d: self.app._get_dupe_sort_key(d, lambda: self.get_group_of_duplicate(d), key, delta)
self.__dupes.sort(key=keyfunc, reverse=not asc)
self.__dupes_sort_descriptor = (key,asc,delta)
def sort_groups(self,key,asc=True):
def sort_groups(self, key, asc=True):
"""Sort :attr:`groups` according to ``key``.
The :attr:`~core.engine.Group.ref` of each group is used to extract values for sorting.
:param str key: key attribute name to sort with.
:param bool asc: If false, sorting is reversed.
"""
keyfunc = lambda g: self.app._get_group_sort_key(g, key)
self.groups.sort(key=keyfunc, reverse=not asc)
self.__groups_sort_descriptor = (key,asc)
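
A brief hedged sketch of the Results calls documented above, assuming an existing ``app`` with a finished scan and a ``some_dupe`` picked from ``app.results.dupes``; the output file name is made up.

# Typical post-scan operations on a Results instance.
results = app.results
results.apply_filter(r'\.jpg$')        # keep only dupes whose filename matches the regexp
results.sort_dupes('size', asc=False)  # biggest files first
results.make_ref(some_dupe)            # promote this dupe to its group's ref position
results.apply_filter(None)             # cancel the filter
results.save_to_xml('results.xml')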

View File

@@ -1,2 +1,2 @@
__version__ = '2.7.1'
__version__ = '2.8.0'
__appname__ = 'dupeGuru Picture Edition'

View File

@@ -32,7 +32,7 @@ pystring2cfstring(PyObject *pystring)
}
s = (UInt8*)PyBytes_AS_STRING(encoded);
size = PyUnicode_GET_SIZE(encoded);
size = PyBytes_GET_SIZE(encoded);
result = CFStringCreateWithBytes(NULL, s, size, kCFStringEncodingUTF8, FALSE);
Py_DECREF(encoded);
return result;
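
The fix above swaps PyUnicode_GET_SIZE (a character count) for PyBytes_GET_SIZE (a byte count) on the UTF-8-encoded buffer. Here is a short Python sketch of why the two differ as soon as a file name contains non-ASCII characters; the sample name is made up.

# Character count vs. UTF-8 byte count: using the former as the buffer
# length handed to CFStringCreateWithBytes mis-sizes the string.
name = 'café.jpg'
encoded = name.encode('utf-8')
print(len(name))      # 8 characters
print(len(encoded))   # 9 bytes: 'é' encodes to two bytes in UTF-8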

View File

@@ -1,2 +1,2 @@
__version__ = '3.7.0'
__version__ = '3.7.1'
__appname__ = 'dupeGuru'

View File

@@ -17,6 +17,7 @@ class DupeGuru(DupeGuruBase):
def __init__(self, view, appdata):
DupeGuruBase.__init__(self, view, appdata)
self.directories.fileclasses = [fs.File]
self.directories.folderclass = fs.Folder
def _prioritization_categories(self):
return prioritize.all_categories()

View File

@@ -11,29 +11,37 @@ from hscommon.util import format_size
from core import fs
from core.app import format_timestamp, format_perc, format_words, format_dupe_count
def get_display_info(dupe, group, delta):
size = dupe.size
mtime = dupe.mtime
m = group.get_match_of(dupe)
if m:
percentage = m.percentage
dupe_count = 0
if delta:
r = group.ref
size -= r.size
mtime -= r.mtime
else:
percentage = group.percentage
dupe_count = len(group.dupes)
return {
'name': dupe.name,
'folder_path': str(dupe.folder_path),
'size': format_size(size, 0, 1, False),
'extension': dupe.extension,
'mtime': format_timestamp(mtime, delta and m),
'percentage': format_perc(percentage),
'words': format_words(dupe.words) if hasattr(dupe, 'words') else '',
'dupe_count': format_dupe_count(dupe_count),
}
class File(fs.File):
def get_display_info(self, group, delta):
size = self.size
mtime = self.mtime
m = group.get_match_of(self)
if m:
percentage = m.percentage
dupe_count = 0
if delta:
r = group.ref
size -= r.size
mtime -= r.mtime
else:
percentage = group.percentage
dupe_count = len(group.dupes)
return {
'name': self.name,
'folder_path': str(self.folder_path),
'size': format_size(size, 0, 1, False),
'extension': self.extension,
'mtime': format_timestamp(mtime, delta and m),
'percentage': format_perc(percentage),
'words': format_words(self.words) if hasattr(self, 'words') else '',
'dupe_count': format_dupe_count(dupe_count),
}
return get_display_info(self, group, delta)
class Folder(fs.Folder):
def get_display_info(self, group, delta):
return get_display_info(self, group, delta)

View File

@@ -1,3 +1,12 @@
=== 2.8.0 (2013-08-25)
* Improved delta values to support non-numerical values. (#213)
* Improved the Re-Prioritize dialog's UI. (#224)
* Added hardlink/symlink support on Windows Vista+. (#220)
* Added keybinding for the "Clear Picture Cache" action. [Linux, Windows] (#230)
* Dropped 32bit support on Mac OS X.
* Added Vietnamese localization by Phan Anh.
=== 2.7.1 (2013-05-05)
* Fixed false matching bug in EXIF matching. (#219)

View File

@@ -1,3 +1,7 @@
=== 3.7.1 (2013-08-19)
* Fixed folder scan type, which was broken in v3.7.0.
=== 3.7.0 (2013-08-17)
* Improved delta values to support non-numerical values. (#213)

View File

@@ -16,7 +16,9 @@ import sys, os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
# for autodocs
sys.path.insert(0, os.path.abspath(os.path.join('..', '..')))
# -- General configuration -----------------------------------------------------
@@ -25,7 +27,7 @@ import sys, os
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.todo']
extensions = ['sphinx.ext.todo', 'sphinx.ext.autodoc']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

View File

@@ -0,0 +1,5 @@
core.app
========
.. automodule:: core.app
:members:

View File

@@ -0,0 +1,5 @@
core.directories
================
.. automodule:: core.directories
:members:

View File

@@ -0,0 +1,36 @@
core.engine
===========
.. automodule:: core.engine
.. autoclass:: Match
.. autoclass:: Group
:members:
.. autofunction:: build_word_dict
.. autofunction:: compare
.. autofunction:: compare_fields
.. autofunction:: getmatches
.. autofunction:: getmatches_by_contents
.. autofunction:: get_groups
.. autofunction:: merge_similar_words
.. autofunction:: reduce_common_words
.. _fields:
Fields
------
Fields are groups of words which each represent a significant part of the whole name. This concept
is significant in music file names, where we often have names like "My Artist - a very long title
with many many words".
This title has 10 words. If you run a scan with a bit of tolerance, let's say 90%, you'll be able
to find a dupe that has only one "many" in the song title. However, you would also get false
duplicates from a title like "My Giraffe - a very long title with many many words", which is of
course a very different song and it doesn't make sense to match them.
When matching by fields, each field (separated by "-") is considered as a separate string to match
independently. After all fields are matched, the lowest result is kept. In the "Giraffe" example we
gave, the result would be 50% instead of 90% in normal mode.
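
A hedged sketch of the field comparison described above, feeding the two example titles to compare_fields() as pre-split word lists; treat the percentage in the comment as approximate since it depends on the scoring details.

# Each "-"-separated field is scored independently; the lowest field score wins.
from core.engine import compare_fields

first = [['my', 'artist'], ['a', 'very', 'long', 'title', 'with', 'many', 'many', 'words']]
second = [['my', 'giraffe'], ['a', 'very', 'long', 'title', 'with', 'many', 'many', 'words']]
print(compare_fields(first, second))   # ~50: the artist field only half-matches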

View File

@@ -0,0 +1,5 @@
core.fs
=======
.. automodule:: core.fs
:members:

View File

@@ -0,0 +1,5 @@
core.gui
========
.. automodule:: core.gui
:members:

View File

@@ -0,0 +1,5 @@
core.results
============
.. automodule:: core.results
:members:

View File

@@ -23,6 +23,8 @@ codebase. For example, when performing "Remove Selected From Results",
``base.app.DupeGuru.remove_duplicates()`` on the PyQt side, are respectively called to perform the
thing. All of this is quite ugly, I know (see the "Refactoring" section below).
.. _jobs:
Jobs
----
@@ -44,3 +46,16 @@ a list of matches and returns a list of ``Group`` instances (a ``Group`` is basi
When a scan is over, the final result (the list of groups from ``get_groups()``) is placed into
``app.DupeGuru.results``, which is a ``results.Results`` instance. The ``Results`` instance is where
all the dupe marking, sorting, removing, power marking, etc. takes place.
API
---
.. toctree::
:maxdepth: 2
core/app
core/fs
core/engine
core/directories
core/results
core/gui

View File

@@ -54,6 +54,6 @@ Contents:
results
reprioritize
faq
developer
developer/index
changelog
credits

View File

@@ -45,6 +45,8 @@ The dupeGuru results, when in normal mode, are sorted according to duplicate gro
* Hold Shift and click on it.
* Press Space to mark all selected duplicates.
.. _deltavalues:
Delta Values
------------

View File

@@ -9,7 +9,9 @@
import os.path as op
import re
from .build import print_and_do, read_changelog_file, filereplace
from pkg_resources import load_entry_point
from .build import read_changelog_file, filereplace
CHANGELOG_FORMAT = """
{version} ({date})
@@ -58,5 +60,10 @@ def gen(basepath, destpath, changelogpath, tixurl, confrepl=None, confpath=None,
filereplace(changelogtmpl, changelog_out, changelog='\n'.join(rendered_logs))
conf_out = op.join(basepath, 'conf.py')
filereplace(confpath, conf_out, **confrepl)
cmd = 'sphinx-build "{}" "{}"'.format(basepath, destpath)
print_and_do(cmd)
# We used to call sphinx-build with print_and_do(), but the problem was that the virtualenv
# of the calling python wasn't correctly considered and caused problems with documentation
relying on autodoc (which tries to import the module to auto-document, but fails because of
# missing dependencies which are in the virtualenv). Here, we do exactly what is done when
# calling the command from bash.
cmd = load_entry_point('Sphinx', 'console_scripts', 'sphinx-build')
cmd(['sphinx-build', basepath, destpath])

View File

@@ -80,6 +80,9 @@ class DupeGuru(DupeGuruBase):
def _setup(self):
self.model.directories.fileclasses = [File]
DupeGuruBase._setup(self)
self.directories_dialog.menuFile.insertAction(
self.directories_dialog.actionLoadResults, self.resultWindow.actionClearPictureCache
)
def _update_options(self):
DupeGuruBase._update_options(self)

View File

@@ -17,6 +17,7 @@ class ResultWindow(ResultWindowBase):
def _setupMenu(self):
ResultWindowBase._setupMenu(self)
self.actionClearPictureCache = QAction(tr("Clear Picture Cache"), self)
self.actionClearPictureCache.setShortcut('Ctrl+Shift+P')
self.menuFile.insertAction(self.actionSaveResults, self.actionClearPictureCache)
self.actionClearPictureCache.triggered.connect(self.clearPictureCacheTriggered)