mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-25 16:11:39 +00:00

Compare commits


13 Commits

Author SHA1 Message Date
Virgil Dupras
350b2c64e0 Fixed nasty crash during PE's Cocoa block scanning
Using PyUnicode_GET_SIZE was obviously wrong, but I'm guessing that the str changes in py3.3 made that wrongness significant...
2013-08-26 07:17:02 -04:00
Virgil Dupras
dcc57a7afb Ah crap, another Cocoa fatal mistake 2013-08-25 17:10:26 -04:00
Virgil Dupras
8b510994ad pe v2.8.0 2013-08-25 10:53:08 -04:00
Virgil Dupras
4a4d1bbfcd Eased "Clear Picture Cache" triggering under Qt
Added a keybinding and added the action to the directories dialog's menu
(it was previously only in the results window's menu). Fixes #230.
2013-08-25 10:47:10 -04:00
Virgil Dupras
78c3c8ec2d Improved dev docs 2013-08-20 22:52:43 -04:00
Virgil Dupras
e99e2b18e0 Call sphinx-build from withing Python instead of a subprocess 2013-08-19 17:43:32 -04:00
Virgil Dupras
ae1283f2e1 se v3.7.1 2013-08-19 16:48:07 -04:00
Virgil Dupras
cc76f3ca87 Fixed SE folder scanning under Cocoa 2013-08-18 21:07:33 -04:00
Virgil Dupras
be8efea081 Fixed folder scanning in SE, which was completely broken
Oops
2013-08-18 20:50:31 -04:00
Virgil Dupras
7e8f9036d8 Began serious code documentation effort
Enabled the autodoc Sphinx extension and started adding docstrings to
classes, methods, etc.. It's quickly becoming quite interesting...
2013-08-18 18:36:09 -04:00
Virgil Dupras
8a8ac027f5 Fixed ME's cocoa interface file, which was broken (again)
The Remove Dead Tracks didn't use the new job system and appscript wasn't properly packaged.
2013-08-18 11:23:20 -04:00
Virgil Dupras
1d9d09fdf7 Fixed ME's cocoa interface file, which was broken
It tried to update JOBID2TITLE from inter.app, but it has moved to core.app.
2013-08-18 10:48:02 -04:00
Virgil Dupras
5dc956870d me v6.6.0 2013-08-18 10:16:39 -04:00
32 changed files with 515 additions and 88 deletions

View File

@@ -104,7 +104,7 @@ def build_cocoa(edition, dev):
     if not op.exists(pydep_folder):
         os.mkdir(pydep_folder)
     shutil.copy(op.join(cocoa_project_path, 'dg_cocoa.py'), 'build')
-    appscript_pkgs = ['appscript', 'aem', 'mactypes']
+    appscript_pkgs = ['appscript', 'aem', 'mactypes', 'osax']
     specific_packages = {
         'se': ['core_se'],
         'me': ['core_me'] + appscript_pkgs + ['hsaudiotag'],

View File

@@ -19,11 +19,11 @@ from hscommon.path import Path
 from hscommon.util import remove_invalid_xml
 from core import directories
-from core.app import JobType
+from core.app import JobType, JOBID2TITLE
 from core.scanner import ScanType
 from core_me.app import DupeGuru as DupeGuruBase
 from core_me import fs
-from .app import JOBID2TITLE, PyDupeGuruBase
+from .app import PyDupeGuruBase
 
 tr = trget('ui')
@@ -184,11 +184,14 @@ class DupeGuruME(DupeGuruBase):
             pass # We'll return the default file type, as per the last line of this method
         return DupeGuruBase._create_file(self, path)
 
-    def _job_completed(self, jobid, exc):
-        if (jobid in {JobType.RemoveDeadTracks, JobType.ScanDeadTracks}) and (exc is not None):
-            msg = tr("There were communication problems with iTunes. The operation couldn't be completed.")
-            self.view.show_message(msg)
-            return True
+    def _job_completed(self, jobid):
+        # XXX Just before release, I'm realizing that this piece of code below is why I was passing
+        # job exception as an argument to _job_completed(). I have to comment it for now. It's not
+        # the end of the world, but I should find an elegant solution to this at some point.
+        # if (jobid in {JobType.RemoveDeadTracks, JobType.ScanDeadTracks}) and (exc is not None):
+        #     msg = tr("There were communication problems with iTunes. The operation couldn't be completed.")
+        #     self.view.show_message(msg)
+        #     return True
         if jobid == JobType.ScanDeadTracks:
             dead_tracks_count = len(self.dead_tracks)
             if dead_tracks_count > 0:
@@ -202,7 +205,7 @@ class DupeGuruME(DupeGuruBase):
         if hasattr(self, 'itunes_songs'):
             # If we load another file, we want a refresh song list
             del self.itunes_songs
-        DupeGuruBase._job_completed(self, jobid, exc)
+        DupeGuruBase._job_completed(self, jobid)
 
     def copy_or_move(self, dupe, copy, destination, dest_type):
         if isinstance(dupe, ITunesSong):
@@ -230,7 +233,7 @@ class DupeGuruME(DupeGuruBase):
             except CommandError as e:
                 logging.warning('Error while trying to remove a track from iTunes: %s' % str(e))
-        self.view.start_job(JobType.RemoveDeadTracks, do)
+        self._start_job(JobType.RemoveDeadTracks, do)
 
     def scan_dead_tracks(self):
         def do(j):
@@ -248,7 +251,7 @@ class DupeGuruME(DupeGuruBase):
                 self.dead_tracks.append(track)
             logging.info('Found %d dead tracks' % len(self.dead_tracks))
-        self.view.start_job(JobType.ScanDeadTracks, do)
+        self._start_job(JobType.ScanDeadTracks, do)
 
 class PyDupeGuru(PyDupeGuruBase):
     def __init__(self):

View File

@@ -259,8 +259,8 @@ class DupeGuruPE(DupeGuruBase):
             return self.path2aperture.get(path)
         return DupeGuruBase._create_file(self, path)
 
-    def _job_completed(self, jobid, exc):
-        DupeGuruBase._job_completed(self, jobid, exc)
+    def _job_completed(self, jobid):
+        DupeGuruBase._job_completed(self, jobid)
         if jobid == JobType.Load:
             if hasattr(self, 'path2iphoto'):
                 del self.path2iphoto

View File

@@ -14,10 +14,9 @@ from hscommon.path import Path
 from cocoa import proxy
 from core.scanner import ScanType
-from core import fs
 from core.directories import Directories as DirectoriesBase, DirectoryState
 from core_se.app import DupeGuru as DupeGuruBase
-from core_se.fs import File
+from core_se import fs
 from .app import PyDupeGuruBase
 
 def is_bundle(str_path):
@@ -36,7 +35,8 @@ class Directories(DirectoriesBase):
     ROOT_PATH_TO_EXCLUDE = list(map(Path, ['/Library', '/Volumes', '/System', '/bin', '/sbin', '/opt', '/private', '/dev']))
     HOME_PATH_TO_EXCLUDE = [Path('Library')]
 
     def __init__(self):
-        DirectoriesBase.__init__(self, fileclasses=[Bundle, File])
+        DirectoriesBase.__init__(self, fileclasses=[Bundle, fs.File])
+        self.folderclass = fs.Folder
 
     def _default_state_for_path(self, path):
         result = DirectoriesBase._default_state_for_path(self, path)

View File

@@ -96,6 +96,32 @@ def cmp_value(dupe, attrname):
     return value.lower() if isinstance(value, str) else value
 
 class DupeGuru(RegistrableApplication, Broadcaster):
+    """Holds everything together.
+
+    Instantiated once per running application, it holds a reference to every high-level object
+    whose reference needs to be held: :class:`Results`, :class:`Scanner`,
+    :class:`~core.directories.Directories`, :mod:`core.gui` instances, etc..
+
+    It also hosts high level methods and acts as a coordinator for all those elements.
+
+    .. attribute:: directories
+
+        Instance of :class:`~core.directories.Directories`. It holds the current folder selection.
+
+    .. attribute:: results
+
+        Instance of :class:`core.results.Results`. Holds the results of the latest scan.
+
+    .. attribute:: selected_dupes
+
+        List of currently selected dupes from our :attr:`results`. Whenever the user changes its
+        selection at the UI level, :attr:`result_table` takes care of updating this attribute, so
+        you can trust that it's always up-to-date.
+
+    .. attribute:: result_table
+
+        Instance of :mod:`meta-gui <core.gui>` table listing the results from :attr:`results`
+    """
     #--- View interface
     # open_path(path)
     # reveal_path(path)
@@ -299,6 +325,12 @@ class DupeGuru(RegistrableApplication, Broadcaster):
     #--- Public
     def add_directory(self, d):
+        """Adds folder ``d`` to :attr:`directories`.
+
+        Shows an error message dialog if something bad happens.
+
+        :param str d: path of folder to add
+        """
         try:
             self.directories.add_path(Path(d))
             self.notify('directories_changed')
@@ -308,6 +340,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
             self.view.show_message(tr("'{}' does not exist.").format(d))
 
     def add_selected_to_ignore_list(self):
+        """Adds :attr:`selected_dupes` to :attr:`scanner`'s ignore list.
+        """
         dupes = self.without_ref(self.selected_dupes)
         if not dupes:
             self.view.show_message(MSG_NO_SELECTED_DUPES)
@@ -324,6 +358,10 @@ class DupeGuru(RegistrableApplication, Broadcaster):
         self.ignore_list_dialog.refresh()
 
     def apply_filter(self, filter):
+        """Apply a filter ``filter`` to the results so that it shows only dupe groups that match it.
+
+        :param str filter: filter to apply
+        """
         self.results.apply_filter(None)
         if self.options['escape_filter_regexp']:
             filter = escape(filter, set('()[]\\.|+?^'))
@@ -359,6 +397,10 @@ class DupeGuru(RegistrableApplication, Broadcaster):
         self.clean_empty_dirs(source_path[:-1])
 
     def copy_or_move_marked(self, copy):
+        """Start an async move (or copy) job on marked duplicates.
+
+        :param bool copy: If True, duplicates will be copied instead of moved
+        """
         def do(j):
             def op(dupe):
                 j.add_progress()
@@ -381,6 +423,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
         self._start_job(jobid, do)
 
     def delete_marked(self):
+        """Start an async job to send marked duplicates to the trash.
+        """
         if not self._check_demo():
             return
         if not self.results.mark_count:
@@ -416,11 +460,11 @@ class DupeGuru(RegistrableApplication, Broadcaster):
             return empty_data()
 
     def invoke_custom_command(self):
-        """Calls command in 'CustomCommand' pref with %d and %r placeholders replaced.
-
-        Using the current selection, %d is replaced with the currently selected dupe and %r is
-        replaced with that dupe's ref file. If there's no selection, the command is not invoked.
-        If the dupe is a ref, %d and %r will be the same.
+        """Calls command in ``CustomCommand`` pref with ``%d`` and ``%r`` placeholders replaced.
+
+        Using the current selection, ``%d`` is replaced with the currently selected dupe and ``%r``
+        is replaced with that dupe's ref file. If there's no selection, the command is not invoked.
+        If the dupe is a ref, ``%d`` and ``%r`` will be the same.
         """
         cmd = self.view.get_default('CustomCommand')
         if not cmd:
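A minimal sketch of the placeholder substitution this docstring describes, for illustration only;
the paths and the simple str.replace() expansion are assumptions, not the actual implementation
behind invoke_custom_command():

    # hypothetical 'CustomCommand' pref value, e.g. a diff tool invocation
    cmd = 'opendiff "%d" "%r"'
    dupe_path, ref_path = '/tmp/copy.jpg', '/tmp/original.jpg'   # hypothetical selection
    expanded = cmd.replace('%d', dupe_path).replace('%r', ref_path)
    print(expanded)  # opendiff "/tmp/copy.jpg" "/tmp/original.jpg"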
@@ -453,6 +497,10 @@ class DupeGuru(RegistrableApplication, Broadcaster):
         self.ignore_list_dialog.refresh()
 
     def load_from(self, filename):
+        """Start an async job to load results from ``filename``.
+
+        :param str filename: path of the XML file (created with :meth:`save_as`) to load
+        """
         def do(j):
             self.results.load_from_xml(filename, self._get_file, j)
         self._start_job(JobType.Load, do)
@@ -503,6 +551,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
         self.notify('marking_changed')
 
     def open_selected(self):
+        """Open :attr:`selected_dupes` with their associated application.
+        """
         if len(self.selected_dupes) > 10:
             if not self.view.ask_yes_no(MSG_MANY_FILES_TO_OPEN):
                 return
@@ -527,6 +577,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
         self.notify('results_changed_but_keep_selection')
 
     def remove_marked(self):
+        """Removed marked duplicates from the results (without touching the files themselves).
+        """
         if not self.results.mark_count:
             self.view.show_message(MSG_NO_MARKED_DUPES)
             return
@@ -537,6 +589,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
         self._results_changed()
 
     def remove_selected(self):
+        """Removed :attr:`selected_dupes` from the results (without touching the files themselves).
+        """
         dupes = self.without_ref(self.selected_dupes)
         if not dupes:
             self.view.show_message(MSG_NO_SELECTED_DUPES)
@@ -577,9 +631,17 @@ class DupeGuru(RegistrableApplication, Broadcaster):
         self.notify('save_session')
 
     def save_as(self, filename):
+        """Save results in ``filename``.
+
+        :param str filename: path of the file to save results (as XML) to.
+        """
         self.results.save_to_xml(filename)
 
     def start_scanning(self):
+        """Starts an async job to scan for duplicates.
+
+        Scans folders selected in :attr:`directories` and put the results in :attr:`results`
+        """
         def do(j):
             j.set_progress(0, tr("Collecting files to scan"))
             if self.scanner.scan_type == scanner.ScanType.Folders:
@@ -611,6 +673,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
         self.notify('marking_changed')
 
     def without_ref(self, dupes):
+        """Returns ``dupes`` with all reference elements removed.
+        """
         return [dupe for dupe in dupes if self.results.get_group_of_duplicate(dupe).ref is not dupe]
 
     def get_default(self, key, fallback_value=None):

View File

@@ -15,7 +15,20 @@ from hscommon.util import FileOrPath
 from . import fs
 
+__all__ = [
+    'Directories',
+    'DirectoryState',
+    'AlreadyThereError',
+    'InvalidPathError',
+]
+
 class DirectoryState:
+    """Enum describing how a folder should be considered.
+
+    * DirectoryState.Normal: Scan all files normally
+    * DirectoryState.Reference: Scan files, but make sure never to delete any of them
+    * DirectoryState.Excluded: Don't scan this folder
+    """
     Normal = 0
     Reference = 1
     Excluded = 2
@@ -27,11 +40,20 @@ class InvalidPathError(Exception):
     """The path being added is invalid"""
 
 class Directories:
+    """Holds user folder selection.
+
+    Manages the selection that the user makes through the folder selection dialog. It also manages
+    folder states, and how recursion applies to them.
+
+    Then, when the user starts the scan, :meth:`get_files` is called to retrieve all files (wrapped
+    in :mod:`core.fs`) that have to be scanned according to the chosen folders/states.
+    """
     #---Override
     def __init__(self, fileclasses=[fs.File]):
         self._dirs = []
         self.states = {}
         self.fileclasses = fileclasses
+        self.folderclass = fs.Folder
 
     def __contains__(self, path):
         for p in self._dirs:
@@ -97,11 +119,14 @@ class Directories:
     #---Public
     def add_path(self, path):
-        """Adds 'path' to self, if not already there.
-
-        Raises AlreadyThereError if 'path' is already in self. If path is a directory containing
-        some of the directories already present in self, 'path' will be added, but all directories
-        under it will be removed. Can also raise InvalidPathError if 'path' does not exist.
+        """Adds ``path`` to self, if not already there.
+
+        Raises :exc:`AlreadyThereError` if ``path`` is already in self. If path is a directory
+        containing some of the directories already present in self, ``path`` will be added, but all
+        directories under it will be removed. Can also raise :exc:`InvalidPathError` if ``path``
+        does not exist.
+
+        :param Path path: path to add
        """
         if path in self:
             raise AlreadyThereError()
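A short usage sketch of the selection API documented in these docstrings; the folder paths are
hypothetical and, per add_path(), would raise InvalidPathError if they don't exist on disk:

    from hscommon.path import Path
    from core.directories import Directories, DirectoryState

    dirs = Directories()
    dirs.add_path(Path('/Users/me/Pictures'))                                        # normal scan
    dirs.set_state(Path('/Users/me/Pictures/Originals'), DirectoryState.Reference)   # never delete these
    files = list(dirs.get_files())   # files under a Reference folder come back with is_ref set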
@@ -112,7 +137,11 @@ class Directories:
     @staticmethod
     def get_subfolders(path):
-        """returns a sorted list of paths corresponding to subfolders in `path`"""
+        """Returns a sorted list of paths corresponding to subfolders in ``path``.
+
+        :param Path path: get subfolders from there
+        :rtype: list of Path
+        """
         try:
             names = [name for name in path.listdir() if (path + name).isdir()]
             names.sort(key=lambda x:x.lower())
@@ -123,7 +152,7 @@ class Directories:
     def get_files(self, j=job.nulljob):
         """Returns a list of all files that are not excluded.
 
-        Returned files also have their 'is_ref' attr set.
+        Returned files also have their ``is_ref`` attr set if applicable.
         """
         for path in self._dirs:
             for file in self._get_files(path, j):
@@ -132,15 +161,17 @@ class Directories:
     def get_folders(self, j=job.nulljob):
         """Returns a list of all folders that are not excluded.
 
-        Returned folders also have their 'is_ref' attr set.
+        Returned folders also have their ``is_ref`` attr set if applicable.
         """
         for path in self._dirs:
-            from_folder = fs.Folder(path)
+            from_folder = self.folderclass(path)
             for folder in self._get_folders(from_folder, j):
                 yield folder
 
     def get_state(self, path):
-        """Returns the state of 'path' (One of the STATE_* const.)
+        """Returns the state of ``path``.
+
+        :rtype: :class:`DirectoryState`
         """
         if path in self.states:
             return self.states[path]
@@ -154,6 +185,12 @@ class Directories:
         return DirectoryState.Normal
 
     def has_any_file(self):
+        """Returns whether selected folders contain any file.
+
+        Because it stops at the first file it finds, it's much faster than get_files().
+
+        :rtype: bool
+        """
         try:
             next(self.get_files())
             return True
@@ -161,6 +198,10 @@ class Directories:
         return False
 
     def load_from_file(self, infile):
+        """Load folder selection from ``infile``.
+
+        :param file infile: path or file pointer to XML generated through :meth:`save_to_file`
+        """
         try:
             root = ET.parse(infile).getroot()
         except Exception:
@@ -183,6 +224,10 @@ class Directories:
             self.set_state(Path(path), int(state))
 
     def save_to_file(self, outfile):
+        """Save folder selection as XML to ``outfile``.
+
+        :param file outfile: path or file pointer to XML file to save to.
+        """
         with FileOrPath(outfile, 'wb') as fp:
             root = ET.Element('directories')
             for root_path in self:
@@ -196,6 +241,12 @@ class Directories:
         tree.write(fp, encoding='utf-8')
 
     def set_state(self, path, state):
+        """Set the state of folder at ``path``.
+
+        :param Path path: path of the target folder
+        :param state: state to set folder to
+        :type state: :class:`DirectoryState`
+        """
         if self.get_state(path) == state:
             return
         # we don't want to needlessly fill self.states. if get_state returns the same thing

View File

@@ -44,10 +44,10 @@ def unpack_fields(fields):
     return result
 
 def compare(first, second, flags=()):
-    """Returns the % of words that match between first and second
-
-    The result is a int in the range 0..100.
-    First and second can be either a string or a list.
+    """Returns the % of words that match between ``first`` and ``second``
+
+    The result is a ``int`` in the range 0..100.
+    ``first`` and ``second`` can be either a string or a list (of words).
     """
     if not (first and second):
         return 0
@@ -76,9 +76,10 @@ def compare(first, second, flags=()):
     return result
 
 def compare_fields(first, second, flags=()):
-    """Returns the score for the lowest matching fields.
-
-    first and second must be lists of lists of string.
+    """Returns the score for the lowest matching :ref:`fields`.
+
+    ``first`` and ``second`` must be lists of lists of string. Each sub-list is then compared with
+    :func:`compare`.
     """
     if len(first) != len(second):
         return 0
@@ -98,13 +99,14 @@ def compare_fields(first, second, flags=()):
             if matched_field:
                 second.remove(matched_field)
     else:
-        results = [compare(word1, word2, flags) for word1, word2 in zip(first, second)]
+        results = [compare(field1, field2, flags) for field1, field2 in zip(first, second)]
     return min(results) if results else 0
 
 def build_word_dict(objects, j=job.nulljob):
     """Returns a dict of objects mapped by their words.
 
-    objects must have a 'words' attribute being a list of strings or a list of lists of strings.
+    objects must have a ``words`` attribute being a list of strings or a list of lists of strings
+    (:ref:`fields`).
 
     The result will be a dict with words as keys, lists of objects as values.
     """
@@ -115,7 +117,11 @@ def build_word_dict(objects, j=job.nulljob):
     return result
 
 def merge_similar_words(word_dict):
-    """Take all keys in word_dict that are similar, and merge them together.
+    """Take all keys in ``word_dict`` that are similar, and merge them together.
+
+    ``word_dict`` has been built with :func:`build_word_dict`. Similarity is computed with Python's
+    ``difflib.get_close_matches()``, which computes the number of edits that are necessary to make
+    a word equal to the other.
     """
     keys = list(word_dict.keys())
     keys.sort(key=len)# we want the shortest word to stay
@@ -131,7 +137,9 @@ def merge_similar_words(word_dict):
             keys.remove(similar)
 def reduce_common_words(word_dict, threshold):
-    """Remove all objects from word_dict values where the object count >= threshold
+    """Remove all objects from ``word_dict`` values where the object count >= ``threshold``
+
+    ``word_dict`` has been built with :func:`build_word_dict`.
 
     The exception to this removal are the objects where all the words of the object are common.
     Because if we remove them, we will miss some duplicates!
@@ -150,13 +158,42 @@ def reduce_common_words(word_dict, threshold):
         del word_dict[word]
 
 Match = namedtuple('Match', 'first second percentage')
+Match.__doc__ = """Represents a match between two :class:`~core.fs.File`.
+
+Regardless of the matching method, when two files are determined to match, a Match pair is created,
+which holds, of course, the two matched files, but also their match "level".
+
+.. attribute:: first
+
+    first file of the pair.
+
+.. attribute:: second
+
+    second file of the pair.
+
+.. attribute:: percentage
+
+    their match level according to the scan method which found the match. int from 1 to 100. For
+    exact scan methods, such as Contents scans, this will always be 100.
+"""
 
 def get_match(first, second, flags=()):
     #it is assumed here that first and second both have a "words" attribute
     percentage = compare(first.words, second.words, flags)
     return Match(first, second, percentage)
 
-def getmatches(objects, min_match_percentage=0, match_similar_words=False, weight_words=False,
+def getmatches(
+        objects, min_match_percentage=0, match_similar_words=False, weight_words=False,
         no_field_order=False, j=job.nulljob):
+    """Returns a list of :class:`Match` within ``objects`` after fuzzily matching their words.
+
+    :param objects: List of :class:`~core.fs.File` to match.
+    :param int min_match_percentage: minimum % of words that have to match.
+    :param bool match_similar_words: make similar words (see :func:`merge_similar_words`) match.
+    :param bool weight_words: longer words are worth more in match % computations.
+    :param bool no_field_order: match :ref:`fields` regardless of their order.
+    :param j: A :ref:`job progress instance <jobs>`.
+    """
     COMMON_WORD_THRESHOLD = 50
     LIMIT = 5000000
     j = j.start_subjob(2)
@@ -203,6 +240,14 @@ def getmatches(objects, min_match_percentage=0, match_similar_words=False, weigh
     return result
 
 def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob):
+    """Returns a list of :class:`Match` within ``files`` if their contents is the same.
+
+    :param str sizeattr: attribute name of the :class:`~core.fs.file` that returns the size of the
+        file to use for comparison.
+    :param bool partial: if true, will use the "md5partial" attribute instead of "md5" to compute
+        contents hash.
+    :param j: A :ref:`job progress instance <jobs>`.
+    """
     j = j.start_subjob([2, 8])
     size2files = defaultdict(set)
     for file in j.iter_with_progress(files, tr("Read size of %d/%d files")):
@@ -224,6 +269,32 @@ def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob)
     return result
 
 class Group:
+    """A group of :class:`~core.fs.File` that match together.
+
+    This manages match pairs into groups and ensures that all files in the group match to each
+    other.
+
+    .. attribute:: ref
+
+        The "reference" file, which is the file among the group that isn't going to be deleted.
+
+    .. attribute:: ordered
+
+        Ordered list of duplicates in the group (including the :attr:`ref`).
+
+    .. attribute:: unordered
+
+        Set duplicates in the group (including the :attr:`ref`).
+
+    .. attribute:: dupes
+
+        An ordered list of the group's duplicate, without :attr:`ref`. Equivalent to
+        ``ordered[1:]``
+
+    .. attribute:: percentage
+
+        Average match percentage of match pairs containing :attr:`ref`.
+    """
     #---Override
     def __init__(self):
         self._clear()
@@ -257,6 +328,15 @@ class Group:
     #---Public
     def add_match(self, match):
+        """Adds ``match`` to internal match list and possibly add duplicates to the group.
+
+        A duplicate can only be considered as such if it matches all other duplicates in the group.
+        This method registers that pair (A, B) represented in ``match`` as possible candidates and,
+        if A and/or B end up matching every other duplicates in the group, add these duplicates to
+        the group.
+
+        :param tuple match: pair of :class:`~core.fs.File` to add
+        """
         def add_candidate(item, match):
             matches = self.candidates[item]
             matches.add(match)
@@ -276,12 +356,18 @@ class Group:
         self._matches_for_ref = None
 
     def discard_matches(self):
+        """Remove all recorded matches that didn't result in a duplicate being added to the group.
+
+        You can call this after the duplicate scanning process to free a bit of memory.
+        """
         discarded = set(m for m in self.matches if not all(obj in self.unordered for obj in [m.first, m.second]))
         self.matches -= discarded
         self.candidates = defaultdict(set)
         return discarded
 
     def get_match_of(self, item):
+        """Returns the match pair between ``item`` and :attr:`ref`.
+        """
         if item is self.ref:
             return
         for m in self._get_matches_for_ref():
@@ -289,6 +375,12 @@ class Group:
             return m
 
     def prioritize(self, key_func, tie_breaker=None):
+        """Reorders :attr:`ordered` according to ``key_func``.
+
+        :param key_func: Key (f(x)) to be used for sorting
+        :param tie_breaker: function to be used to select the reference position in case the top
+            duplicates have the same key_func() result.
+        """
         # tie_breaker(ref, dupe) --> True if dupe should be ref
         # Returns True if anything changed during prioritization.
         master_key_func = lambda x: (-x.is_ref, key_func(x))
@@ -324,6 +416,8 @@ class Group:
             pass
 
     def switch_ref(self, with_dupe):
+        """Make the :attr:`ref` dupe of the group switch position with ``with_dupe``.
+        """
         if self.ref.is_ref:
             return False
         try:
@@ -354,6 +448,10 @@ class Group:
 
 def get_groups(matches, j=job.nulljob):
+    """Returns a list of :class:`Group` from ``matches``.
+
+    Create groups out of match pairs in the smartest way possible.
+    """
     matches.sort(key=lambda match: -match.percentage)
     dupe2group = {}
     groups = []
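A hedged end-to-end sketch of the pipeline these docstrings describe (getmatches() followed by
get_groups()); the ``files`` list is assumed to already contain core.fs.File objects whose
``words`` attribute has been filled in, which is not shown here:

    from core import engine

    matches = engine.getmatches(files, min_match_percentage=80, weight_words=True)
    groups = engine.get_groups(matches)
    for group in groups:
        # ref is the file that would be kept, dupes are the others,
        # percentage is the average match level of the ref's pairs
        print(group.ref, group.dupes, group.percentage)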

View File

@@ -16,6 +16,18 @@ import logging
 from hscommon.util import nonone, get_file_ext
 
+__all__ = [
+    'File',
+    'Folder',
+    'get_file',
+    'get_files',
+    'FSError',
+    'AlreadyExistsError',
+    'InvalidPath',
+    'InvalidDestinationError',
+    'OperationError',
+]
+
 NOT_SET = object()
 
 class FSError(Exception):
@@ -50,6 +62,8 @@ class OperationError(FSError):
     cls_message = "Operation on '{name}' failed."
 
 class File:
+    """Represents a file and holds metadata to be used for scanning.
+    """
     INITIAL_INFO = {
         'size': 0,
         'mtime': 0,
@@ -129,6 +143,8 @@ class File:
     #--- Public
     @classmethod
     def can_handle(cls, path):
+        """Returns whether this file wrapper class can handle ``path``.
+        """
         return not path.islink() and path.isfile()
 
     def rename(self, newname):
@@ -205,7 +221,7 @@ class Folder(File):
         if self._subfolders is None:
             subpaths = [self.path + name for name in self.path.listdir()]
             subfolders = [p for p in subpaths if not p.islink() and p.isdir()]
-            self._subfolders = [Folder(p) for p in subfolders]
+            self._subfolders = [self.__class__(p) for p in subfolders]
         return self._subfolders
 
     @classmethod
@@ -214,11 +230,23 @@ class Folder(File):
 
 def get_file(path, fileclasses=[File]):
+    """Wraps ``path`` around its appropriate :class:`File` class.
+
+    Whether a class is "appropriate" is decided by :meth:`File.can_handle`
+
+    :param Path path: path to wrap
+    :param fileclasses: List of candidate :class:`File` classes
+    """
     for fileclass in fileclasses:
         if fileclass.can_handle(path):
             return fileclass(path)
 
 def get_files(path, fileclasses=[File]):
+    """Returns a list of :class:`File` for each file contained in ``path``.
+
+    :param Path path: path to scan
+    :param fileclasses: List of candidate :class:`File` classes
+    """
     assert all(issubclass(fileclass, File) for fileclass in fileclasses)
     def combine_paths(p1, p2):
         try:
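A brief usage sketch of the wrapping helpers documented above; the path is hypothetical, and
edition-specific subclasses could be passed through ``fileclasses`` in the same way:

    from hscommon.path import Path
    from core import fs

    f = fs.get_file(Path('/tmp/photo.jpg'))            # first class whose can_handle() accepts the path
    wrapped = fs.get_files(Path('/tmp'), fileclasses=[fs.File])   # one File wrapper per file in /tmp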

View File

@@ -0,0 +1,15 @@
"""
Meta GUI elements in dupeGuru
-----------------------------
dupeGuru is designed with a `cross-toolkit`_ approach in mind. It means that its core code
(which doesn't depend on any GUI toolkit) has elements which preformat core information in a way
that makes it easy for a UI layer to consume.
For example, we have :class:`~core.gui.ResultTable` which takes information from
:class:`~core.results.Results` and mashes it in rows and columns which are ready to be fetched by
either Cocoa's ``NSTableView`` or Qt's ``QTableView``. It tells them which cell is supposed to be
blue, which is supposed to be orange, does the sorting logic, holds selection, etc..
.. _cross-toolkit: http://www.hardcoded.net/articles/cross-toolkit-software
"""

View File

@@ -21,6 +21,19 @@ from . import engine
 from .markable import Markable
 
 class Results(Markable):
+    """Manages a collection of duplicate :class:`~core.engine.Group`.
+
+    This class takes care of marking, sorting and filtering duplicate groups.
+
+    .. attribute:: groups
+
+        The list of :class:`~core.engine.Group` contained managed by this instance.
+
+    .. attribute:: dupes
+
+        A list of all duplicates (:class:`~core.fs.File` instances), without ref, contained in the
+        currently managed :attr:`groups`.
+    """
     #---Override
     def __init__(self, app):
         Markable.__init__(self)
@@ -145,17 +158,17 @@ class Results(Markable):
     #---Public
     def apply_filter(self, filter_str):
-        ''' Applies a filter 'filter_str' to self.groups
+        """Applies a filter ``filter_str`` to :attr:`groups`
 
-        When you apply the filter, only dupes with the filename matching 'filter_str' will be in
-        in the results. To cancel the filter, just call apply_filter with 'filter_str' to None,
+        When you apply the filter, only dupes with the filename matching ``filter_str`` will be in
+        in the results. To cancel the filter, just call apply_filter with ``filter_str`` to None,
         and the results will go back to normal.
 
         If call apply_filter on a filtered results, the filter will be applied
         *on the filtered results*.
 
-        'filter_str' is a string containing a regexp to filter dupes with.
-        '''
+        :param str filter_str: a string containing a regexp to filter dupes with.
+        """
         if not filter_str:
             self.__filtered_dupes = None
             self.__filtered_groups = None
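A small sketch of the progressive filtering behaviour described in this docstring, assuming
``results`` is an already-populated Results instance:

    results.apply_filter(r'\.jpg$')    # keep only dupes whose filename matches the regexp
    results.apply_filter('vacation')   # applied on top of the previous filter, narrowing it further
    results.apply_filter(None)         # cancel filtering; results go back to normal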
@@ -182,6 +195,8 @@ class Results(Markable):
         self.__dupes = None
 
     def get_group_of_duplicate(self, dupe):
+        """Returns :class:`~core.engine.Group` in which ``dupe`` belongs.
+        """
         try:
             return self.__group_of_duplicate[dupe]
         except (TypeError, KeyError):
@@ -190,6 +205,12 @@ class Results(Markable):
     is_markable = _is_markable
 
     def load_from_xml(self, infile, get_file, j=nulljob):
+        """Load results from ``infile``.
+
+        :param infile: a file or path pointing to an XML file created with :meth:`save_to_xml`.
+        :param get_file: a function f(path) returning a :class:`~core.fs.File` wrapping the path.
+        :param j: A :ref:`job progress instance <jobs>`.
+        """
         def do_match(ref_file, other_files, group):
             if not other_files:
                 return
@@ -242,6 +263,8 @@ class Results(Markable):
         self.is_modified = False
 
     def make_ref(self, dupe):
+        """Make ``dupe`` take the :attr:`~core.engine.Group.ref` position of its group.
+        """
         g = self.get_group_of_duplicate(dupe)
         r = g.ref
         if not g.switch_ref(dupe):
@@ -258,8 +281,14 @@ class Results(Markable):
         return True
 
     def perform_on_marked(self, func, remove_from_results):
-        # Performs `func` on all marked dupes. If an EnvironmentError is raised during the call,
-        # the problematic dupe is added to self.problems.
+        """Performs ``func`` on all marked dupes.
+
+        If an ``EnvironmentError`` is raised during the call, the problematic dupe is added to
+        self.problems.
+
+        :param bool remove_from_results: If true, dupes which had ``func`` applied and didn't cause
+            any problem.
+        """
         self.problems = []
         to_remove = []
         marked = (dupe for dupe in self.dupes if self.is_marked(dupe))
@@ -276,8 +305,10 @@ class Results(Markable):
             self.mark(dupe)
 
     def remove_duplicates(self, dupes):
-        '''Remove 'dupes' from their respective group, and remove the group is it ends up empty.
-        '''
+        """Remove ``dupes`` from their respective :class:`~core.engine.Group`.
+
+        Also, remove the group from :attr:`groups` if it ends up empty.
+        """
         affected_groups = set()
         for dupe in dupes:
             group = self.get_group_of_duplicate(dupe)
@@ -302,9 +333,12 @@ class Results(Markable):
         self.is_modified = bool(self.__groups)
 
     def save_to_xml(self, outfile):
+        """Save results to ``outfile`` in XML.
+
+        :param outfile: file object or path.
+        """
         self.apply_filter(None)
         root = ET.Element('results')
-        # writer = XMLGenerator(outfile, 'utf-8')
         for g in self.groups:
             group_elem = ET.SubElement(root, 'group')
             dupe2index = {}
@@ -349,13 +383,26 @@ class Results(Markable):
         self.is_modified = False
 
     def sort_dupes(self, key, asc=True, delta=False):
+        """Sort :attr:`dupes` according to ``key``.
+
+        :param str key: key attribute name to sort with.
+        :param bool asc: If false, sorting is reversed.
+        :param bool delta: If true, sorting occurs using :ref:`delta values <deltavalues>`.
+        """
         if not self.__dupes:
             self.__get_dupe_list()
         keyfunc = lambda d: self.app._get_dupe_sort_key(d, lambda: self.get_group_of_duplicate(d), key, delta)
         self.__dupes.sort(key=keyfunc, reverse=not asc)
         self.__dupes_sort_descriptor = (key,asc,delta)
 
-    def sort_groups(self,key,asc=True):
+    def sort_groups(self, key, asc=True):
+        """Sort :attr:`groups` according to ``key``.
+
+        The :attr:`~core.engine.Group.ref` of each group is used to extract values for sorting.
+
+        :param str key: key attribute name to sort with.
+        :param bool asc: If false, sorting is reversed.
+        """
         keyfunc = lambda g: self.app._get_group_sort_key(g, key)
         self.groups.sort(key=keyfunc, reverse=not asc)
         self.__groups_sort_descriptor = (key,asc)
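A short usage sketch of the sorting calls documented above; the 'size' and 'mtime' keys are
illustrative attribute names, not a definitive list:

    results.sort_groups('size')                          # groups ordered by their ref's size, ascending
    results.sort_dupes('mtime', asc=False, delta=True)   # dupes ordered by delta mtime, descending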

View File

@@ -1,2 +1,2 @@
-__version__ = '6.5.1'
+__version__ = '6.6.0'
 __appname__ = 'dupeGuru Music Edition'

View File

@@ -1,2 +1,2 @@
-__version__ = '2.7.1'
+__version__ = '2.8.0'
 __appname__ = 'dupeGuru Picture Edition'

View File

@@ -32,7 +32,7 @@ pystring2cfstring(PyObject *pystring)
     }
     s = (UInt8*)PyBytes_AS_STRING(encoded);
-    size = PyUnicode_GET_SIZE(encoded);
+    size = PyBytes_GET_SIZE(encoded);
     result = CFStringCreateWithBytes(NULL, s, size, kCFStringEncodingUTF8, FALSE);
     Py_DECREF(encoded);
     return result;
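The crash fixed here comes down to measuring the wrong kind of length: the byte count handed to
CFStringCreateWithBytes must be taken from the encoded bytes object, not from the str. A plain
Python illustration of the difference (the CPython C API call itself is in the hunk above):

    s = "café"                   # 4 code points
    encoded = s.encode("utf-8")  # b'caf\xc3\xa9'
    assert len(s) == 4
    assert len(encoded) == 5     # the UTF-8 byte count is what the CFString call needs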

View File

@@ -1,2 +1,2 @@
-__version__ = '3.7.0'
+__version__ = '3.7.1'
 __appname__ = 'dupeGuru'

View File

@@ -17,6 +17,7 @@ class DupeGuru(DupeGuruBase):
     def __init__(self, view, appdata):
         DupeGuruBase.__init__(self, view, appdata)
         self.directories.fileclasses = [fs.File]
+        self.directories.folderclass = fs.Folder
 
     def _prioritization_categories(self):
         return prioritize.all_categories()

View File

@@ -11,11 +11,10 @@ from hscommon.util import format_size
 from core import fs
 from core.app import format_timestamp, format_perc, format_words, format_dupe_count
 
-class File(fs.File):
-    def get_display_info(self, group, delta):
-        size = self.size
-        mtime = self.mtime
-        m = group.get_match_of(self)
+def get_display_info(dupe, group, delta):
+    size = dupe.size
+    mtime = dupe.mtime
+    m = group.get_match_of(dupe)
     if m:
         percentage = m.percentage
         dupe_count = 0
@@ -27,13 +26,22 @@ class File(fs.File):
         percentage = group.percentage
         dupe_count = len(group.dupes)
     return {
-        'name': self.name,
-        'folder_path': str(self.folder_path),
+        'name': dupe.name,
+        'folder_path': str(dupe.folder_path),
         'size': format_size(size, 0, 1, False),
-        'extension': self.extension,
+        'extension': dupe.extension,
         'mtime': format_timestamp(mtime, delta and m),
         'percentage': format_perc(percentage),
-        'words': format_words(self.words) if hasattr(self, 'words') else '',
+        'words': format_words(dupe.words) if hasattr(dupe, 'words') else '',
         'dupe_count': format_dupe_count(dupe_count),
     }
+
+class File(fs.File):
+    def get_display_info(self, group, delta):
+        return get_display_info(self, group, delta)
+
+class Folder(fs.Folder):
+    def get_display_info(self, group, delta):
+        return get_display_info(self, group, delta)

View File

@@ -1,3 +1,11 @@
+=== 6.6.0 (2013-08-18)
+
+* Improved delta values to support non-numerical values. (#213)
+* Improved the Re-Prioritize dialog's UI. (#224)
+* Added hardlink/symlink support on Windows Vista+. (#220)
+* Dropped 32bit support on Mac OS X.
+* Added Vietnamese localization by Phan Anh.
+
 === 6.5.1 (2013-05-18)
 
 * Improved "Make Selection Reference" to make it clearer. (#222)

View File

@@ -1,3 +1,12 @@
+=== 2.8.0 (2013-08-25)
+
+* Improved delta values to support non-numerical values. (#213)
+* Improved the Re-Prioritize dialog's UI. (#224)
+* Added hardlink/symlink support on Windows Vista+. (#220)
+* Added keybinding for the "Clear Picture Cache" action. [Linux, Windows] (#230)
+* Dropped 32bit support on Mac OS X.
+* Added Vietnamese localization by Phan Anh.
+
 === 2.7.1 (2013-05-05)
 
 * Fixed false matching bug in EXIF matching. (#219)

View File

@@ -1,3 +1,7 @@
+=== 3.7.1 (2013-08-19)
+
+* Fixed folder scan type, which was broken in v3.7.0.
+
 === 3.7.0 (2013-08-17)
 
 * Improved delta values to support non-numerical values. (#213)

View File

@@ -16,7 +16,9 @@ import sys, os
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #sys.path.insert(0, os.path.abspath('.'))
+# for autodocs
+sys.path.insert(0, os.path.abspath(os.path.join('..', '..')))
 
 # -- General configuration -----------------------------------------------------
@@ -25,7 +27,7 @@ import sys, os
 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.todo']
+extensions = ['sphinx.ext.todo', 'sphinx.ext.autodoc']
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']

View File

@@ -0,0 +1,5 @@
core.app
========
.. automodule:: core.app
:members:

View File

@@ -0,0 +1,5 @@
core.directories
================
.. automodule:: core.directories
:members:

View File

@@ -0,0 +1,36 @@
core.engine
===========
.. automodule:: core.engine
.. autoclass:: Match
.. autoclass:: Group
:members:
.. autofunction:: build_word_dict
.. autofunction:: compare
.. autofunction:: compare_fields
.. autofunction:: getmatches
.. autofunction:: getmatches_by_contents
.. autofunction:: get_groups
.. autofunction:: merge_similar_words
.. autofunction:: reduce_common_words
.. _fields:
Fields
------
Fields are groups of words which each represent a significant part of the whole name. This concept
is significant in music file names, where we often have names like "My Artist - a very long title
with many many words".
This title has 10 words. If you run a scan with a bit of tolerance, let's say 90%, you'll be able
to find a dupe that has only one "many" in the song title. However, you would also get false
duplicates from a title like "My Giraffe - a very long title with many many words", which is of
course a very different song and it doesn't make sense to match them.
When matching by fields, each field (separated by "-") is considered as a separate string to match
independently. After all fields are matched, the lowest result is kept. In the "Giraffe" example we
gave, the result would be 50% instead of 90% in normal mode.
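A hedged sketch of what field matching means in code, assuming core.engine is importable and using
the same "Giraffe" example; the exact score depends on compare()'s weighting, but per the example
above it should come out around 50 rather than ~90:

    from core.engine import compare_fields

    first = [['my', 'artist'], ['a', 'very', 'long', 'title', 'with', 'many', 'many', 'words']]
    second = [['my', 'giraffe'], ['a', 'very', 'long', 'title', 'with', 'many', 'many', 'words']]
    # the title field matches perfectly, but the artist field only half-matches,
    # and compare_fields() keeps the lowest per-field score
    print(compare_fields(first, second))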

View File

@@ -0,0 +1,5 @@
core.fs
=======
.. automodule:: core.fs
:members:

View File

@@ -0,0 +1,5 @@
core.gui
========
.. automodule:: core.gui
:members:

View File

@@ -0,0 +1,5 @@
core.results
============
.. automodule:: core.results
:members:

View File

@@ -23,6 +23,8 @@ codebase. For example, when performing "Remove Selected From Results",
 ``base.app.DupeGuru.remove_duplicates()`` on the PyQt side, are respectively called to perform the
 thing. All of this is quite ugly, I know (see the "Refactoring" section below).
 
+.. _jobs:
+
 Jobs
 ----
@@ -44,3 +46,16 @@ a list of matches and returns a list of ``Group`` instances (a ``Group`` is basi
 When a scan is over, the final result (the list of groups from ``get_groups()``) is placed into
 ``app.DupeGuru.results``, which is a ``results.Results`` instance. The ``Results`` instance is where
 all the dupe marking, sorting, removing, power marking, etc. takes place.
+
+API
+---
+
+.. toctree::
+   :maxdepth: 2
+
+   core/app
+   core/fs
+   core/engine
+   core/directories
+   core/results
+   core/gui

View File

@@ -54,6 +54,6 @@ Contents:
    results
    reprioritize
    faq
-   developer
+   developer/index
    changelog
    credits

View File

@@ -45,6 +45,8 @@ The dupeGuru results, when in normal mode, are sorted according to duplicate gro
 * Hold Shift and click on it.
 * Press Space to mark all selected duplicates.
 
+.. _deltavalues:
+
 Delta Values
 ------------

View File

@@ -9,7 +9,9 @@
 import os.path as op
 import re
 
-from .build import print_and_do, read_changelog_file, filereplace
+from pkg_resources import load_entry_point
+
+from .build import read_changelog_file, filereplace
 
 CHANGELOG_FORMAT = """
 {version} ({date})
@@ -58,5 +60,10 @@ def gen(basepath, destpath, changelogpath, tixurl, confrepl=None, confpath=None,
     filereplace(changelogtmpl, changelog_out, changelog='\n'.join(rendered_logs))
     conf_out = op.join(basepath, 'conf.py')
     filereplace(confpath, conf_out, **confrepl)
-    cmd = 'sphinx-build "{}" "{}"'.format(basepath, destpath)
-    print_and_do(cmd)
+    # We used to call sphinx-build with print_and_do(), but the problem was that the virtualenv
+    # of the calling python wasn't correctly considered and caused problems with documentation
+    # relying on autodoc (which tries to import the module to auto-document, but fail because of
+    # missing dependencies which are in the virtualenv). Here, we do exactly what is done when
+    # calling the command from bash.
+    cmd = load_entry_point('Sphinx', 'console_scripts', 'sphinx-build')
+    cmd(['sphinx-build', basepath, destpath])

View File

@@ -80,6 +80,9 @@ class DupeGuru(DupeGuruBase):
     def _setup(self):
         self.model.directories.fileclasses = [File]
         DupeGuruBase._setup(self)
+        self.directories_dialog.menuFile.insertAction(
+            self.directories_dialog.actionLoadResults, self.resultWindow.actionClearPictureCache
+        )
 
     def _update_options(self):
         DupeGuruBase._update_options(self)

View File

@@ -17,6 +17,7 @@ class ResultWindow(ResultWindowBase):
     def _setupMenu(self):
         ResultWindowBase._setupMenu(self)
         self.actionClearPictureCache = QAction(tr("Clear Picture Cache"), self)
+        self.actionClearPictureCache.setShortcut('Ctrl+Shift+P')
         self.menuFile.insertAction(self.actionSaveResults, self.actionClearPictureCache)
         self.actionClearPictureCache.triggered.connect(self.clearPictureCacheTriggered)