1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-22 14:41:39 +00:00

Began serious code documentation effort

Enabled the autodoc Sphinx extension and started adding docstrings to
classes, methods, etc.. It's quickly becoming quite interesting...
This commit is contained in:
Virgil Dupras
2013-08-18 18:36:09 -04:00
parent 8a8ac027f5
commit 7e8f9036d8
15 changed files with 284 additions and 25 deletions

View File

@@ -224,6 +224,23 @@ def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob)
return result
class Group:
"""A group of :class:`~core.fs.File` that match together.
This manages match pairs into groups and ensures that all files in the group match to each
other.
.. attribute:: ref
The "reference" file, which is the file among the group that isn't going to be deleted.
.. attribute:: ordered
Ordered list of duplicates in the group (including the :attr:`ref`).
.. attribute:: unordered
Set duplicates in the group (including the :attr:`ref`).
"""
#---Override
def __init__(self):
self._clear()
@@ -257,6 +274,15 @@ class Group:
#---Public
def add_match(self, match):
"""Adds ``match`` to internal match list and possibly add duplicates to the group.
A duplicate can only be considered as such if it matches all other duplicates in the group.
This method registers that pair (A, B) represented in ``match`` as possible candidates and,
if A and/or B end up matching every other duplicates in the group, add these duplicates to
the group.
:param tuple match: pair of :class:`~core.fs.File` to add
"""
def add_candidate(item, match):
matches = self.candidates[item]
matches.add(match)
@@ -276,12 +302,18 @@ class Group:
self._matches_for_ref = None
def discard_matches(self):
"""Remove all recorded matches that didn't result in a duplicate being added to the group.
You can call this after the duplicate scanning process to free a bit of memory.
"""
discarded = set(m for m in self.matches if not all(obj in self.unordered for obj in [m.first, m.second]))
self.matches -= discarded
self.candidates = defaultdict(set)
return discarded
def get_match_of(self, item):
"""Returns the match pair between ``item`` and :attr:`ref`.
"""
if item is self.ref:
return
for m in self._get_matches_for_ref():
@@ -289,6 +321,12 @@ class Group:
return m
def prioritize(self, key_func, tie_breaker=None):
"""Reorders :attr:`ordered` according to ``key_func``.
:param key_func: Key (f(x)) to be used for sorting
:param tie_breaker: function to be used to select the reference position in case the top
duplicates have the same key_func() result.
"""
# tie_breaker(ref, dupe) --> True if dupe should be ref
# Returns True if anything changed during prioritization.
master_key_func = lambda x: (-x.is_ref, key_func(x))