1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-22 06:37:17 +00:00

[#92 state:fixed] Added an option to ignore duplicates hardlinking to the same file.

This commit is contained in:
Virgil Dupras
2010-09-25 12:28:34 +02:00
parent 456a835285
commit 0a64d653e1
24 changed files with 415 additions and 61 deletions

View File

@@ -50,6 +50,7 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.options = {
'escape_filter_regexp': True,
'clean_empty_dirs': False,
'ignore_hardlink_matches': False,
}
self.selected_dupes = []
@@ -117,6 +118,17 @@ class DupeGuru(RegistrableApplication, Broadcaster):
def _reveal_path(path):
    """Unimplemented in this class: always raises ``NotImplementedError``.

    ``path`` is accepted but never used here.
    NOTE(review): presumably a hook meant to be overridden elsewhere -- confirm.
    """
    raise NotImplementedError
@staticmethod
def _remove_hardlink_dupes(files):
    """Return ``files`` with extra hardlinks to an already-seen file removed.

    Every file is stat'ed through ``io.stat(file.path)``. The first file seen
    for a given (device, inode) pair is kept and later ones are dropped; the
    relative order of the kept files is unchanged.

    files: iterable of objects exposing a ``path`` attribute usable with
        ``io.stat``.
    Returns a new list; ``files`` itself is not modified.

    NOTE(review): assumes ``io.stat`` returns an ``os.stat``-like result that
    exposes both ``st_ino`` and ``st_dev`` -- confirm against the ``io`` helper.
    """
    seen_ids = set()
    result = []
    for candidate in files:
        info = io.stat(candidate.path)
        if not info.st_ino:
            # An inode number of 0 means the platform gave us nothing usable to
            # identify the file, so hardlinks cannot be detected. Keep the file
            # rather than treating every such file as a link to the first one.
            result.append(candidate)
            continue
        # Inode numbers are only unique within a single device, so the device
        # id has to be part of the key; otherwise unrelated files living on
        # different volumes could be mistaken for hardlinks of each other.
        file_id = (info.st_dev, info.st_ino)
        if file_id not in seen_ids:
            seen_ids.add(file_id)
            result.append(candidate)
    return result
def _select_dupes(self, dupes):
if dupes == self.selected_dupes:
return
@@ -341,6 +353,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
def do(j):
j.set_progress(0, 'Collecting files to scan')
files = list(self.directories.get_files())
if self.options['ignore_hardlink_matches']:
files = self._remove_hardlink_dupes(files)
logging.info('Scanning %d files' % len(files))
self.results.groups = self.scanner.GetDupeGroups(files, j)

View File

@@ -116,15 +116,22 @@ class PyDupeGuruBase(PyRegistrable):
return bool(self.py.results.problems)
#---Properties
@signature('v@:c')
def setMixFileKind_(self, mix_file_kind):
    # Forward the flag to the scanner owned by the wrapped ``self.py`` object.
    scanner = self.py.scanner
    scanner.mix_file_kind = mix_file_kind
@signature('v@:c')
def setEscapeFilterRegexp_(self, escape_filter_regexp):
    # Record the flag under the 'escape_filter_regexp' key of the wrapped
    # object's options mapping.
    app_options = self.py.options
    app_options['escape_filter_regexp'] = escape_filter_regexp
@signature('v@:c')
def setRemoveEmptyFolders_(self, remove_empty_folders):
    # The value passed in as ``remove_empty_folders`` is stored under the
    # 'clean_empty_dirs' key of the wrapped object's options mapping.
    app_options = self.py.options
    app_options['clean_empty_dirs'] = remove_empty_folders
@signature('v@:c')
def setIgnoreHardlinkMatches_(self, ignore_hardlink_matches):
    # Record the flag under the 'ignore_hardlink_matches' key of the wrapped
    # object's options mapping.
    app_options = self.py.options
    app_options['ignore_hardlink_matches'] = ignore_hardlink_matches
#---Worker
def getJobProgress(self):
try:

View File

@@ -24,6 +24,7 @@ from ..app import DupeGuru as DupeGuruBase
from ..gui.details_panel import DetailsPanel
from ..gui.directory_tree import DirectoryTree
from ..gui.result_table import ResultTable
from ..scanner import ScanType
class DupeGuru(DupeGuruBase):
def __init__(self):
@@ -122,6 +123,19 @@ class TCDupeGuru(TestCase):
app.directories._dirs.append('this is just so Scan() doesnt return 3')
app.start_scanning() # no exception
def test_ignore_hardlink_matches(self):
    # If the ignore_hardlink_matches option is set, don't match files hardlinking to the same
    # inode.
    tmppath = Path(self.tmpdir())
    myfile = tmppath + 'myfile'
    # Close the file explicitly so that 'foo' is written out before the file is linked and
    # scanned, instead of relying on the file object being garbage collected in time.
    with io.open(myfile, 'w') as fp:
        fp.write('foo')
    # Second directory entry for the same file: same content by construction.
    os.link(str(myfile), str(tmppath + 'hardlink'))
    app = DupeGuru()
    app.directories.add_path(tmppath)
    app.scanner.scan_type = ScanType.Contents
    app.options['ignore_hardlink_matches'] = True
    app.start_scanning()
    # With the option on, the two names must not be reported as duplicates of each other.
    eq_(len(app.results.groups), 0)
class TCDupeGuru_clean_empty_dirs(TestCase):
cls_tested_module = app