1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-22 14:41:39 +00:00

[#92 state:fixed] Added an option to ignore duplicates hardlinking to the same file.

This commit is contained in:
Virgil Dupras
2010-09-25 12:28:34 +02:00
parent 456a835285
commit 0a64d653e1
24 changed files with 415 additions and 61 deletions

View File

@@ -50,6 +50,7 @@ class DupeGuru(RegistrableApplication, Broadcaster):
self.options = {
'escape_filter_regexp': True,
'clean_empty_dirs': False,
'ignore_hardlink_matches': False,
}
self.selected_dupes = []
@@ -117,6 +118,17 @@ class DupeGuru(RegistrableApplication, Broadcaster):
def _reveal_path(path):
raise NotImplementedError()
@staticmethod
def _remove_hardlink_dupes(files):
    """Return *files* with at most one entry per inode.

    Hardlinks to the same underlying file share an inode number, so any
    file whose inode was already encountered is dropped; the first file
    seen for each inode is kept, preserving the original order.
    """
    kept = []
    inodes_seen = set()
    for candidate in files:
        candidate_inode = io.stat(candidate.path).st_ino
        if candidate_inode in inodes_seen:
            continue  # hardlink twin of an already-kept file
        inodes_seen.add(candidate_inode)
        kept.append(candidate)
    return kept
def _select_dupes(self, dupes):
if dupes == self.selected_dupes:
return
@@ -341,6 +353,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
def do(j):
j.set_progress(0, 'Collecting files to scan')
files = list(self.directories.get_files())
if self.options['ignore_hardlink_matches']:
files = self._remove_hardlink_dupes(files)
logging.info('Scanning %d files' % len(files))
self.results.groups = self.scanner.GetDupeGroups(files, j)