[#89 state:fixed] Added a Folders scan type in dgse.

--HG--
rename : core_se/tests/fs_test.py => core/tests/fs_test.py
This commit is contained in:
Virgil Dupras 2011-04-12 13:22:29 +02:00
parent 0fea59007c
commit 279d44b7f3
23 changed files with 292 additions and 154 deletions

View File

@ -42,7 +42,10 @@ http://www.hardcoded.net/licenses/bsd_license
- (id)init
{
self = [super init];
VTIsIntIn *vt = [[[VTIsIntIn alloc] initWithValues:[NSIndexSet indexSetWithIndex:1] reverse:YES] autorelease];
NSMutableIndexSet *contentsIndexes = [NSMutableIndexSet indexSet];
[contentsIndexes addIndex:1];
[contentsIndexes addIndex:2];
VTIsIntIn *vt = [[[VTIsIntIn alloc] initWithValues:contentsIndexes reverse:YES] autorelease];
[NSValueTransformer setValueTransformer:vt forName:@"vtScanTypeIsNotContent"];
_directoryPanel = nil;
return self;

View File

@ -29,6 +29,7 @@ class PyDupeGuru(PyDupeGuruBase):
self.py.scanner.scan_type = [
ScanType.Filename,
ScanType.Contents,
ScanType.Folders,
][scan_type]
except IndexError:
pass

View File

@ -2,13 +2,13 @@
<archive type="com.apple.InterfaceBuilder3.Cocoa.XIB" version="7.10">
<data>
<int key="IBDocument.SystemTarget">1050</int>
<string key="IBDocument.SystemVersion">10J567</string>
<string key="IBDocument.InterfaceBuilderVersion">823</string>
<string key="IBDocument.SystemVersion">10J869</string>
<string key="IBDocument.InterfaceBuilderVersion">851</string>
<string key="IBDocument.AppKitVersion">1038.35</string>
<string key="IBDocument.HIToolboxVersion">462.00</string>
<string key="IBDocument.HIToolboxVersion">461.00</string>
<object class="NSMutableDictionary" key="IBDocument.PluginVersions">
<string key="NS.key.0">com.apple.InterfaceBuilder.CocoaPlugin</string>
<string key="NS.object.0">823</string>
<string key="NS.object.0">851</string>
</object>
<object class="NSMutableArray" key="IBDocument.EditedObjectIDs">
<bool key="EncodedWithXMLCoder">YES</bool>
@ -34,10 +34,6 @@
<string key="NSClassName">NSApplication</string>
</object>
<object class="NSUserDefaultsController" id="75941798">
<object class="NSMutableArray" key="NSDeclaredKeys">
<bool key="EncodedWithXMLCoder">YES</bool>
<string>DebugMode</string>
</object>
<bool key="NSSharedInstance">YES</bool>
</object>
<object class="NSWindowTemplate" id="489014306">
@ -273,6 +269,16 @@
<string key="NSAction">_popUpItemAction:</string>
<reference key="NSTarget" ref="63752222"/>
</object>
<object class="NSMenuItem" id="510059249">
<reference key="NSMenu" ref="38553798"/>
<string key="NSTitle">Folders</string>
<string key="NSKeyEquiv"/>
<int key="NSMnemonicLoc">2147483647</int>
<reference key="NSOnImage" ref="1002480020"/>
<reference key="NSMixedImage" ref="394002035"/>
<string key="NSAction">_popUpItemAction:</string>
<reference key="NSTarget" ref="63752222"/>
</object>
</object>
</object>
<int key="NSPreferredEdge">3</int>
@ -1372,6 +1378,7 @@
<bool key="EncodedWithXMLCoder">YES</bool>
<reference ref="101272617"/>
<reference ref="352817522"/>
<reference ref="510059249"/>
</object>
<reference key="parent" ref="63752222"/>
</object>
@ -1657,6 +1664,11 @@
<reference key="object" ref="236967908"/>
<reference key="parent" ref="727223254"/>
</object>
<object class="IBObjectRecord">
<int key="objectID">149</int>
<reference key="object" ref="510059249"/>
<reference key="parent" ref="38553798"/>
</object>
</object>
</object>
<object class="NSMutableDictionary" key="flattenedProperties">
@ -1688,6 +1700,7 @@
<string>145.IBViewBoundsToFrameTransform</string>
<string>145.ImportedFromIB2</string>
<string>146.IBPluginDependency</string>
<string>149.IBPluginDependency</string>
<string>51.IBPluginDependency</string>
<string>51.ImportedFromIB2</string>
<string>52.IBEditorWindowLastContentRect</string>
@ -1747,6 +1760,7 @@
<string>76.IBPluginDependency</string>
<string>77.IBPluginDependency</string>
<string>78.IBPluginDependency</string>
<string>79.IBEditorWindowLastContentRect</string>
<string>79.IBPluginDependency</string>
<string>79.ImportedFromIB2</string>
<string>80.IBPluginDependency</string>
@ -1815,6 +1829,7 @@
<boolean value="YES"/>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
<boolean value="YES"/>
<string>{{88, 520}, {389, 325}}</string>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
@ -1887,6 +1902,7 @@
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
<string>{{216, 742}, {216, 63}}</string>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
<boolean value="YES"/>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
@ -1936,7 +1952,7 @@
</object>
</object>
<nil key="sourceID"/>
<int key="maxID">148</int>
<int key="maxID">149</int>
</object>
<object class="IBClassDescriber" key="IBDocument.Classes">
<object class="NSMutableArray" key="referencedPartialClassDescriptions">

View File

@ -72,4 +72,7 @@
"142.title" = "Ignorer doublons avec hardlink vers le même fichier";
/* Class = "NSButtonCell"; title = "Debug mode (restart required)"; ObjectID = "146"; */
"146.title" = "Mode de déboguage (redémarrage requis)";
"146.title" = "Mode de déboguage (redémarrage requis)";
/* Class = "NSMenuItem"; title = "Folders"; ObjectID = "149"; */
"149.title" = "Dossiers";

View File

@ -2,13 +2,13 @@
<archive type="com.apple.InterfaceBuilder3.Cocoa.XIB" version="7.10">
<data>
<int key="IBDocument.SystemTarget">1050</int>
<string key="IBDocument.SystemVersion">10J567</string>
<string key="IBDocument.InterfaceBuilderVersion">823</string>
<string key="IBDocument.SystemVersion">10J869</string>
<string key="IBDocument.InterfaceBuilderVersion">851</string>
<string key="IBDocument.AppKitVersion">1038.35</string>
<string key="IBDocument.HIToolboxVersion">462.00</string>
<string key="IBDocument.HIToolboxVersion">461.00</string>
<object class="NSMutableDictionary" key="IBDocument.PluginVersions">
<string key="NS.key.0">com.apple.InterfaceBuilder.CocoaPlugin</string>
<string key="NS.object.0">823</string>
<string key="NS.object.0">851</string>
</object>
<object class="NSMutableArray" key="IBDocument.EditedObjectIDs">
<bool key="EncodedWithXMLCoder">YES</bool>
@ -268,6 +268,16 @@
<string key="NSAction">_popUpItemAction:</string>
<reference key="NSTarget" ref="63752222"/>
</object>
<object class="NSMenuItem" id="510059249">
<reference key="NSMenu" ref="38553798"/>
<string key="NSTitle">Dossiers</string>
<string key="NSKeyEquiv"/>
<int key="NSMnemonicLoc">2147483647</int>
<reference key="NSOnImage" ref="1002480020"/>
<reference key="NSMixedImage" ref="394002035"/>
<string key="NSAction">_popUpItemAction:</string>
<reference key="NSTarget" ref="63752222"/>
</object>
</object>
</object>
<int key="NSPreferredEdge">3</int>
@ -290,7 +300,7 @@
<reference key="NSControlView" ref="637819333"/>
<int key="NSButtonFlags">1211912703</int>
<int key="NSButtonFlags2">2</int>
<object class="NSCustomResource" key="NSNormalImage" id="587167894">
<object class="NSCustomResource" key="NSNormalImage" id="266862978">
<string key="NSClassName">NSImage</string>
<string key="NSResourceName">NSSwitch</string>
</object>
@ -317,7 +327,7 @@
<reference key="NSControlView" ref="1067721243"/>
<int key="NSButtonFlags">1211912703</int>
<int key="NSButtonFlags2">2</int>
<reference key="NSNormalImage" ref="587167894"/>
<reference key="NSNormalImage" ref="266862978"/>
<reference key="NSAlternateImage" ref="589920880"/>
<string key="NSAlternateContents"/>
<string key="NSKeyEquivalent"/>
@ -339,7 +349,7 @@
<reference key="NSControlView" ref="290008886"/>
<int key="NSButtonFlags">1211912703</int>
<int key="NSButtonFlags2">2</int>
<reference key="NSNormalImage" ref="587167894"/>
<reference key="NSNormalImage" ref="266862978"/>
<reference key="NSAlternateImage" ref="589920880"/>
<string key="NSAlternateContents"/>
<string key="NSKeyEquivalent"/>
@ -361,7 +371,7 @@
<reference key="NSControlView" ref="551239185"/>
<int key="NSButtonFlags">1211912703</int>
<int key="NSButtonFlags2">2</int>
<reference key="NSNormalImage" ref="587167894"/>
<reference key="NSNormalImage" ref="266862978"/>
<reference key="NSAlternateImage" ref="589920880"/>
<string key="NSAlternateContents"/>
<string key="NSKeyEquivalent"/>
@ -383,7 +393,7 @@
<reference key="NSControlView" ref="208488736"/>
<int key="NSButtonFlags">1211912703</int>
<int key="NSButtonFlags2">2</int>
<reference key="NSNormalImage" ref="587167894"/>
<reference key="NSNormalImage" ref="266862978"/>
<reference key="NSAlternateImage" ref="589920880"/>
<string key="NSAlternateContents"/>
<string key="NSKeyEquivalent"/>
@ -405,7 +415,7 @@
<reference key="NSControlView" ref="427690895"/>
<int key="NSButtonFlags">1211912703</int>
<int key="NSButtonFlags2">2</int>
<reference key="NSNormalImage" ref="587167894"/>
<reference key="NSNormalImage" ref="266862978"/>
<reference key="NSAlternateImage" ref="589920880"/>
<string key="NSAlternateContents"/>
<string key="NSKeyEquivalent"/>
@ -621,7 +631,7 @@
<reference key="NSControlView" ref="724127338"/>
<int key="NSButtonFlags">1211912703</int>
<int key="NSButtonFlags2">2</int>
<reference key="NSNormalImage" ref="587167894"/>
<reference key="NSNormalImage" ref="266862978"/>
<reference key="NSAlternateImage" ref="589920880"/>
<string key="NSAlternateContents"/>
<string key="NSKeyEquivalent"/>
@ -643,7 +653,7 @@
<reference key="NSControlView" ref="647216699"/>
<int key="NSButtonFlags">1211912703</int>
<int key="NSButtonFlags2">2</int>
<reference key="NSNormalImage" ref="587167894"/>
<reference key="NSNormalImage" ref="266862978"/>
<reference key="NSAlternateImage" ref="589920880"/>
<string key="NSAlternateContents"/>
<string key="NSKeyEquivalent"/>
@ -665,7 +675,7 @@
<reference key="NSControlView" ref="727223254"/>
<int key="NSButtonFlags">1211912703</int>
<int key="NSButtonFlags2">2</int>
<reference key="NSNormalImage" ref="587167894"/>
<reference key="NSNormalImage" ref="266862978"/>
<reference key="NSAlternateImage" ref="589920880"/>
<string key="NSAlternateContents"/>
<string key="NSKeyEquivalent"/>
@ -1378,6 +1388,7 @@
<bool key="EncodedWithXMLCoder">YES</bool>
<reference ref="101272617"/>
<reference ref="352817522"/>
<reference ref="510059249"/>
</object>
<reference key="parent" ref="63752222"/>
</object>
@ -1663,6 +1674,11 @@
<reference key="object" ref="236967908"/>
<reference key="parent" ref="727223254"/>
</object>
<object class="IBObjectRecord">
<int key="objectID">149</int>
<reference key="object" ref="510059249"/>
<reference key="parent" ref="38553798"/>
</object>
</object>
</object>
<object class="NSMutableDictionary" key="flattenedProperties">
@ -1694,6 +1710,7 @@
<string>145.IBViewBoundsToFrameTransform</string>
<string>145.ImportedFromIB2</string>
<string>146.IBPluginDependency</string>
<string>149.IBPluginDependency</string>
<string>51.IBPluginDependency</string>
<string>51.ImportedFromIB2</string>
<string>52.IBEditorWindowLastContentRect</string>
@ -1754,6 +1771,7 @@
<string>76.IBPluginDependency</string>
<string>77.IBPluginDependency</string>
<string>78.IBPluginDependency</string>
<string>79.IBEditorWindowLastContentRect</string>
<string>79.IBPluginDependency</string>
<string>79.ImportedFromIB2</string>
<string>80.IBPluginDependency</string>
@ -1822,6 +1840,7 @@
<boolean value="YES"/>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
<boolean value="YES"/>
<string>{{88, 520}, {389, 325}}</string>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
@ -1895,6 +1914,7 @@
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
<string>{{216, 742}, {216, 63}}</string>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
<boolean value="YES"/>
<string>com.apple.InterfaceBuilder.CocoaPlugin</string>
@ -1944,7 +1964,7 @@
</object>
</object>
<nil key="sourceID"/>
<int key="maxID">148</int>
<int key="maxID">149</int>
</object>
<object class="IBClassDescriber" key="IBDocument.Classes">
<object class="NSMutableArray" key="referencedPartialClassDescriptions">

View File

@ -368,7 +368,10 @@ class DupeGuru(RegistrableApplication, Broadcaster):
def start_scanning(self):
def do(j):
j.set_progress(0, tr("Collecting files to scan"))
files = list(self.directories.get_files())
if self.scanner.scan_type == scanner.ScanType.Folders:
files = list(self.directories.get_folders())
else:
files = list(self.directories.get_files())
if self.options['ignore_hardlink_matches']:
files = self._remove_hardlink_dupes(files)
logging.info('Scanning %d files' % len(files))

View File

@ -15,9 +15,10 @@ from hscommon.util import FileOrPath
from . import fs
(STATE_NORMAL,
STATE_REFERENCE,
STATE_EXCLUDED) = range(3)
class DirectoryState:
    """Integer codes for the state a directory can be given."""
    # Normal: items are collected with is_ref left False.
    # Reference: items are collected with is_ref set to True.
    # Excluded: nothing is collected from the directory itself.
    Normal, Reference, Excluded = 0, 1, 2
class AlreadyThereError(Exception):
    """Signals that the path being added is already present in the directory list."""
@ -51,11 +52,11 @@ class Directories:
def _default_state_for_path(self, path):
# Override this in subclasses to specify the state of some special folders.
if path[-1].startswith('.'): # hidden
return STATE_EXCLUDED
return DirectoryState.Excluded
def _get_files(self, from_path):
state = self.get_state(from_path)
if state == STATE_EXCLUDED:
if state == DirectoryState.Excluded:
# Recursively getting files from folders with lots of subfolders is expensive. However, there
# might be a subfolder in this path that is not excluded. What we want to do is to skim
# through self.states and see if we must continue, or we can stop right here to save time
@ -63,11 +64,11 @@ class Directories:
return
try:
filepaths = set()
if state != STATE_EXCLUDED:
if state != DirectoryState.Excluded:
found_files = fs.get_files(from_path, fileclasses=self.fileclasses)
logging.debug("Collected {} files in folder {}".format(len(found_files), str(from_path)))
logging.debug("Collected %d files in folder %s", len(found_files), str(from_path))
for file in found_files:
file.is_ref = state == STATE_REFERENCE
file.is_ref = state == DirectoryState.Reference
filepaths.add(file.path)
yield file
subpaths = [from_path + name for name in io.listdir(from_path)]
@ -79,6 +80,18 @@ class Directories:
except (EnvironmentError, fs.InvalidPath):
pass
def _get_folders(self, from_folder):
    """Yield ``from_folder`` and its descendant folders, descendants first.

    Subfolders are walked recursively before ``from_folder`` itself is considered. A folder
    whose state is Excluded is not yielded, but its subfolders are still visited. Each
    yielded folder has ``is_ref`` set to whether its state is Reference.
    ``EnvironmentError`` and ``fs.InvalidPath`` raised during the walk end it silently.
    """
    folder_state = self.get_state(from_folder.path)
    try:
        for child in from_folder.subfolders:
            for descendant in self._get_folders(child):
                yield descendant
        if folder_state == DirectoryState.Excluded:
            return
        from_folder.is_ref = folder_state == DirectoryState.Reference
        yield from_folder
    except (EnvironmentError, fs.InvalidPath):
        pass
#---Public
def add_path(self, path):
"""Adds 'path' to self, if not already there.
@ -113,6 +126,16 @@ class Directories:
for file in self._get_files(path):
yield file
def get_folders(self):
    """Generate every non-excluded folder found under the registered directories.

    Each path in ``self._dirs`` is wrapped in an ``fs.Folder`` and walked with
    ``_get_folders``; the folders produced carry an ``is_ref`` attribute.
    """
    for root_path in self._dirs:
        root_folder = fs.Folder(root_path)
        for found in self._get_folders(root_folder):
            yield found
def get_state(self, path):
"""Returns the state of 'path' (One of the STATE_* const.)
"""
@ -125,7 +148,7 @@ class Directories:
if parent in self:
return self.get_state(parent)
else:
return STATE_NORMAL
return DirectoryState.Normal
def has_any_file(self):
try:

View File

@ -63,6 +63,9 @@ class File:
self._md5partial_offset = 0x4000 #16Kb
self._md5partial_size = 0x4000 #16Kb
def __repr__(self):
return "<{} {}>".format(self.__class__.__name__, str(self.path))
def __getattr__(self, attrname):
# Only called when attr is not there
if attrname in self.INITIAL_INFO:
@ -147,6 +150,49 @@ class File:
return self.path[-1]
class Folder(File):
    """A File-like wrapper around a folder path.

    It exposes the same size/md5 info as a File, but its values are aggregated from the
    items it contains (its subfolders and the files directly inside it).
    """
    def __init__(self, path):
        File.__init__(self, path)
        # Filled in lazily by the ``subfolders`` property.
        self._subfolders = None

    def _all_items(self):
        """Return the subfolders followed by the files found directly in this folder."""
        child_folders = self.subfolders
        child_files = get_files(self.path)
        return child_folders + child_files

    def _read_info(self, field):
        """Compute ``field`` and store it on the instance.

        'size' and 'mtime' are filled in together: size is the sum of the sizes of all
        items, mtime comes from a stat of the folder itself. 'md5' and 'md5partial' are the
        md5 of the concatenated digests of all items.
        """
        if field in {'size', 'mtime'}:
            self.size = sum(item.size for item in self._all_items())
            folder_stats = io.stat(self.path)
            self.mtime = nonone(folder_stats.st_mtime, 0)
        elif field in {'md5', 'md5partial'}:
            # The digests of the sub-items must always be combined in the same order, hence
            # the sort by path. Because subfolders contribute their own aggregated digest,
            # moving a file to a different subdirectory still changes the result.
            ordered_items = sorted(self._all_items(), key=lambda item: item.path)
            digests = [getattr(item, field) for item in ordered_items]
            hasher = hashlib.md5(b''.join(digests))
            setattr(self, field, hasher.digest())

    @property
    def subfolders(self):
        """Folder objects for the direct, non-symlink subdirectories (built once, then cached)."""
        if self._subfolders is not None:
            return self._subfolders
        entries = [self.path + name for name in io.listdir(self.path)]
        directories = [entry for entry in entries if not io.islink(entry) and io.isdir(entry)]
        self._subfolders = [Folder(entry) for entry in directories]
        return self._subfolders
def get_file(path, fileclasses=[File]):
for fileclass in fileclasses:
if fileclass.can_handle(path):
@ -172,12 +218,3 @@ def get_files(path, fileclasses=[File]):
return result
except EnvironmentError:
raise InvalidPath(path)
def get_all_files(path, fileclasses=[File]):
    """Recursively collect the files under ``path``.

    Files directly in ``path`` come from ``get_files``. Every non-symlink subdirectory that
    was not itself returned as a file is then walked the same way. The result lists the
    files from the subdirectories first, followed by the direct files.
    """
    direct_files = get_files(path, fileclasses=fileclasses)
    claimed_paths = {f.path for f in direct_files}
    entries = [path + name for name in io.listdir(path)]

    def is_walkable(entry):
        # A folder (bundle) may already be in the file list; such a folder must not be
        # recursed into.
        return not io.islink(entry) and io.isdir(entry) and entry not in claimed_paths

    walkable = list(filter(is_walkable, entries))
    nested_files = flatten(get_all_files(sub, fileclasses=fileclasses) for sub in walkable)
    return nested_files + direct_files

View File

@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# Created By: Virgil Dupras
# Created On: 2010-02-06
# Copyright 2011 Hardcoded Software (http://www.hardcoded.net)
@ -9,10 +8,10 @@
from hscommon.gui.tree import Tree, Node
from ..directories import STATE_NORMAL, STATE_REFERENCE, STATE_EXCLUDED
from ..directories import DirectoryState
from .base import GUIObject
STATE_ORDER = [STATE_NORMAL, STATE_REFERENCE, STATE_EXCLUDED]
STATE_ORDER = [DirectoryState.Normal, DirectoryState.Reference, DirectoryState.Excluded]
# Lazily loads children
class DirectoryNode(Node):

View File

@ -22,7 +22,7 @@ class ScanType:
Fields = 1
FieldsNoOrder = 2
Tag = 3
# number 4 is obsolete
Folders = 4
Contents = 5
ContentsAudio = 6
@ -48,8 +48,8 @@ class Scanner:
for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
f.size # pre-read, makes a smoother progress if read here (especially for bundles)
files = [f for f in files if f.size >= self.size_threshold]
if self.scan_type in (ScanType.Contents, ScanType.ContentsAudio):
sizeattr = 'size' if self.scan_type == ScanType.Contents else 'audiosize'
if self.scan_type in {ScanType.Contents, ScanType.ContentsAudio, ScanType.Folders}:
sizeattr = 'audiosize' if self.scan_type == ScanType.ContentsAudio else 'size'
return engine.getmatches_by_contents(files, sizeattr, partial=self.scan_type==ScanType.ContentsAudio, j=j)
else:
j = j.start_subjob([2, 8])
@ -92,10 +92,22 @@ class Scanner:
j = j.start_subjob([8, 2])
for f in [f for f in files if not hasattr(f, 'is_ref')]:
f.is_ref = False
logging.info('Getting matches')
logging.info("Getting matches. Scan type: %d", self.scan_type)
matches = self._getmatches(files, j)
logging.info('Found %d matches' % len(matches))
j.set_progress(100, tr("Removing false matches"))
if self.scan_type == ScanType.Folders and matches:
allpath = {m.first.path for m in matches}
allpath |= {m.second.path for m in matches}
sortedpaths = sorted(allpath)
toremove = set()
last_parent_path = sortedpaths[0]
for p in sortedpaths[1:]:
if p in last_parent_path:
toremove.add(p)
else:
last_parent_path = p
matches = [m for m in matches if m.first.path not in toremove or m.second.path not in toremove]
if not self.mix_file_kind:
matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
matches = [m for m in matches if io.exists(m.first.path) and io.exists(m.second.path)]

View File

@ -122,52 +122,52 @@ def test_states():
d = Directories()
p = testpath + 'onefile'
d.add_path(p)
eq_(STATE_NORMAL,d.get_state(p))
d.set_state(p,STATE_REFERENCE)
eq_(STATE_REFERENCE,d.get_state(p))
eq_(STATE_REFERENCE,d.get_state(p + 'dir1'))
eq_(DirectoryState.Normal ,d.get_state(p))
d.set_state(p, DirectoryState.Reference)
eq_(DirectoryState.Reference ,d.get_state(p))
eq_(DirectoryState.Reference ,d.get_state(p + 'dir1'))
eq_(1,len(d.states))
eq_(p,list(d.states.keys())[0])
eq_(STATE_REFERENCE,d.states[p])
eq_(DirectoryState.Reference ,d.states[p])
def test_get_state_with_path_not_there():
# When the path's not there, just return STATE_NORMAL
# When the path's not there, just return DirectoryState.Normal
d = Directories()
d.add_path(testpath + 'onefile')
eq_(d.get_state(testpath), STATE_NORMAL)
eq_(d.get_state(testpath), DirectoryState.Normal)
def test_states_remain_when_larger_directory_eat_smaller_ones():
d = Directories()
p = testpath + 'onefile'
d.add_path(p)
d.set_state(p,STATE_EXCLUDED)
d.set_state(p, DirectoryState.Excluded)
d.add_path(testpath)
d.set_state(testpath,STATE_REFERENCE)
eq_(STATE_EXCLUDED,d.get_state(p))
eq_(STATE_EXCLUDED,d.get_state(p + 'dir1'))
eq_(STATE_REFERENCE,d.get_state(testpath))
d.set_state(testpath, DirectoryState.Reference)
eq_(DirectoryState.Excluded ,d.get_state(p))
eq_(DirectoryState.Excluded ,d.get_state(p + 'dir1'))
eq_(DirectoryState.Reference ,d.get_state(testpath))
def test_set_state_keep_state_dict_size_to_minimum():
d = Directories()
p = testpath + 'fs'
d.add_path(p)
d.set_state(p,STATE_REFERENCE)
d.set_state(p + 'dir1',STATE_REFERENCE)
d.set_state(p, DirectoryState.Reference)
d.set_state(p + 'dir1', DirectoryState.Reference)
eq_(1,len(d.states))
eq_(STATE_REFERENCE,d.get_state(p + 'dir1'))
d.set_state(p + 'dir1',STATE_NORMAL)
eq_(DirectoryState.Reference ,d.get_state(p + 'dir1'))
d.set_state(p + 'dir1', DirectoryState.Normal)
eq_(2,len(d.states))
eq_(STATE_NORMAL,d.get_state(p + 'dir1'))
d.set_state(p + 'dir1',STATE_REFERENCE)
eq_(DirectoryState.Normal ,d.get_state(p + 'dir1'))
d.set_state(p + 'dir1', DirectoryState.Reference)
eq_(1,len(d.states))
eq_(STATE_REFERENCE,d.get_state(p + 'dir1'))
eq_(DirectoryState.Reference ,d.get_state(p + 'dir1'))
def test_get_files():
d = Directories()
p = testpath + 'fs'
d.add_path(p)
d.set_state(p + 'dir1',STATE_REFERENCE)
d.set_state(p + 'dir2',STATE_EXCLUDED)
d.set_state(p + 'dir1', DirectoryState.Reference)
d.set_state(p + 'dir2', DirectoryState.Excluded)
files = list(d.get_files())
eq_(5, len(files))
for f in files:
@ -176,11 +176,26 @@ def test_get_files():
else:
assert not f.is_ref
def test_get_folders():
    # With 'dir1' marked as reference and 'dir2' excluded, get_folders() must return three
    # folders, only one of which ('dir1') is flagged as a reference.
    directories = Directories()
    root = testpath + 'fs'
    directories.add_path(root)
    directories.set_state(root + 'dir1', DirectoryState.Reference)
    directories.set_state(root + 'dir2', DirectoryState.Excluded)
    found = list(directories.get_folders())
    eq_(len(found), 3)
    references = [folder for folder in found if folder.is_ref]
    regulars = [folder for folder in found if not folder.is_ref]
    eq_(len(references), 1)
    eq_(references[0].path, root + 'dir1')
    eq_(len(regulars), 2)
    eq_(references[0].size, 1)
def test_get_files_with_inherited_exclusion():
d = Directories()
p = testpath + 'onefile'
d.add_path(p)
d.set_state(p,STATE_EXCLUDED)
d.set_state(p, DirectoryState.Excluded)
eq_([], list(d.get_files()))
def test_save_and_load(tmpdir):
@ -192,14 +207,14 @@ def test_save_and_load(tmpdir):
io.mkdir(p2)
d1.add_path(p1)
d1.add_path(p2)
d1.set_state(p1, STATE_REFERENCE)
d1.set_state(p1 + 'dir1',STATE_EXCLUDED)
d1.set_state(p1, DirectoryState.Reference)
d1.set_state(p1 + 'dir1', DirectoryState.Excluded)
tmpxml = str(tmpdir.join('directories_testunit.xml'))
d1.save_to_file(tmpxml)
d2.load_from_file(tmpxml)
eq_(2, len(d2))
eq_(STATE_REFERENCE,d2.get_state(p1))
eq_(STATE_EXCLUDED,d2.get_state(p1 + 'dir1'))
eq_(DirectoryState.Reference ,d2.get_state(p1))
eq_(DirectoryState.Excluded ,d2.get_state(p1 + 'dir1'))
def test_invalid_path():
d = Directories()
@ -211,7 +226,7 @@ def test_invalid_path():
def test_set_state_on_invalid_path():
d = Directories()
try:
d.set_state(Path('foobar',),STATE_NORMAL)
d.set_state(Path('foobar',), DirectoryState.Normal)
except LookupError:
assert False
@ -237,7 +252,7 @@ def test_unicode_save(tmpdir):
io.mkdir(p1)
io.mkdir(p1 + 'foo\xe9')
d.add_path(p1)
d.set_state(p1 + 'foo\xe9', STATE_EXCLUDED)
d.set_state(p1 + 'foo\xe9', DirectoryState.Excluded)
tmpxml = str(tmpdir.join('directories_testunit.xml'))
try:
d.save_to_file(tmpxml)
@ -268,17 +283,17 @@ def test_get_state_returns_excluded_by_default_for_hidden_directories(tmpdir):
hidden_dir_path = p + '.foo'
io.mkdir(p + '.foo')
d.add_path(p)
eq_(d.get_state(hidden_dir_path), STATE_EXCLUDED)
eq_(d.get_state(hidden_dir_path), DirectoryState.Excluded)
# But it can be overridden
d.set_state(hidden_dir_path, STATE_NORMAL)
eq_(d.get_state(hidden_dir_path), STATE_NORMAL)
d.set_state(hidden_dir_path, DirectoryState.Normal)
eq_(d.get_state(hidden_dir_path), DirectoryState.Normal)
def test_default_path_state_override(tmpdir):
# It's possible for a subclass to override the default state of a path
class MyDirectories(Directories):
def _default_state_for_path(self, path):
if 'foobar' in path:
return STATE_EXCLUDED
return DirectoryState.Excluded
d = MyDirectories()
p1 = Path(str(tmpdir))
@ -287,11 +302,11 @@ def test_default_path_state_override(tmpdir):
io.mkdir(p1 + 'foobaz')
io.open(p1 + 'foobaz/somefile', 'w').close()
d.add_path(p1)
eq_(d.get_state(p1 + 'foobaz'), STATE_NORMAL)
eq_(d.get_state(p1 + 'foobar'), STATE_EXCLUDED)
eq_(d.get_state(p1 + 'foobaz'), DirectoryState.Normal)
eq_(d.get_state(p1 + 'foobar'), DirectoryState.Excluded)
eq_(len(list(d.get_files())), 1) # only the 'foobaz' file is there
# However, the default state can be changed
d.set_state(p1 + 'foobar', STATE_NORMAL)
eq_(d.get_state(p1 + 'foobar'), STATE_NORMAL)
d.set_state(p1 + 'foobar', DirectoryState.Normal)
eq_(d.get_state(p1 + 'foobar'), DirectoryState.Normal)
eq_(len(list(d.get_files())), 2)

View File

@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# Created By: Virgil Dupras
# Created On: 2009-10-23
# Copyright 2011 Hardcoded Software (http://www.hardcoded.net)
@ -11,14 +10,13 @@ import hashlib
from hscommon.path import Path
from hscommon.testutil import eq_
from core.fs import File
from core.tests.directories_test import create_fake_fs
from .. import fs
def test_size_aggregates_subfiles(tmpdir):
p = create_fake_fs(Path(str(tmpdir)))
b = fs.Bundle(p)
b = fs.Folder(p)
eq_(b.size, 12)
def test_md5_aggregate_subfiles_sorted(tmpdir):
@ -26,18 +24,22 @@ def test_md5_aggregate_subfiles_sorted(tmpdir):
#all files' md5 it contains, but it must make sure that it does so in the
#same order every time.
p = create_fake_fs(Path(str(tmpdir)))
b = fs.Bundle(p)
md5s = File(p + ('dir1', 'file1.test')).md5
md5s += File(p + ('dir2', 'file2.test')).md5
md5s += File(p + ('dir3', 'file3.test')).md5
md5s += File(p + 'file1.test').md5
md5s += File(p + 'file2.test').md5
md5s += File(p + 'file3.test').md5
md5 = hashlib.md5(md5s)
b = fs.Folder(p)
md51 = fs.File(p + ('dir1', 'file1.test')).md5
md52 = fs.File(p + ('dir2', 'file2.test')).md5
md53 = fs.File(p + ('dir3', 'file3.test')).md5
md54 = fs.File(p + 'file1.test').md5
md55 = fs.File(p + 'file2.test').md5
md56 = fs.File(p + 'file3.test').md5
# The expected md5 is the md5 of md5s for folders and the direct md5 for files
folder_md51 = hashlib.md5(md51).digest()
folder_md52 = hashlib.md5(md52).digest()
folder_md53 = hashlib.md5(md53).digest()
md5 = hashlib.md5(folder_md51+folder_md52+folder_md53+md54+md55+md56)
eq_(b.md5, md5.digest())
def test_has_file_attrs(tmpdir):
#a Bundle must behave like a file, so it must have mtime attributes
b = fs.Bundle(Path(str(tmpdir)))
#a Folder must behave like a file, so it must have mtime attributes
b = fs.Folder(Path(str(tmpdir)))
assert b.mtime > 0
eq_(b.extension, '')

View File

@ -471,3 +471,27 @@ def test_dont_group_files_that_dont_exist(tmpdir):
s._getmatches = getmatches
assert not s.GetDupeGroups([file1, file2])
def test_folder_scan_exclude_subfolder_matches(fake_fileexists):
    # In a Folders scan, matches between folders whose parent folders already match must
    # not be reported.
    def make_folder(name, size, digest, path):
        folder = no(name, size=size)
        folder.md5 = folder.md5partial = digest
        folder.path = Path(path)
        return folder

    scanner = Scanner()
    scanner.scan_type = ScanType.Folders
    top_one = make_folder("top folder 1", 42, b"some_md5_1", '/topf1')
    top_two = make_folder("top folder 2", 42, b"some_md5_1", '/topf2')
    sub_one = make_folder("sub folder 1", 41, b"some_md5_2", '/topf1/sub')
    sub_two = make_folder("sub folder 2", 41, b"some_md5_2", '/topf2/sub')
    groups = scanner.GetDupeGroups([top_one, top_two, sub_one, sub_two])
    eq_(len(groups), 1) # only top folders
    # However, if another folder matches a subfolder, keep it in the matches.
    other = make_folder("other folder", 41, b"some_md5_2", '/otherfolder')
    groups = scanner.GetDupeGroups([top_one, top_two, sub_one, sub_two, other])
    eq_(len(groups), 2)

View File

@ -94,7 +94,7 @@ class Directories(directories.Directories):
directories.Directories.__init__(self, fileclasses=[Photo])
try:
self.iphoto_libpath = get_iphoto_database_path()
self.set_state(self.iphoto_libpath[:-1], directories.STATE_EXCLUDED)
self.set_state(self.iphoto_libpath[:-1], directories.DirectoryState.Excluded)
except directories.InvalidPathError:
self.iphoto_libpath = None
@ -102,7 +102,7 @@ class Directories(directories.Directories):
if from_path == IPHOTO_PATH:
if self.iphoto_libpath is None:
return []
is_ref = self.get_state(from_path) == directories.STATE_REFERENCE
is_ref = self.get_state(from_path) == directories.DirectoryState.Reference
photos = get_iphoto_pictures(self.iphoto_libpath)
for photo in photos:
photo.is_ref = is_ref

View File

@ -14,9 +14,8 @@ from hscommon.cocoa.objcmin import NSWorkspace
from core import fs
from core.app_cocoa import DupeGuru as DupeGuruBase
from core.directories import Directories as DirectoriesBase, STATE_EXCLUDED
from core.directories import Directories as DirectoriesBase, DirectoryState
from . import data
from .fs import Bundle as BundleBase
def is_bundle(str_path):
sw = NSWorkspace.sharedWorkspace()
@ -25,7 +24,7 @@ def is_bundle(str_path):
logging.warning('There was an error trying to detect the UTI of %s', str_path)
return sw.type_conformsToType_(uti, 'com.apple.bundle') or sw.type_conformsToType_(uti, 'com.apple.package')
class Bundle(BundleBase):
class Bundle(fs.Folder):
@classmethod
def can_handle(cls, path):
return not io.islink(path) and io.isdir(path) and is_bundle(str(path))
@ -42,9 +41,22 @@ class Directories(DirectoriesBase):
if result is not None:
return result
if path in self.ROOT_PATH_TO_EXCLUDE:
return STATE_EXCLUDED
return DirectoryState.Excluded
if path[:2] == Path('/Users') and path[3:] in self.HOME_PATH_TO_EXCLUDE:
return STATE_EXCLUDED
return DirectoryState.Excluded
def _get_folders(self, from_folder):
    """Yield the folders to scan under ``from_folder``, treating bundles as opaque.

    When ``from_folder`` is a bundle, at most the bundle itself is yielded and its
    subfolders are never visited. A bundle whose state is Excluded is not yielded, which
    matches how the base implementation treats excluded folders. Any other folder is
    delegated to ``DirectoriesBase._get_folders``.
    """
    # We don't want to scan a bundle's subfolders even in Folders mode. A bundle's integrity
    # has to stay intact.
    if is_bundle(str(from_folder.path)):
        state = self.get_state(from_folder.path)
        # An excluded bundle must not end up among the scanned folders.
        if state != DirectoryState.Excluded:
            from_folder.is_ref = state == DirectoryState.Reference
            yield from_folder
        return
    for folder in DirectoriesBase._get_folders(self, from_folder):
        yield folder
@staticmethod
def get_subfolders(path):

View File

@ -1,41 +0,0 @@
# -*- coding: utf-8 -*-
# Created By: Virgil Dupras
# Created On: 2009-10-23
# Copyright 2011 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
import hashlib
from hscommon import io
from hscommon.util import nonone
from core import fs
class Bundle(fs.File):
    """Represents a Mac OS X bundle (.app).

    The OS sees a bundle as a normal directory, but dupeGuru is meant to treat it as a
    single file.
    """
    def _read_info(self, field):
        """Compute ``field`` from all the files contained in the bundle and store it."""
        if field in ('size', 'mtime'):
            contained = fs.get_all_files(self.path)
            self.size = sum(item.size for item in contained)
            bundle_stats = io.stat(self.path)
            self.mtime = nonone(bundle_stats.st_mtime, 0)
        elif field in ('md5', 'md5partial'):
            # The digests of the contained files must always be combined in the same order,
            # yet the result should differ when a file moves to another subdirectory, hence
            # the sort on the full path.
            contained = sorted(fs.get_all_files(self.path), key=lambda item: item.path)
            digests = [getattr(item, field) for item in contained]
            hasher = hashlib.md5(b''.join(digests))
            setattr(self, field, hasher.digest())

View File

@ -5,6 +5,8 @@ Preferences
**Scan Type:** This option determines what aspect of the files will be compared in the duplicate scan. If you select **Filename**, dupeGuru will compare every filename word-by-word and, depending on the other settings below, it will determine if enough words are matching to consider 2 files duplicates. If you select **Content**, only files with the exact same content will match.
The **Folders** scan type is a bit special. When you choose it, dupeGuru will scan for duplicate *folders* instead of duplicate files. To determine whether two folders are duplicates, all files contained in the folders will be scanned, and if the contents of **all** files in the folders match, the folders will be considered duplicates.
**Filter Hardness:** If you chose the **Filename** scan type, this option determines how similar two filenames must be for dupeGuru to consider them duplicates. If the filter hardness is, for example, 80, it means that 80% of the words of two filenames must match. To determine the matching percentage, dupeGuru first counts the total number of words in **both** filenames, then counts the number of words matching (every matching word counts as 2), and then divides the number of words matching by the total number of words. If the result is higher than or equal to the filter hardness, we have a duplicate match. For example, "a b c d" and "c d e" have a matching percentage of 57 (4 words matching, 7 total words).
.. only:: edition_me

View File

@ -5,6 +5,8 @@ Préférences
**Type de scan:** Cette option détermine quels aspects du fichier doivent être comparés. Un scan par **Nom de fichier** compare les noms de fichiers mot-à-mot et, dépendant des autres préférences ci-dessous, déterminera si les noms se ressemblent assez pour être considérés comme doublons. Un scan par **Contenu** trouvera les doublons qui ont exactement le même contenu.
Le scan **Dossiers** est spécial. Si vous le sélectionnez, dupeGuru cherchera des doublons de *dossiers* plutôt que des doublons de fichiers. Pour déterminer si deux dossiers sont des doublons, dupeGuru regarde le contenu de tous les fichiers dans les dossiers, et si **tous** sont les mêmes, les dossiers sont considérés comme des doublons.
**Seuil du filtre:** Pour les scans de type **Nom de fichier**, cette option détermine le degré de similitude nécessaire afin de considérer deux noms comme doublons. Avec un seuil de 80, 80% des mots doivent être égaux. Pour déterminer ce pourcentage, dupeGuru compte le nombre total de mots des deux noms, puis compte le nombre de mots égaux (chaque mot égal compte pour 2), puis fait la division des deux. Un résultat égalant ou dépassant le seuil sera considéré comme un doublon. Exemple: "a b c d" et "c d e" ont un pourcentage de 57 (4 mots égaux, 7 au total).
.. only:: edition_me

View File

@ -413,6 +413,10 @@
<source>Contents</source>
<translation>Contenu</translation>
</message>
<message>
<source>Folders</source>
<translation>Dossiers</translation>
</message>
<message>
<source>Filter Hardness:</source>
<translation>Seuil du filtre:</translation>

View File

@ -7,7 +7,7 @@
# http://www.hardcoded.net/licenses/bsd_license
from core_se import data, __appname__
from core.directories import Directories as DirectoriesBase, STATE_EXCLUDED
from core.directories import Directories as DirectoriesBase, DirectoryState
from ..base.app import DupeGuru as DupeGuruBase
from .details_dialog import DetailsDialog
@ -21,7 +21,7 @@ class Directories(DirectoriesBase):
if result is not None:
return result
if len(path) == 2 and path[1].lower() in self.ROOT_PATH_TO_EXCLUDE:
return STATE_EXCLUDED
return DirectoryState.Excluded
class DupeGuru(DupeGuruBase):
EDITION = 'se'

View File

@ -7,7 +7,7 @@
# http://www.hardcoded.net/licenses/bsd_license
import sys
from PyQt4.QtCore import SIGNAL, QSize
from PyQt4.QtCore import QSize
from PyQt4.QtGui import (QVBoxLayout, QHBoxLayout, QLabel, QComboBox, QSizePolicy, QSpacerItem,
QWidget, QLineEdit, QApplication)
@ -22,13 +22,14 @@ from . import preferences
# Scan types listed in the same order as the labels added to the scan type
# combo box (Filename, Contents, Folders).
# NOTE(review): presumably used to map a combo box index to a ScanType and
# back — confirm against the code that reads this list.
SCAN_TYPE_ORDER = [
ScanType.Filename,
ScanType.Contents,
ScanType.Folders,
]
class PreferencesDialog(PreferencesDialogBase):
def __init__(self, parent, app):
PreferencesDialogBase.__init__(self, parent, app)
self.connect(self.scanTypeComboBox, SIGNAL('currentIndexChanged(int)'), self.scanTypeChanged)
self.scanTypeComboBox.currentIndexChanged[int].connect(self.scanTypeChanged)
def _setupPreferenceWidgets(self):
self.horizontalLayout = QHBoxLayout()
@ -38,8 +39,8 @@ class PreferencesDialog(PreferencesDialogBase):
self.label_2.setMaximumSize(QSize(100, 16777215))
self.horizontalLayout.addWidget(self.label_2)
self.scanTypeComboBox = QComboBox(self)
self.scanTypeComboBox.addItem(tr("Filename"))
self.scanTypeComboBox.addItem(tr("Contents"))
for label in [tr("Filename"), tr("Contents"), tr("Folders")]:
self.scanTypeComboBox.addItem(label)
self.horizontalLayout.addWidget(self.scanTypeComboBox)
self.widgetsVLayout.addLayout(self.horizontalLayout)
self._setupFilterHardnessBox()