Changed the build system (that commit is *huge*)

--HG-- rename : base/cocoa/AppDelegate.h => cocoa/base/AppDelegate.h rename : base/cocoa/AppDelegate.m => cocoa/base/AppDelegate.m rename : base/cocoa/Consts.h => cocoa/base/Consts.h rename : base/cocoa/DetailsPanel.h => cocoa/base/DetailsPanel.h rename : base/cocoa/DetailsPanel.m => cocoa/base/DetailsPanel.m rename : base/cocoa/DirectoryPanel.h => cocoa/base/DirectoryPanel.h rename : base/cocoa/DirectoryPanel.m => cocoa/base/DirectoryPanel.m rename : base/cocoa/PyDupeGuru.h => cocoa/base/PyDupeGuru.h rename : base/cocoa/ResultWindow.h => cocoa/base/ResultWindow.h rename : base/cocoa/ResultWindow.m => cocoa/base/ResultWindow.m rename : base/cocoa/dsa_pub.pem => cocoa/base/dsa_pub.pem rename : base/cocoa/xib/DetailsPanel.xib => cocoa/base/xib/DetailsPanel.xib rename : base/cocoa/xib/DirectoryPanel.xib => cocoa/base/xib/DirectoryPanel.xib rename : base/cocoa/xib/MainMenu.xib => cocoa/base/xib/MainMenu.xib rename : me/cocoa/AppDelegate.h => cocoa/me/AppDelegate.h rename : me/cocoa/AppDelegate.m => cocoa/me/AppDelegate.m rename : me/cocoa/Consts.h => cocoa/me/Consts.h rename : me/cocoa/DetailsPanel.h => cocoa/me/DetailsPanel.h rename : me/cocoa/DetailsPanel.m => cocoa/me/DetailsPanel.m rename : me/cocoa/DirectoryPanel.h => cocoa/me/DirectoryPanel.h rename : me/cocoa/DirectoryPanel.m => cocoa/me/DirectoryPanel.m rename : me/cocoa/Info.plist => cocoa/me/Info.plist rename : me/cocoa/PyDupeGuru.h => cocoa/me/PyDupeGuru.h rename : me/cocoa/ResultWindow.h => cocoa/me/ResultWindow.h rename : me/cocoa/ResultWindow.m => cocoa/me/ResultWindow.m rename : me/cocoa/dupeguru.icns => cocoa/me/dupeguru.icns rename : me/cocoa/dupeguru.xcodeproj/project.pbxproj => cocoa/me/dupeguru.xcodeproj/project.pbxproj rename : me/cocoa/gen.py => cocoa/me/gen.py rename : me/cocoa/main.m => cocoa/me/main.m rename : me/cocoa/py/dg_cocoa.py => cocoa/me/py/dg_cocoa.py rename : me/cocoa/py/setup.py => cocoa/me/py/setup.py rename : me/cocoa/xib/Preferences.xib => cocoa/me/xib/Preferences.xib rename : 
pe/cocoa/AppDelegate.h => cocoa/pe/AppDelegate.h rename : pe/cocoa/AppDelegate.m => cocoa/pe/AppDelegate.m rename : pe/cocoa/Consts.h => cocoa/pe/Consts.h rename : pe/cocoa/DetailsPanel.h => cocoa/pe/DetailsPanel.h rename : pe/cocoa/DetailsPanel.m => cocoa/pe/DetailsPanel.m rename : pe/cocoa/DirectoryPanel.h => cocoa/pe/DirectoryPanel.h rename : pe/cocoa/DirectoryPanel.m => cocoa/pe/DirectoryPanel.m rename : pe/cocoa/Info.plist => cocoa/pe/Info.plist rename : pe/cocoa/PictureBlocks.h => cocoa/pe/PictureBlocks.h rename : pe/cocoa/PictureBlocks.m => cocoa/pe/PictureBlocks.m rename : pe/cocoa/PyDupeGuru.h => cocoa/pe/PyDupeGuru.h rename : pe/cocoa/ResultWindow.h => cocoa/pe/ResultWindow.h rename : pe/cocoa/ResultWindow.m => cocoa/pe/ResultWindow.m rename : pe/cocoa/dupeguru.icns => cocoa/pe/dupeguru.icns rename : pe/cocoa/dupeguru.xcodeproj/project.pbxproj => cocoa/pe/dupeguru.xcodeproj/project.pbxproj rename : pe/cocoa/gen.py => cocoa/pe/gen.py rename : pe/cocoa/main.m => cocoa/pe/main.m rename : pe/cocoa/py/dg_cocoa.py => cocoa/pe/py/dg_cocoa.py rename : pe/cocoa/py/setup.py => cocoa/pe/py/setup.py rename : pe/cocoa/xib/DetailsPanel.xib => cocoa/pe/xib/DetailsPanel.xib rename : pe/cocoa/xib/Preferences.xib => cocoa/pe/xib/Preferences.xib rename : se/cocoa/AppDelegate.h => cocoa/se/AppDelegate.h rename : se/cocoa/AppDelegate.m => cocoa/se/AppDelegate.m rename : se/cocoa/Consts.h => cocoa/se/Consts.h rename : se/cocoa/DetailsPanel.h => cocoa/se/DetailsPanel.h rename : se/cocoa/DetailsPanel.m => cocoa/se/DetailsPanel.m rename : se/cocoa/DirectoryPanel.h => cocoa/se/DirectoryPanel.h rename : se/cocoa/DirectoryPanel.m => cocoa/se/DirectoryPanel.m rename : se/cocoa/Info.plist => cocoa/se/Info.plist rename : se/cocoa/PyDupeGuru.h => cocoa/se/PyDupeGuru.h rename : se/cocoa/ResultWindow.h => cocoa/se/ResultWindow.h rename : se/cocoa/ResultWindow.m => cocoa/se/ResultWindow.m rename : se/cocoa/dupeguru.icns => cocoa/se/dupeguru.icns rename : 
se/cocoa/dupeguru.xcodeproj/project.pbxproj => cocoa/se/dupeguru.xcodeproj/project.pbxproj rename : se/cocoa/gen.py => cocoa/se/gen.py rename : se/cocoa/main.m => cocoa/se/main.m rename : se/cocoa/py/dg_cocoa.py => cocoa/se/py/dg_cocoa.py rename : se/cocoa/py/setup.py => cocoa/se/py/setup.py rename : se/cocoa/xib/Preferences.xib => cocoa/se/xib/Preferences.xib rename : base/core/LICENSE => core/LICENSE rename : base/core/__init__.py => core/__init__.py rename : base/core/app.py => core/app.py rename : base/core/app_cocoa.py => core/app_cocoa.py rename : base/core/data.py => core/data.py rename : base/core/directories.py => core/directories.py rename : base/core/engine.py => core/engine.py rename : base/core/export.py => core/export.py rename : base/core/fs.py => core/fs.py rename : base/core/ignore.py => core/ignore.py rename : base/core/results.py => core/results.py rename : base/core/scanner.py => core/scanner.py rename : base/core/tests/__init__.py => core/tests/__init__.py rename : base/core/tests/app_cocoa_test.py => core/tests/app_cocoa_test.py rename : base/core/tests/app_test.py => core/tests/app_test.py rename : base/core/tests/data.py => core/tests/data.py rename : base/core/tests/directories_test.py => core/tests/directories_test.py rename : base/core/tests/engine_test.py => core/tests/engine_test.py rename : base/core/tests/ignore_test.py => core/tests/ignore_test.py rename : base/core/tests/results_test.py => core/tests/results_test.py rename : base/core/tests/scanner_test.py => core/tests/scanner_test.py rename : me/core/__init__.py => core_me/__init__.py rename : me/core/app_cocoa.py => core_me/app_cocoa.py rename : me/core/data.py => core_me/data.py rename : me/core/fs.py => core_me/fs.py rename : me/core/scanner.py => core_me/scanner.py rename : me/core/tests/__init__.py => core_me/tests/__init__.py rename : me/core/tests/scanner_test.py => core_me/tests/scanner_test.py rename : pe/core/LICENSE => core_pe/LICENSE rename : pe/core/__init__.py => 
core_pe/__init__.py rename : pe/core/app_cocoa.py => core_pe/app_cocoa.py rename : pe/core/block.py => core_pe/block.py rename : pe/core/cache.py => core_pe/cache.py rename : pe/core/data.py => core_pe/data.py rename : pe/core/gen.py => core_pe/gen.py rename : pe/core/matchbase.py => core_pe/matchbase.py rename : pe/core/modules/block/block.pyx => core_pe/modules/block/block.pyx rename : pe/core/modules/block/setup.py => core_pe/modules/block/setup.py rename : pe/core/modules/cache/cache.pyx => core_pe/modules/cache/cache.pyx rename : pe/core/modules/cache/setup.py => core_pe/modules/cache/setup.py rename : pe/core/scanner.py => core_pe/scanner.py rename : pe/core/tests/__init__.py => core_pe/tests/__init__.py rename : pe/core/tests/block_test.py => core_pe/tests/block_test.py rename : pe/core/tests/cache_test.py => core_pe/tests/cache_test.py rename : se/core/LICENSE => core_se/LICENSE rename : se/core/__init__.py => core_se/__init__.py rename : se/core/app_cocoa.py => core_se/app_cocoa.py rename : se/core/data.py => core_se/data.py rename : se/core/fs.py => core_se/fs.py rename : se/core/tests/__init__.py => core_se/tests/__init__.py rename : se/core/tests/fs_test.py => core_se/tests/fs_test.py rename : me/help/LICENSE => help_me/LICENSE rename : me/help/__init__.py => help_me/__init__.py rename : me/help/changelog.yaml => help_me/changelog.yaml rename : me/help/gen.py => help_me/gen.py rename : me/help/skeleton/hardcoded.css => help_me/skeleton/hardcoded.css rename : me/help/skeleton/images/hs_title.png => help_me/skeleton/images/hs_title.png rename : me/help/templates/base_dg.mako => help_me/templates/base_dg.mako rename : me/help/templates/credits.mako => help_me/templates/credits.mako rename : me/help/templates/directories.mako => help_me/templates/directories.mako rename : me/help/templates/faq.mako => help_me/templates/faq.mako rename : me/help/templates/intro.mako => help_me/templates/intro.mako rename : me/help/templates/power_marker.mako => 
help_me/templates/power_marker.mako rename : me/help/templates/preferences.mako => help_me/templates/preferences.mako rename : me/help/templates/quick_start.mako => help_me/templates/quick_start.mako rename : me/help/templates/results.mako => help_me/templates/results.mako rename : me/help/templates/versions.mako => help_me/templates/versions.mako rename : pe/help/LICENSE => help_pe/LICENSE rename : pe/help/__init__.py => help_pe/__init__.py rename : pe/help/changelog.yaml => help_pe/changelog.yaml rename : pe/help/gen.py => help_pe/gen.py rename : pe/help/skeleton/hardcoded.css => help_pe/skeleton/hardcoded.css rename : pe/help/skeleton/images/hs_title.png => help_pe/skeleton/images/hs_title.png rename : pe/help/templates/base_dg.mako => help_pe/templates/base_dg.mako rename : pe/help/templates/credits.mako => help_pe/templates/credits.mako rename : pe/help/templates/directories.mako => help_pe/templates/directories.mako rename : pe/help/templates/faq.mako => help_pe/templates/faq.mako rename : pe/help/templates/intro.mako => help_pe/templates/intro.mako rename : pe/help/templates/power_marker.mako => help_pe/templates/power_marker.mako rename : pe/help/templates/preferences.mako => help_pe/templates/preferences.mako rename : pe/help/templates/quick_start.mako => help_pe/templates/quick_start.mako rename : pe/help/templates/results.mako => help_pe/templates/results.mako rename : pe/help/templates/versions.mako => help_pe/templates/versions.mako rename : se/help/LICENSE => help_se/LICENSE rename : se/help/changelog.yaml => help_se/changelog.yaml rename : se/help/gen.py => help_se/gen.py rename : se/help/skeleton/hardcoded.css => help_se/skeleton/hardcoded.css rename : se/help/skeleton/images/hs_title.png => help_se/skeleton/images/hs_title.png rename : se/help/templates/base_dg.mako => help_se/templates/base_dg.mako rename : se/help/templates/credits.mako => help_se/templates/credits.mako rename : se/help/templates/directories.mako => 
help_se/templates/directories.mako rename : se/help/templates/faq.mako => help_se/templates/faq.mako rename : se/help/templates/intro.mako => help_se/templates/intro.mako rename : se/help/templates/power_marker.mako => help_se/templates/power_marker.mako rename : se/help/templates/preferences.mako => help_se/templates/preferences.mako rename : se/help/templates/quick_start.mako => help_se/templates/quick_start.mako rename : se/help/templates/results.mako => help_se/templates/results.mako rename : se/help/templates/versions.mako => help_se/templates/versions.mako rename : base/qt/WARNING => qt/WARNING rename : base/qt/__init__.py => qt/base/__init__.py rename : base/qt/app.py => qt/base/app.py rename : base/qt/details_table.py => qt/base/details_table.py rename : base/qt/dg.qrc => qt/base/dg.qrc rename : base/qt/directories_dialog.py => qt/base/directories_dialog.py rename : base/qt/directories_dialog.ui => qt/base/directories_dialog.ui rename : base/qt/directories_model.py => qt/base/directories_model.py rename : base/qt/main_window.py => qt/base/main_window.py rename : base/qt/main_window.ui => qt/base/main_window.ui rename : base/qt/platform.py => qt/base/platform.py rename : base/qt/platform_osx.py => qt/base/platform_osx.py rename : base/qt/platform_win.py => qt/base/platform_win.py rename : base/qt/preferences.py => qt/base/preferences.py rename : base/qt/results_model.py => qt/base/results_model.py rename : me/qt/app.py => qt/me/app.py rename : me/qt/build.py => qt/me/build.py rename : me/qt/details_dialog.py => qt/me/details_dialog.py rename : me/qt/details_dialog.ui => qt/me/details_dialog.ui rename : me/qt/dgme.spec => qt/me/dgme.spec rename : me/qt/gen.py => qt/me/gen.py rename : me/qt/installer.aip => qt/me/installer.aip rename : me/qt/preferences.py => qt/me/preferences.py rename : me/qt/preferences_dialog.py => qt/me/preferences_dialog.py rename : me/qt/preferences_dialog.ui => qt/me/preferences_dialog.ui rename : me/qt/profile.py => qt/me/profile.py 
rename : me/qt/start.py => qt/me/start.py rename : me/qt/verinfo => qt/me/verinfo rename : pe/qt/app.py => qt/pe/app.py rename : pe/qt/block.py => qt/pe/block.py rename : pe/qt/build.py => qt/pe/build.py rename : pe/qt/details_dialog.py => qt/pe/details_dialog.py rename : pe/qt/details_dialog.ui => qt/pe/details_dialog.ui rename : pe/qt/dgpe.spec => qt/pe/dgpe.spec rename : pe/qt/gen.py => qt/pe/gen.py rename : pe/qt/installer.aip => qt/pe/installer.aip rename : pe/qt/main_window.py => qt/pe/main_window.py rename : pe/qt/modules/block/block.pyx => qt/pe/modules/block/block.pyx rename : pe/qt/modules/block/setup.py => qt/pe/modules/block/setup.py rename : pe/qt/preferences.py => qt/pe/preferences.py rename : pe/qt/preferences_dialog.py => qt/pe/preferences_dialog.py rename : pe/qt/preferences_dialog.ui => qt/pe/preferences_dialog.ui rename : pe/qt/profile.py => qt/pe/profile.py rename : pe/qt/start.py => qt/pe/start.py rename : pe/qt/verinfo => qt/pe/verinfo rename : se/qt/app.py => qt/se/app.py rename : se/qt/build.py => qt/se/build.py rename : se/qt/details_dialog.py => qt/se/details_dialog.py rename : se/qt/details_dialog.ui => qt/se/details_dialog.ui rename : se/qt/dgse.spec => qt/se/dgse.spec rename : se/qt/gen.py => qt/se/gen.py rename : se/qt/installer.aip => qt/se/installer.aip rename : se/qt/preferences.py => qt/se/preferences.py rename : se/qt/preferences_dialog.py => qt/se/preferences_dialog.py rename : se/qt/preferences_dialog.ui => qt/se/preferences_dialog.ui rename : se/qt/profile.py => qt/se/profile.py rename : se/qt/start.py => qt/se/start.py rename : se/qt/verinfo => qt/se/verinfo extra : convert_revision : svn%3Ac306627e-7827-47d3-bdf0-9a457c9553a1/trunk%40285
2026-03-08 10:11:38 +00:00 · 2009-12-30 16:34:41 +00:00
parent 5645515d90
commit 838f8ae352
251 changed files with 602 additions and 500 deletions
--- a/core/tests/__init__.py
+++ b/core/tests/__init__.py
--- a/core/tests/app_cocoa_test.py
+++ b/core/tests/app_cocoa_test.py
@@ -0,0 +1,366 @@
+# Created By: Virgil Dupras
+# Created On: 2006/11/11
+# $Id$
+# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
+# 
+# This software is licensed under the "HS" License as described in the "LICENSE" file, 
+# which should be included with this package. The terms are also available at 
+# http://www.hardcoded.net/licenses/hs_license
+
+import tempfile
+import shutil
+import logging
+import os.path as op
+
+from nose.tools import eq_
+
+from hsutil.path import Path
+from hsutil.testcase import TestCase
+from hsutil.decorators import log_calls
+from hsutil import io
+
+from . import data
+from .results_test import GetTestGroups
+from .. import engine, fs
+try:
+    from ..app_cocoa import DupeGuru as DupeGuruBase
+except ImportError:
+    from nose.plugins.skip import SkipTest
+    raise SkipTest("These tests can only be run on OS X")
+
+class DupeGuru(DupeGuruBase):
+    def __init__(self):
+        DupeGuruBase.__init__(self, data, '/tmp', appid=4)
+    
+    def _start_job(self, jobid, func):
+        func(nulljob)
+    
+def r2np(rows):
+    #Transforms a list of rows [1,2,3] into a list of node paths [[1],[2],[3]]
+    return [[i] for i in rows]
+
+class TCDupeGuru(TestCase):
+    def setUp(self):
+        self.app = DupeGuru()
+        self.objects,self.matches,self.groups = GetTestGroups()
+        self.app.results.groups = self.groups
+        tmppath = self.tmppath()
+        io.mkdir(tmppath + 'foo')
+        io.mkdir(tmppath + 'bar')
+        self.app.directories.add_path(tmppath)
+    
+    def test_GetObjects(self):
+        app = self.app
+        objects = self.objects
+        groups = self.groups
+        g,d = app.GetObjects([0])
+        self.assert_(g is groups[0])
+        self.assert_(d is None)
+        g,d = app.GetObjects([0,0])
+        self.assert_(g is groups[0])
+        self.assert_(d is objects[1])
+        g,d = app.GetObjects([1,0])
+        self.assert_(g is groups[1])
+        self.assert_(d is objects[4])
+    
+    def test_GetObjects_after_sort(self):
+        app = self.app
+        objects = self.objects
+        groups = self.groups[:] #To keep the old order in memory
+        app.sort_groups(0,False) #0 = Filename
+        #Now, the group order is supposed to be reversed
+        g,d = app.GetObjects([0,0])
+        self.assert_(g is groups[1])
+        self.assert_(d is objects[4])
+    
+    def test_GetObjects_out_of_range(self):
+        app = self.app
+        self.assertEqual((None,None),app.GetObjects([2]))
+        self.assertEqual((None,None),app.GetObjects([]))
+        self.assertEqual((None,None),app.GetObjects([1,2]))
+    
+    def test_selected_result_node_paths(self):
+        # app.selected_dupes is correctly converted into node paths
+        app = self.app
+        objects = self.objects
+        paths = [[0, 0], [0, 1], [1]]
+        app.SelectResultNodePaths(paths)
+        eq_(app.selected_result_node_paths(), paths)
+    
+    def test_selected_result_node_paths_after_deletion(self):
+        # cases where the selected dupes aren't there are correctly handled
+        app = self.app
+        objects = self.objects
+        paths = [[0, 0], [0, 1], [1]]
+        app.SelectResultNodePaths(paths)
+        app.RemoveSelected()
+        # The first 2 dupes have been removed. The 3rd one is a ref, so it stays there, in first position.
+        eq_(app.selected_result_node_paths(), [[0]]) # no exception
+    
+    def test_selectResultNodePaths(self):
+        app = self.app
+        objects = self.objects
+        app.SelectResultNodePaths([[0,0],[0,1]])
+        self.assertEqual(2,len(app.selected_dupes))
+        self.assert_(app.selected_dupes[0] is objects[1])
+        self.assert_(app.selected_dupes[1] is objects[2])
+    
+    def test_selectResultNodePaths_with_ref(self):
+        app = self.app
+        objects = self.objects
+        app.SelectResultNodePaths([[0,0],[0,1],[1]])
+        self.assertEqual(3,len(app.selected_dupes))
+        self.assert_(app.selected_dupes[0] is objects[1])
+        self.assert_(app.selected_dupes[1] is objects[2])
+        self.assert_(app.selected_dupes[2] is self.groups[1].ref)
+    
+    def test_selectResultNodePaths_empty(self):
+        self.app.SelectResultNodePaths([])
+        self.assertEqual(0,len(self.app.selected_dupes))        
+    
+    def test_selectResultNodePaths_after_sort(self):
+        app = self.app
+        objects = self.objects
+        groups = self.groups[:] #To keep the old order in memory
+        app.sort_groups(0,False) #0 = Filename
+        #Now, the group order is supposed to be reversed
+        app.SelectResultNodePaths([[0,0],[1],[1,0]])
+        self.assertEqual(3,len(app.selected_dupes))
+        self.assert_(app.selected_dupes[0] is objects[4])
+        self.assert_(app.selected_dupes[1] is groups[0].ref)
+        self.assert_(app.selected_dupes[2] is objects[1])
+    
+    def test_selectResultNodePaths_out_of_range(self):
+        app = self.app
+        app.SelectResultNodePaths([[0,0],[0,1],[1],[1,1],[2]])
+        self.assertEqual(3,len(app.selected_dupes))
+    
+    def test_selected_powermarker_node_paths(self):
+        # app.selected_dupes is correctly converted into paths
+        app = self.app
+        objects = self.objects
+        paths = r2np([0, 1, 2])
+        app.SelectPowerMarkerNodePaths(paths)
+        eq_(app.selected_powermarker_node_paths(), paths)
+    
+    def test_selected_powermarker_node_paths_after_deletion(self):
+        # cases where the selected dupes aren't there are correctly handled
+        app = self.app
+        objects = self.objects
+        paths = r2np([0, 1, 2])
+        app.SelectPowerMarkerNodePaths(paths)
+        app.RemoveSelected()
+        eq_(app.selected_powermarker_node_paths(), []) # no exception
+    
+    def test_selectPowerMarkerRows(self):
+        app = self.app
+        objects = self.objects
+        app.SelectPowerMarkerNodePaths(r2np([0,1,2]))
+        self.assertEqual(3,len(app.selected_dupes))
+        self.assert_(app.selected_dupes[0] is objects[1])
+        self.assert_(app.selected_dupes[1] is objects[2])
+        self.assert_(app.selected_dupes[2] is objects[4])
+    
+    def test_selectPowerMarkerRows_empty(self):
+        self.app.SelectPowerMarkerNodePaths([])
+        self.assertEqual(0,len(self.app.selected_dupes))
+    
+    def test_selectPowerMarkerRows_after_sort(self):
+        app = self.app
+        objects = self.objects
+        app.sort_dupes(0,False) #0 = Filename
+        app.SelectPowerMarkerNodePaths(r2np([0,1,2]))
+        self.assertEqual(3,len(app.selected_dupes))
+        self.assert_(app.selected_dupes[0] is objects[4])
+        self.assert_(app.selected_dupes[1] is objects[2])
+        self.assert_(app.selected_dupes[2] is objects[1])
+    
+    def test_selectPowerMarkerRows_out_of_range(self):
+        app = self.app
+        app.SelectPowerMarkerNodePaths(r2np([0,1,2,3]))
+        self.assertEqual(3,len(app.selected_dupes))
+    
+    def test_toggleSelectedMark(self):
+        app = self.app
+        objects = self.objects
+        app.ToggleSelectedMarkState()
+        self.assertEqual(0,app.results.mark_count)
+        app.SelectPowerMarkerNodePaths(r2np([0,2]))
+        app.ToggleSelectedMarkState()
+        self.assertEqual(2,app.results.mark_count)
+        self.assert_(not app.results.is_marked(objects[0]))
+        self.assert_(app.results.is_marked(objects[1]))
+        self.assert_(not app.results.is_marked(objects[2]))
+        self.assert_(not app.results.is_marked(objects[3]))
+        self.assert_(app.results.is_marked(objects[4]))
+    
+    def test_refreshDetailsWithSelected(self):
+        def mock_refresh(dupe,group):
+            self.called = True
+            if self.app.selected_dupes:
+                self.assert_(dupe is self.app.selected_dupes[0])
+                self.assert_(group is self.app.results.get_group_of_duplicate(dupe))
+            else:
+                self.assert_(dupe is None)
+                self.assert_(group is None)
+        
+        self.app.RefreshDetailsTable = mock_refresh
+        self.called = False
+        self.app.SelectPowerMarkerNodePaths(r2np([0,2]))
+        self.app.RefreshDetailsWithSelected()
+        self.assert_(self.called)
+        self.called = False
+        self.app.SelectPowerMarkerNodePaths([])
+        self.app.RefreshDetailsWithSelected()
+        self.assert_(self.called)
+    
+    def test_makeSelectedReference(self):
+        app = self.app
+        objects = self.objects
+        groups = self.groups
+        app.SelectPowerMarkerNodePaths(r2np([0,2]))
+        app.MakeSelectedReference()
+        self.assert_(groups[0].ref is objects[1])
+        self.assert_(groups[1].ref is objects[4])
+    
+    def test_makeSelectedReference_by_selecting_two_dupes_in_the_same_group(self):
+        app = self.app
+        objects = self.objects
+        groups = self.groups
+        app.SelectPowerMarkerNodePaths(r2np([0,1,2]))
+        #Only 0 and 2 must go ref, not 1 because it is a part of the same group
+        app.MakeSelectedReference()
+        self.assert_(groups[0].ref is objects[1])
+        self.assert_(groups[1].ref is objects[4])
+    
+    def test_removeSelected(self):
+        app = self.app
+        app.SelectPowerMarkerNodePaths(r2np([0,2]))
+        app.RemoveSelected()
+        self.assertEqual(1,len(app.results.dupes))
+        app.RemoveSelected()
+        self.assertEqual(1,len(app.results.dupes))
+        app.SelectPowerMarkerNodePaths(r2np([0,2]))
+        app.RemoveSelected()
+        self.assertEqual(0,len(app.results.dupes))
+    
+    def test_addDirectory_simple(self):
+        # There's already a directory in self.app, so adding another one makes 2 of them
+        app = self.app
+        eq_(app.add_directory(self.datadirpath()), 0)
+        eq_(len(app.directories), 2)
+    
+    def test_addDirectory_already_there(self):
+        app = self.app
+        self.assertEqual(0,app.add_directory(self.datadirpath()))
+        self.assertEqual(1,app.add_directory(self.datadirpath()))
+    
+    def test_addDirectory_does_not_exist(self):
+        app = self.app
+        self.assertEqual(2,app.add_directory('/does_not_exist'))
+    
+    def test_ignore(self):
+        app = self.app
+        app.SelectPowerMarkerNodePaths(r2np([2])) #The dupe of the second, 2 sized group
+        app.AddSelectedToIgnoreList()
+        self.assertEqual(1,len(app.scanner.ignore_list))
+        app.SelectPowerMarkerNodePaths(r2np([0])) #first dupe of the 3 dupes group
+        app.AddSelectedToIgnoreList()
+        #BOTH the ref and the other dupe should have been added
+        self.assertEqual(3,len(app.scanner.ignore_list))
+    
+    def test_purgeIgnoreList(self):
+        app = self.app
+        p1 = self.filepath('zerofile')
+        p2 = self.filepath('zerofill')
+        dne = '/does_not_exist'
+        app.scanner.ignore_list.Ignore(dne,p1)
+        app.scanner.ignore_list.Ignore(p2,dne)
+        app.scanner.ignore_list.Ignore(p1,p2)
+        app.PurgeIgnoreList()
+        self.assertEqual(1,len(app.scanner.ignore_list))
+        self.assert_(app.scanner.ignore_list.AreIgnored(p1,p2))
+        self.assert_(not app.scanner.ignore_list.AreIgnored(dne,p1))
+    
+    def test_only_unicode_is_added_to_ignore_list(self):
+        def FakeIgnore(first,second):
+            if not isinstance(first,unicode):
+                self.fail()
+            if not isinstance(second,unicode):
+                self.fail()
+        
+        app = self.app
+        app.scanner.ignore_list.Ignore = FakeIgnore
+        app.SelectPowerMarkerNodePaths(r2np([2])) #The dupe of the second, 2 sized group
+        app.AddSelectedToIgnoreList()
+    
+    def test_GetOutlineViewChildCounts_out_of_range(self):
+        # Out of range requests don't crash and return an empty value
+        app = self.app
+        # [0, 2] is out of range
+        eq_(app.GetOutlineViewChildCounts(1, [0, 2]), []) # no crash
+    
+    def test_GetOutlineViewValues_out_of_range(self):
+        # Out of range requests don't crash and return an empty value
+        app = self.app
+        # [0, 2] is out of range
+        eq_(app.GetOutlineViewValues(1, [0, 2]), []) # no crash
+    
+
+class TCDupeGuru_renameSelected(TestCase):
+    def setUp(self):
+        p = self.tmppath()
+        fp = open(unicode(p + 'foo bar 1'),mode='w')
+        fp.close()
+        fp = open(unicode(p + 'foo bar 2'),mode='w')
+        fp.close()
+        fp = open(unicode(p + 'foo bar 3'),mode='w')
+        fp.close()
+        files = fs.get_files(p)
+        matches = engine.getmatches(files)
+        groups = engine.get_groups(matches)
+        g = groups[0]
+        g.prioritize(lambda x:x.name)
+        app = DupeGuru()
+        app.results.groups = groups
+        self.app = app
+        self.groups = groups
+        self.p = p
+        self.files = files
+    
+    def test_simple(self):
+        app = self.app
+        g = self.groups[0]
+        app.SelectPowerMarkerNodePaths(r2np([0]))
+        assert app.RenameSelected('renamed')
+        names = io.listdir(self.p)
+        assert 'renamed' in names
+        assert 'foo bar 2' not in names
+        eq_(g.dupes[0].name, 'renamed')
+    
+    def test_none_selected(self):
+        app = self.app
+        g = self.groups[0]
+        app.SelectPowerMarkerNodePaths([])
+        self.mock(logging, 'warning', log_calls(lambda msg: None))
+        assert not app.RenameSelected('renamed')
+        msg = logging.warning.calls[0]['msg']
+        eq_('dupeGuru Warning: list index out of range', msg)
+        names = io.listdir(self.p)
+        assert 'renamed' not in names
+        assert 'foo bar 2' in names
+        eq_(g.dupes[0].name, 'foo bar 2')
+    
+    def test_name_already_exists(self):
+        app = self.app
+        g = self.groups[0]
+        app.SelectPowerMarkerNodePaths(r2np([0]))
+        self.mock(logging, 'warning', log_calls(lambda msg: None))
+        assert not app.RenameSelected('foo bar 1')
+        msg = logging.warning.calls[0]['msg']
+        assert msg.startswith('dupeGuru Warning: \'foo bar 1\' already exists in')
+        names = io.listdir(self.p)
+        assert 'foo bar 1' in names
+        assert 'foo bar 2' in names
+        eq_(g.dupes[0].name, 'foo bar 2')
+    
--- a/core/tests/app_test.py
+++ b/core/tests/app_test.py
@@ -0,0 +1,136 @@
+# Created By: Virgil Dupras
+# Created On: 2007-06-23
+# $Id$
+# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
+# 
+# This software is licensed under the "HS" License as described in the "LICENSE" file, 
+# which should be included with this package. The terms are also available at 
+# http://www.hardcoded.net/licenses/hs_license
+
+import os
+
+from hsutil.testcase import TestCase
+from hsutil import io
+from hsutil.path import Path
+from hsutil.decorators import log_calls
+import hsutil.files
+from hsutil.job import nulljob
+
+from . import data
+from .. import app, fs
+from ..app import DupeGuru as DupeGuruBase
+
+class DupeGuru(DupeGuruBase):
+    def __init__(self):
+        DupeGuruBase.__init__(self, data, '/tmp', appid=4)
+    
+    def _start_job(self, jobid, func):
+        func(nulljob)
+    
+
+class TCDupeGuru(TestCase):
+    cls_tested_module = app
+    def test_apply_filter_calls_results_apply_filter(self):
+        app = DupeGuru()
+        self.mock(app.results, 'apply_filter', log_calls(app.results.apply_filter))
+        app.apply_filter('foo')
+        self.assertEqual(2, len(app.results.apply_filter.calls))
+        call = app.results.apply_filter.calls[0]
+        self.assert_(call['filter_str'] is None)
+        call = app.results.apply_filter.calls[1]
+        self.assertEqual('foo', call['filter_str'])
+    
+    def test_apply_filter_escapes_regexp(self):
+        app = DupeGuru()
+        self.mock(app.results, 'apply_filter', log_calls(app.results.apply_filter))
+        app.apply_filter('()[]\\.|+?^abc')
+        call = app.results.apply_filter.calls[1]
+        self.assertEqual('\\(\\)\\[\\]\\\\\\.\\|\\+\\?\\^abc', call['filter_str'])
+        app.apply_filter('(*)') # In "simple mode", we want the * to behave as a wildcard
+        call = app.results.apply_filter.calls[3]
+        self.assertEqual('\(.*\)', call['filter_str'])
+        app.options['escape_filter_regexp'] = False
+        app.apply_filter('(abc)')
+        call = app.results.apply_filter.calls[5]
+        self.assertEqual('(abc)', call['filter_str'])
+    
+    def test_copy_or_move(self):
+        # The goal here is just to have a test for a previous blowup I had. I know my test coverage
+        # for this unit is pathetic. What's done is done. My approach now is to add tests for
+        # every change I want to make. The blowup was caused by a missing import.
+        p = self.tmppath()
+        io.open(p + 'foo', 'w').close()
+        self.mock(hsutil.files, 'copy', log_calls(lambda source_path, dest_path: None))
+        self.mock(os, 'makedirs', lambda path: None) # We don't want the test to create that fake directory
+        app = DupeGuru()
+        app.directories.add_path(p)
+        [f] = app.directories.get_files()
+        app.copy_or_move(f, True, 'some_destination', 0)
+        self.assertEqual(1, len(hsutil.files.copy.calls))
+        call = hsutil.files.copy.calls[0]
+        self.assertEqual('some_destination', call['dest_path'])
+        self.assertEqual(f.path, call['source_path'])
+    
+    def test_copy_or_move_clean_empty_dirs(self):
+        tmppath = Path(self.tmpdir())
+        sourcepath = tmppath + 'source'
+        io.mkdir(sourcepath)
+        io.open(sourcepath + 'myfile', 'w')
+        app = DupeGuru()
+        app.directories.add_path(tmppath)
+        [myfile] = app.directories.get_files()
+        self.mock(app, 'clean_empty_dirs', log_calls(lambda path: None))
+        app.copy_or_move(myfile, False, tmppath + 'dest', 0)
+        calls = app.clean_empty_dirs.calls
+        self.assertEqual(1, len(calls))
+        self.assertEqual(sourcepath, calls[0]['path'])
+    
+    def test_Scan_with_objects_evaluating_to_false(self):
+        class FakeFile(fs.File):
+            def __nonzero__(self):
+                return False
+            
+        
+        # At some point, any() was used in a wrong way that made Scan() wrongly return 1
+        app = DupeGuru()
+        f1, f2 = [FakeFile('foo') for i in range(2)]
+        f1.is_ref, f2.is_ref = (False, False)
+        assert not (bool(f1) and bool(f2))
+        app.directories.get_files = lambda: [f1, f2]
+        app.directories._dirs.append('this is just so Scan() doesnt return 3')
+        app.start_scanning() # no exception
+    
+
+class TCDupeGuru_clean_empty_dirs(TestCase):
+    cls_tested_module = app
+    def setUp(self):
+        self.mock(hsutil.files, 'delete_if_empty', log_calls(lambda path, files_to_delete=[]: None))
+        self.app = DupeGuru()
+    
+    def test_option_off(self):
+        self.app.clean_empty_dirs(Path('/foo/bar'))
+        self.assertEqual(0, len(hsutil.files.delete_if_empty.calls))
+    
+    def test_option_on(self):
+        self.app.options['clean_empty_dirs'] = True
+        self.app.clean_empty_dirs(Path('/foo/bar'))
+        calls = hsutil.files.delete_if_empty.calls
+        self.assertEqual(1, len(calls))
+        self.assertEqual(Path('/foo/bar'), calls[0]['path'])
+        self.assertEqual(['.DS_Store'], calls[0]['files_to_delete'])
+    
+    def test_recurse_up(self):
+        # delete_if_empty must be recursively called up in the path until it returns False
+        @log_calls
+        def mock_delete_if_empty(path, files_to_delete=[]):
+            return len(path) > 1
+        
+        self.mock(hsutil.files, 'delete_if_empty', mock_delete_if_empty)
+        self.app.options['clean_empty_dirs'] = True
+        self.app.clean_empty_dirs(Path('not-empty/empty/empty'))
+        calls = hsutil.files.delete_if_empty.calls
+        self.assertEqual(3, len(calls))
+        self.assertEqual(Path('not-empty/empty/empty'), calls[0]['path'])
+        self.assertEqual(Path('not-empty/empty'), calls[1]['path'])
+        self.assertEqual(Path('not-empty'), calls[2]['path'])
+    
--- a/core/tests/data.py
+++ b/core/tests/data.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+# Created By: Virgil Dupras
+# Created On: 2009-10-23
+# $Id$
+# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
+# 
+# This software is licensed under the "HS" License as described in the "LICENSE" file, 
+# which should be included with this package. The terms are also available at 
+# http://www.hardcoded.net/licenses/hs_license
+
+# data module for tests
+
+from hsutil.str import format_size
+from ..data import format_path, cmp_value
+
+COLUMNS = [
+    {'attr':'name','display':'Filename'},
+    {'attr':'path','display':'Directory'},
+    {'attr':'size','display':'Size (KB)'},
+    {'attr':'extension','display':'Kind'},
+]
+
+METADATA_TO_READ = ['size']
+
+def GetDisplayInfo(dupe, group, delta):
+    # Builds the display row for `dupe`, in COLUMNS order (name, directory, size, kind).
+    shown_size = dupe.size
+    # In delta mode, a dupe that has a match in its group shows its size minus the ref's size.
+    if group.get_match_of(dupe) and delta:
+        shown_size -= group.ref.size
+    return [
+        dupe.name,
+        format_path(dupe.path),
+        format_size(shown_size, 0, 1, False),
+        dupe.extension,
+    ]
+
+def GetDupeSortKey(dupe, get_group, key, delta):
+    attr = COLUMNS[key]['attr'] # `key` is an index into COLUMNS
+    if not (delta and key == 2): # only column 2 ('size') has a delta-aware sort key
+        return cmp_value(getattr(dupe, attr))
+    return cmp_value(getattr(dupe, attr)) - cmp_value(getattr(get_group().ref, attr))
+
+def GetGroupSortKey(group, key):
+    return cmp_value(getattr(group.ref, COLUMNS[key]['attr'])) # a group sorts by its ref's value for column `key`
--- a/core/tests/directories_test.py
+++ b/core/tests/directories_test.py
@@ -0,0 +1,279 @@
+# Created By: Virgil Dupras
+# Created On: 2006/02/27
+# $Id$
+# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
+# 
+# This software is licensed under the "HS" License as described in the "LICENSE" file, 
+# which should be included with this package. The terms are also available at 
+# http://www.hardcoded.net/licenses/hs_license
+
+import os.path as op
+import os
+import time
+
+from nose.tools import eq_
+
+from hsutil import io
+from hsutil.path import Path
+from hsutil.testcase import TestCase
+
+from ..directories import *
+
+testpath = Path(TestCase.datadirpath())
+
+def create_fake_fs(rootpath):
+    """Create a small directory tree under ``rootpath + 'fs'`` and return that path.
+
+    Layout: directories dir1..dir3, files file1.test..file3.test at the root, and
+    dirN/fileN.test in each directory. fileN.test holds the first N chars of '123'.
+    """
+    def write_file(path, data):
+        fp = io.open(path, 'w')
+        try:
+            fp.write(data)
+        finally:
+            # Close the handle even if the write fails, so no descriptor leaks.
+            fp.close()
+    
+    dirnames = ('dir1', 'dir2', 'dir3')
+    contents = (('file1.test', '1'), ('file2.test', '12'), ('file3.test', '123'))
+    rootpath = rootpath + 'fs'
+    io.mkdir(rootpath)
+    for dirname in dirnames:
+        io.mkdir(rootpath + dirname)
+    for name, data in contents:
+        write_file(rootpath + name, data)
+    for dirname, (name, data) in zip(dirnames, contents):
+        write_file(rootpath + (dirname, name), data)
+    return rootpath
+
+class TCDirectories(TestCase):
+    def test_empty(self):
+        d = Directories()
+        self.assertEqual(0,len(d))
+        self.assert_('foobar' not in d)
+    
+    def test_add_path(self):
+        d = Directories()
+        p = testpath + 'utils'
+        d.add_path(p)
+        self.assertEqual(1,len(d))
+        self.assert_(p in d)
+        self.assert_((p + 'foobar') in d)
+        self.assert_(p[:-1] not in d)
+        p = self.tmppath()
+        d.add_path(p)
+        self.assertEqual(2,len(d))
+        self.assert_(p in d)
+    
+    def test_AddPath_when_path_is_already_there(self):
+        d = Directories()
+        p = testpath + 'utils'
+        d.add_path(p)
+        self.assertRaises(AlreadyThereError, d.add_path, p)
+        self.assertRaises(AlreadyThereError, d.add_path, p + 'foobar')
+        self.assertEqual(1, len(d))
+    
+    def test_add_path_containing_paths_already_there(self):
+        d = Directories()
+        d.add_path(testpath + 'utils')
+        self.assertEqual(1, len(d))
+        d.add_path(testpath)
+        eq_(len(d), 1)
+        eq_(d[0], testpath)
+    
+    def test_AddPath_non_latin(self):
+        p = Path(self.tmpdir())
+        to_add = p + u'unicode\u201a' # directory name containing a non-ASCII character
+        os.mkdir(unicode(to_add))
+        d = Directories()
+        try:
+            d.add_path(to_add)
+        except UnicodeDecodeError:
+            self.fail() # adding such a path must not choke on decoding
+    
+    def test_del(self):
+        d = Directories()
+        d.add_path(testpath + 'utils')
+        try:
+            del d[1]
+            self.fail()
+        except IndexError:
+            pass
+        d.add_path(self.tmppath())
+        del d[1]
+        self.assertEqual(1, len(d))
+    
+    def test_states(self):
+        d = Directories()
+        p = testpath + 'utils'
+        d.add_path(p)
+        self.assertEqual(STATE_NORMAL,d.get_state(p))
+        d.set_state(p,STATE_REFERENCE)
+        self.assertEqual(STATE_REFERENCE,d.get_state(p))
+        self.assertEqual(STATE_REFERENCE,d.get_state(p + 'dir1'))
+        self.assertEqual(1,len(d.states))
+        self.assertEqual(p,d.states.keys()[0])
+        self.assertEqual(STATE_REFERENCE,d.states[p])
+    
+    def test_get_state_with_path_not_there(self):
+        # When the path's not there, just return STATE_NORMAL
+        d = Directories()
+        d.add_path(testpath + 'utils')
+        eq_(d.get_state(testpath), STATE_NORMAL)
+    
+    def test_states_remain_when_larger_directory_eat_smaller_ones(self):
+        d = Directories()
+        p = testpath + 'utils'
+        d.add_path(p)
+        d.set_state(p,STATE_EXCLUDED)
+        d.add_path(testpath)
+        d.set_state(testpath,STATE_REFERENCE)
+        self.assertEqual(STATE_EXCLUDED,d.get_state(p))
+        self.assertEqual(STATE_EXCLUDED,d.get_state(p + 'dir1'))
+        self.assertEqual(STATE_REFERENCE,d.get_state(testpath))
+    
+    def test_set_state_keep_state_dict_size_to_minimum(self):
+        d = Directories()
+        p = create_fake_fs(self.tmppath())
+        d.add_path(p)
+        d.set_state(p,STATE_REFERENCE)
+        d.set_state(p + 'dir1',STATE_REFERENCE)
+        self.assertEqual(1,len(d.states))
+        self.assertEqual(STATE_REFERENCE,d.get_state(p + 'dir1'))
+        d.set_state(p + 'dir1',STATE_NORMAL)
+        self.assertEqual(2,len(d.states))
+        self.assertEqual(STATE_NORMAL,d.get_state(p + 'dir1'))
+        d.set_state(p + 'dir1',STATE_REFERENCE)
+        self.assertEqual(1,len(d.states))
+        self.assertEqual(STATE_REFERENCE,d.get_state(p + 'dir1'))
+    
+    def test_get_files(self):
+        d = Directories()
+        p = create_fake_fs(self.tmppath())
+        d.add_path(p)
+        d.set_state(p + 'dir1',STATE_REFERENCE)
+        d.set_state(p + 'dir2',STATE_EXCLUDED)
+        files = list(d.get_files())
+        self.assertEqual(5, len(files))
+        for f in files:
+            if f.path[:-1] == p + 'dir1':
+                assert f.is_ref
+            else:
+                assert not f.is_ref
+    
+    def test_get_files_with_inherited_exclusion(self):
+        d = Directories()
+        p = testpath + 'utils'
+        d.add_path(p)
+        d.set_state(p,STATE_EXCLUDED)
+        self.assertEqual([], list(d.get_files()))
+    
+    def test_save_and_load(self):
+        d1 = Directories()
+        d2 = Directories()
+        p1 = self.tmppath()
+        p2 = self.tmppath()
+        d1.add_path(p1)
+        d1.add_path(p2)
+        d1.set_state(p1, STATE_REFERENCE)
+        d1.set_state(p1 + 'dir1',STATE_EXCLUDED)
+        tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
+        d1.save_to_file(tmpxml)
+        d2.load_from_file(tmpxml)
+        self.assertEqual(2, len(d2))
+        self.assertEqual(STATE_REFERENCE,d2.get_state(p1))
+        self.assertEqual(STATE_EXCLUDED,d2.get_state(p1 + 'dir1'))
+    
+    def test_invalid_path(self):
+        d = Directories()
+        p = Path('does_not_exist')
+        self.assertRaises(InvalidPathError, d.add_path, p)
+        self.assertEqual(0, len(d))
+    
+    def test_set_state_on_invalid_path(self):
+        d = Directories()
+        try:
+            d.set_state(Path('foobar',),STATE_NORMAL)
+        except LookupError:
+            self.fail()
+    
+    def test_load_from_file_with_invalid_path(self):
+        #This test simulates a load from file resulting in a
+        #InvalidPath raise. Other directories must be loaded.
+        d1 = Directories()
+        d1.add_path(testpath + 'utils')
+        #Will raise InvalidPath upon loading
+        p = self.tmppath()
+        d1.add_path(p)
+        io.rmdir(p)
+        tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
+        d1.save_to_file(tmpxml)
+        d2 = Directories()
+        d2.load_from_file(tmpxml)
+        self.assertEqual(1, len(d2))
+    
+    def test_unicode_save(self):
+        d = Directories()
+        p1 = self.tmppath() + u'hello\xe9'
+        io.mkdir(p1)
+        io.mkdir(p1 + u'foo\xe9')
+        d.add_path(p1)
+        d.set_state(p1 + u'foo\xe9', STATE_EXCLUDED)
+        tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
+        try:
+            d.save_to_file(tmpxml)
+        except UnicodeDecodeError:
+            self.fail()
+    
+    def test_get_files_refreshes_its_directories(self):
+        d = Directories()
+        p = create_fake_fs(self.tmppath())
+        d.add_path(p)
+        files = d.get_files()
+        self.assertEqual(6, len(list(files)))
+        time.sleep(1)
+        os.remove(str(p + ('dir1','file1.test')))
+        files = d.get_files()
+        self.assertEqual(5, len(list(files)))
+    
+    def test_get_files_does_not_choke_on_non_existing_directories(self):
+        d = Directories()
+        p = Path(self.tmpdir())
+        d.add_path(p)
+        io.rmtree(p)
+        self.assertEqual([], list(d.get_files()))
+    
+    def test_get_state_returns_excluded_by_default_for_hidden_directories(self):
+        d = Directories()
+        root = Path(self.tmpdir())
+        hidden_dir_path = root + '.foo'
+        io.mkdir(hidden_dir_path)
+        d.add_path(root)
+        eq_(d.get_state(hidden_dir_path), STATE_EXCLUDED)
+        # But it can be overridden
+        d.set_state(hidden_dir_path, STATE_NORMAL)
+        eq_(d.get_state(hidden_dir_path), STATE_NORMAL)
+    
+    def test_default_path_state_override(self):
+        # It's possible for a subclass to override the default state of a path
+        class MyDirectories(Directories):
+            def _default_state_for_path(self, path):
+                if 'foobar' in path:
+                    return STATE_EXCLUDED
+        
+        d = MyDirectories()
+        p1 = self.tmppath()
+        io.mkdir(p1 + 'foobar')
+        io.open(p1 + 'foobar/somefile', 'w').close()
+        io.mkdir(p1 + 'foobaz')
+        io.open(p1 + 'foobaz/somefile', 'w').close()
+        d.add_path(p1)
+        eq_(d.get_state(p1 + 'foobaz'), STATE_NORMAL)
+        eq_(d.get_state(p1 + 'foobar'), STATE_EXCLUDED)
+        eq_(len(list(d.get_files())), 1) # only the 'foobaz' file is there
+        # However, the default state can be changed
+        d.set_state(p1 + 'foobar', STATE_NORMAL)
+        eq_(d.get_state(p1 + 'foobar'), STATE_NORMAL)
+        eq_(len(list(d.get_files())), 2)
+    
--- a/core/tests/engine_test.py
+++ b/core/tests/engine_test.py
@@ -0,0 +1,815 @@
+# Created By: Virgil Dupras
+# Created On: 2006/01/29
+# $Id$
+# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
+# 
+# This software is licensed under the "HS" License as described in the "LICENSE" file, 
+# which should be included with this package. The terms are also available at 
+# http://www.hardcoded.net/licenses/hs_license
+
+import sys
+
+from nose.tools import eq_
+
+from hsutil import job
+from hsutil.decorators import log_calls
+from hsutil.testcase import TestCase
+
+from .. import engine, fs
+from ..engine import *
+
+class NamedObject(object):
+    # Minimal stand-in object for the engine tests; `words` is only set when `with_words` is true.
+    def __init__(self, name="foobar", with_words=False, size=1):
+        self.name = name
+        self.size = size
+        self.md5partial = self.md5 = name # the name doubles as both digests
+        if with_words:
+            self.words = getwords(name)
+    
+
+no = NamedObject
+
+def get_match_triangle():
+    # Three default-named objects matched pairwise: (o1, o2), (o1, o3), (o2, o3).
+    o1, o2, o3 = [NamedObject(with_words=True) for _ in range(3)]
+    return [
+        get_match(o1, o2),
+        get_match(o1, o3),
+        get_match(o2, o3),
+    ]
+
+def get_test_group():
+    # Returns a Group that has been fed the three matches of get_match_triangle(),
+    # in the order that function returns them.
+    group = Group()
+    for match in get_match_triangle():
+        group.add_match(match)
+    return group
+
+class TCgetwords(TestCase):
+    def test_spaces(self):
+        self.assertEqual(['a', 'b', 'c', 'd'], getwords("a b c d"))
+        self.assertEqual(['a', 'b', 'c', 'd'], getwords(" a  b  c d "))
+    
+    def test_splitter_chars(self):
+        self.assertEqual(
+            [chr(i) for i in xrange(ord('a'),ord('z')+1)],
+            getwords("a-b_c&d+e(f)g;h\\i[j]k{l}m:n.o,p<q>r/s?t~u!v@w#x$y*z")
+        )
+    
+    def test_joiner_chars(self):
+        self.assertEqual(["aec"], getwords(u"a'e\u0301c"))
+    
+    def test_empty(self):
+        self.assertEqual([], getwords(''))
+        
+    def test_returns_lowercase(self):
+        self.assertEqual(['foo', 'bar'], getwords('FOO BAR'))
+    
+    def test_decompose_unicode(self):
+        self.assertEqual(getwords(u'foo\xe9bar'), ['fooebar'])
+    
+
+class TCgetfields(TestCase):
+    def test_simple(self):
+        self.assertEqual([['a', 'b'], ['c', 'd', 'e']], getfields('a b - c d e'))
+    
+    def test_empty(self):
+        self.assertEqual([], getfields(''))
+        
+    def test_cleans_empty_fields(self):
+        expected = [['a', 'bc', 'def']]
+        actual = getfields(' - a bc def')
+        self.assertEqual(expected, actual)
+        # NOTE(review): a second case expecting [['bc', 'def']] was started here but never asserted.
+    
+
+class TCunpack_fields(TestCase):
+    def test_with_fields(self):
+        expected = ['a', 'b', 'c', 'd', 'e', 'f']
+        actual = unpack_fields([['a'], ['b', 'c'], ['d', 'e', 'f']])
+        self.assertEqual(expected, actual)
+    
+    def test_without_fields(self):
+        expected = ['a', 'b', 'c', 'd', 'e', 'f']
+        actual = unpack_fields(['a', 'b', 'c', 'd', 'e', 'f'])
+        self.assertEqual(expected, actual)
+    
+    def test_empty(self):
+        self.assertEqual([], unpack_fields([]))
+    
+
+class TCWordCompare(TestCase):
+    def test_list(self):
+        self.assertEqual(100, compare(['a', 'b', 'c', 'd'],['a', 'b', 'c', 'd']))
+        self.assertEqual(86, compare(['a', 'b', 'c', 'd'],['a', 'b', 'c']))
+    
+    def test_unordered(self):
+        #Sometimes, users don't want too much fuzzy matching. When they set the slider to 100,
+        #they don't expect a filename with the same words, but not in the same order, to match.
+        #Thus, we want to return 99 in that case.
+        self.assertEqual(99, compare(['a', 'b', 'c', 'd'], ['d', 'b', 'c', 'a']))
+    
+    def test_word_occurs_twice(self):
+        #if a word occurs twice in first, but once in second, we want the word to be only counted once
+        self.assertEqual(89, compare(['a', 'b', 'c', 'd', 'a'], ['d', 'b', 'c', 'a']))
+    
+    def test_uses_copy_of_lists(self):
+        first = ['foo', 'bar']
+        second = ['bar', 'bleh']
+        compare(first, second)
+        self.assertEqual(['foo', 'bar'], first)
+        self.assertEqual(['bar', 'bleh'], second)
+    
+    def test_word_weight(self):
+        self.assertEqual(int((6.0 / 13.0) * 100), compare(['foo', 'bar'], ['bar', 'bleh'], (WEIGHT_WORDS, )))
+    
+    def test_similar_words(self):
+        self.assertEqual(100, compare(['the', 'white', 'stripes'],['the', 'whites', 'stripe'], (MATCH_SIMILAR_WORDS, )))
+    
+    def test_empty(self):
+        self.assertEqual(0, compare([], []))
+    
+    def test_with_fields(self):
+        self.assertEqual(67, compare([['a', 'b'], ['c', 'd', 'e']], [['a', 'b'], ['c', 'd', 'f']]))
+    
+    def test_propagate_flags_with_fields(self):
+        def mock_compare(first, second, flags):
+            self.assertEqual((0, 1, 2, 3, 5), flags)
+        
+        self.mock(engine, 'compare_fields', mock_compare)
+        compare([['a']], [['a']], (0, 1, 2, 3, 5))
+    
+
+class TCWordCompareWithFields(TestCase):
+    def test_simple(self):
+        self.assertEqual(67, compare_fields([['a', 'b'], ['c', 'd', 'e']], [['a', 'b'], ['c', 'd', 'f']]))
+    
+    def test_empty(self):
+        self.assertEqual(0, compare_fields([], []))
+    
+    def test_different_length(self):
+        self.assertEqual(0, compare_fields([['a'], ['b']], [['a'], ['b'], ['c']]))
+    
+    def test_propagates_flags(self):
+        # The direct compare_fields() call below never hit the old engine.compare_fields patch; patch compare() (assumed delegate -- confirm).
+        def mock_compare(first, second, flags):
+            self.assertEqual((0, 1, 2, 3, 5), flags)
+        self.mock(engine, 'compare', mock_compare)
+        compare_fields([['a']], [['a']],(0, 1, 2, 3, 5))
+    
+    def test_order(self):
+        first = [['a', 'b'], ['c', 'd', 'e']]
+        second = [['c', 'd', 'f'], ['a', 'b']]
+        self.assertEqual(0, compare_fields(first, second))
+    
+    def test_no_order(self):
+        first = [['a','b'],['c','d','e']]
+        second = [['c','d','f'],['a','b']]
+        self.assertEqual(67, compare_fields(first, second, (NO_FIELD_ORDER, )))
+        first = [['a','b'],['a','b']] #a field can only be matched once.
+        second = [['c','d','f'],['a','b']]
+        self.assertEqual(0, compare_fields(first, second, (NO_FIELD_ORDER, )))
+        first = [['a','b'],['a','b','c']] 
+        second = [['c','d','f'],['a','b']]
+        self.assertEqual(33, compare_fields(first, second, (NO_FIELD_ORDER, )))
+    
+    def test_compare_fields_without_order_doesnt_alter_fields(self):
+        #The NO_ORDER comp type altered the fields!
+        first = [['a','b'],['c','d','e']]
+        second = [['c','d','f'],['a','b']]
+        self.assertEqual(67, compare_fields(first, second, (NO_FIELD_ORDER, )))
+        self.assertEqual([['a','b'],['c','d','e']],first)
+        self.assertEqual([['c','d','f'],['a','b']],second)
+    
+
+class TCbuild_word_dict(TestCase):
+    def test_with_standard_words(self):
+        l = [NamedObject('foo bar',True)]
+        l.append(NamedObject('bar baz',True))
+        l.append(NamedObject('baz bleh foo',True))
+        d = build_word_dict(l)
+        self.assertEqual(4,len(d))
+        self.assertEqual(2,len(d['foo']))
+        self.assert_(l[0] in d['foo'])
+        self.assert_(l[2] in d['foo'])
+        self.assertEqual(2,len(d['bar']))
+        self.assert_(l[0] in d['bar'])
+        self.assert_(l[1] in d['bar'])
+        self.assertEqual(2,len(d['baz']))
+        self.assert_(l[1] in d['baz'])
+        self.assert_(l[2] in d['baz'])
+        self.assertEqual(1,len(d['bleh']))
+        self.assert_(l[2] in d['bleh'])
+    
+    def test_unpack_fields(self):
+        o = NamedObject('')
+        o.words = [['foo','bar'],['baz']]
+        d = build_word_dict([o])
+        self.assertEqual(3,len(d))
+        self.assertEqual(1,len(d['foo']))
+    
+    def test_words_are_unaltered(self):
+        o = NamedObject('')
+        o.words = [['foo','bar'],['baz']]
+        d = build_word_dict([o])
+        self.assertEqual([['foo','bar'],['baz']],o.words)
+    
+    def test_object_instances_can_only_be_once_in_words_object_list(self):
+        o = NamedObject('foo foo',True)
+        d = build_word_dict([o])
+        self.assertEqual(1,len(d['foo']))
+    
+    def test_job(self):
+        def do_progress(p,d=''):
+            self.log.append(p)
+            return True
+        
+        j = job.Job(1,do_progress)
+        self.log = []
+        s = "foo bar"
+        build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j)
+        self.assertEqual(0,self.log[0])
+        self.assertEqual(33,self.log[1])
+        self.assertEqual(66,self.log[2])
+        self.assertEqual(100,self.log[3])
+    
+
+class TCmerge_similar_words(TestCase):
+    def test_some_similar_words(self):
+        d = {
+            'foobar':set([1]),
+            'foobar1':set([2]),
+            'foobar2':set([3]),
+        }
+        merge_similar_words(d)
+        self.assertEqual(1,len(d))
+        self.assertEqual(3,len(d['foobar']))
+    
+    
+
+class TCreduce_common_words(TestCase):
+    def test_typical(self):
+        d = {
+            'foo': set([NamedObject('foo bar',True) for i in range(50)]),
+            'bar': set([NamedObject('foo bar',True) for i in range(49)])
+        }
+        reduce_common_words(d, 50)
+        self.assert_('foo' not in d)
+        self.assertEqual(49,len(d['bar']))
+    
+    def test_dont_remove_objects_with_only_common_words(self):
+        d = {
+            'common': set([NamedObject("common uncommon",True) for i in range(50)] + [NamedObject("common",True)]),
+            'uncommon': set([NamedObject("common uncommon",True)])
+        }
+        reduce_common_words(d, 50)
+        self.assertEqual(1,len(d['common']))
+        self.assertEqual(1,len(d['uncommon']))
+    
+    def test_values_still_are_set_instances(self):
+        d = {
+            'common': set([NamedObject("common uncommon",True) for i in range(50)] + [NamedObject("common",True)]),
+            'uncommon': set([NamedObject("common uncommon",True)])
+        }
+        reduce_common_words(d, 50)
+        self.assert_(isinstance(d['common'],set))
+        self.assert_(isinstance(d['uncommon'],set))
+    
+    def test_dont_raise_KeyError_when_a_word_has_been_removed(self):
+        #If a word has been removed by the reduce, an object in a subsequent common word that
+        #contains the word that has been removed would cause a KeyError.
+        d = {
+            'foo': set([NamedObject('foo bar baz',True) for i in range(50)]),
+            'bar': set([NamedObject('foo bar baz',True) for i in range(50)]),
+            'baz': set([NamedObject('foo bar baz',True) for i in range(49)])
+        }
+        try:
+            reduce_common_words(d, 50)
+        except KeyError:
+            self.fail()
+    
+    def test_unpack_fields(self):
+        #object.words may be fields.
+        def create_it():
+            o = NamedObject('')
+            o.words = [['foo','bar'],['baz']]
+            return o
+        
+        d = {
+            'foo': set([create_it() for i in range(50)])
+        }
+        try:
+            reduce_common_words(d, 50)
+        except TypeError:
+            self.fail("must support fields.")
+    
+    def test_consider_a_reduced_common_word_common_even_after_reduction(self):
+        #There was a bug in the code that caused a word that has already been reduced not to
+        #be counted as a common word for subsequent words. For example, if 'foo' is processed
+        #as a common word, keeping a "foo bar" file in it, and then 'bar' is processed, "foo bar"
+        #would not stay in 'bar' because 'foo' is not a common word anymore.
+        only_common = NamedObject('foo bar',True)
+        d = {
+            'foo': set([NamedObject('foo bar baz',True) for i in range(49)] + [only_common]),
+            'bar': set([NamedObject('foo bar baz',True) for i in range(49)] + [only_common]),
+            'baz': set([NamedObject('foo bar baz',True) for i in range(49)])
+        }
+        reduce_common_words(d, 50)
+        self.assertEqual(1,len(d['foo']))
+        self.assertEqual(1,len(d['bar']))
+        self.assertEqual(49,len(d['baz']))
+    
+
+class TCget_match(TestCase):
+    def test_simple(self):
+        o1 = NamedObject("foo bar",True)
+        o2 = NamedObject("bar bleh",True)
+        m = get_match(o1,o2)
+        self.assertEqual(50,m.percentage)
+        self.assertEqual(['foo','bar'],m.first.words)
+        self.assertEqual(['bar','bleh'],m.second.words)
+        self.assert_(m.first is o1)
+        self.assert_(m.second is o2)
+    
+    def test_in(self):
+        o1 = NamedObject("foo",True)
+        o2 = NamedObject("bar",True)
+        m = get_match(o1,o2)
+        self.assert_(o1 in m)
+        self.assert_(o2 in m)
+        self.assert_(object() not in m)
+    
+    def test_word_weight(self):
+        self.assertEqual(int((6.0 / 13.0) * 100),get_match(NamedObject("foo bar",True),NamedObject("bar bleh",True),(WEIGHT_WORDS,)).percentage)
+    
+
+class GetMatches(TestCase):
+    def test_empty(self):
+        eq_(getmatches([]), [])
+    
+    def test_simple(self):
+        l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("a b c foo")]
+        r = getmatches(l)
+        self.assertEqual(2,len(r))
+        seek = [m for m in r if m.percentage == 50] #"foo bar" and "bar bleh"
+        m = seek[0]
+        self.assertEqual(['foo','bar'],m.first.words)
+        self.assertEqual(['bar','bleh'],m.second.words)
+        seek = [m for m in r if m.percentage == 33] #"foo bar" and "a b c foo"
+        m = seek[0]
+        self.assertEqual(['foo','bar'],m.first.words)
+        self.assertEqual(['a','b','c','foo'],m.second.words)
+    
+    def test_null_and_unrelated_objects(self):
+        l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject(""),NamedObject("unrelated object")]
+        r = getmatches(l)
+        self.assertEqual(1,len(r))
+        m = r[0]
+        self.assertEqual(50,m.percentage)
+        self.assertEqual(['foo','bar'],m.first.words)
+        self.assertEqual(['bar','bleh'],m.second.words)
+    
+    def test_twice_the_same_word(self):
+        l = [NamedObject("foo foo bar"),NamedObject("bar bleh")]
+        r = getmatches(l)
+        self.assertEqual(1,len(r))
+    
+    def test_twice_the_same_word_when_preworded(self):
+        l = [NamedObject("foo foo bar",True),NamedObject("bar bleh",True)]
+        r = getmatches(l)
+        self.assertEqual(1,len(r))
+    
+    def test_two_words_match(self):
+        l = [NamedObject("foo bar"),NamedObject("foo bar bleh")]
+        r = getmatches(l)
+        self.assertEqual(1,len(r))
+    
+    def test_match_files_with_only_common_words(self):
+        #If a word occurs more than 50 times, it is excluded from the matching process
+        #The problem with the common_word_threshold is that the files containing only common
+        #words will never be matched together. We *should* match them.
+        # This test assumes that the common word threshold const is 50
+        l = [NamedObject("foo") for i in range(50)]
+        r = getmatches(l)
+        self.assertEqual(1225,len(r)) # 50 objects -> 50 * 49 / 2 distinct pairs
+    
+    def test_use_words_already_there_if_there(self):
+        o1 = NamedObject('foo')
+        o2 = NamedObject('bar')
+        o2.words = ['foo']
+        eq_(1, len(getmatches([o1,o2])))
+    
+    def test_job(self):
+        def do_progress(p,d=''):
+            self.log.append(p)
+            return True
+        
+        j = job.Job(1,do_progress)
+        self.log = []
+        s = "foo bar"
+        getmatches([NamedObject(s), NamedObject(s), NamedObject(s)], j=j)
+        self.assert_(len(self.log) > 2)
+        self.assertEqual(0,self.log[0])
+        self.assertEqual(100,self.log[-1])
+    
+    def test_weight_words(self):
+        l = [NamedObject("foo bar"),NamedObject("bar bleh")]
+        m = getmatches(l, weight_words=True)[0]
+        self.assertEqual(int((6.0 / 13.0) * 100),m.percentage)
+    
+    def test_similar_word(self):
+        l = [NamedObject("foobar"),NamedObject("foobars")]
+        eq_(len(getmatches(l, match_similar_words=True)), 1)
+        eq_(getmatches(l, match_similar_words=True)[0].percentage, 100)
+        l = [NamedObject("foobar"),NamedObject("foo")]
+        eq_(len(getmatches(l, match_similar_words=True)), 0) #too far
+        l = [NamedObject("bizkit"),NamedObject("bizket")]
+        eq_(len(getmatches(l, match_similar_words=True)), 1)
+        l = [NamedObject("foobar"),NamedObject("foosbar")]
+        eq_(len(getmatches(l, match_similar_words=True)), 1)
+    
+    def test_single_object_with_similar_words(self):
+        l = [NamedObject("foo foos")]
+        eq_(len(getmatches(l, match_similar_words=True)), 0)
+    
+    def test_double_words_get_counted_only_once(self):
+        l = [NamedObject("foo bar foo bleh"),NamedObject("foo bar bleh bar")]
+        m = getmatches(l)[0]
+        self.assertEqual(75,m.percentage)
+    
+    def test_with_fields(self):
+        o1 = NamedObject("foo bar - foo bleh")
+        o2 = NamedObject("foo bar - bleh bar")
+        o1.words = getfields(o1.name)
+        o2.words = getfields(o2.name)
+        m = getmatches([o1, o2])[0]
+        self.assertEqual(50, m.percentage)
+    
+    def test_with_fields_no_order(self):
+        o1 = NamedObject("foo bar - foo bleh")
+        o2 = NamedObject("bleh bang - foo bar")
+        o1.words = getfields(o1.name)
+        o2.words = getfields(o2.name)
+        m = getmatches([o1, o2], no_field_order=True)[0]
+        eq_(m.percentage, 50)
+    
+    def test_only_match_similar_when_the_option_is_set(self):
+        l = [NamedObject("foobar"),NamedObject("foobars")]
+        eq_(len(getmatches(l, match_similar_words=False)), 0)
+    
+    def test_dont_recurse_do_match(self):
+        # Under nosetests the stack is already deeper, so the limit must be high enough not to fail falsely.
+        previous_limit = sys.getrecursionlimit()
+        sys.setrecursionlimit(100)
+        try:
+            getmatches([NamedObject('foo bar') for i in range(101)])
+        except RuntimeError:
+            self.fail()
+        finally:
+            sys.setrecursionlimit(previous_limit) # restore whatever was in effect, not a hard-coded 1000
+    
+    def test_min_match_percentage(self):
+        l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("a b c foo")]
+        r = getmatches(l, min_match_percentage=50)
+        self.assertEqual(1,len(r)) #Only "foo bar" / "bar bleh" should match
+    
+    def test_MemoryError(self):
+        # getmatches() must survive a MemoryError raised mid-way and return the matches found so far.
+        @log_calls
+        def mocked_match(first, second, flags):
+            if len(mocked_match.calls) > 42:
+                raise MemoryError()
+            return Match(first, second, 0)
+        objects = [NamedObject() for i in range(10)] # results in 45 matches
+        self.mock(engine, 'get_match', mocked_match)
+        try:
+            r = getmatches(objects)
+        except MemoryError:
+            self.fail('MemoryError must be handled')
+        self.assertEqual(42, len(r))
+    
+
+class GetMatchesByContents(TestCase):
+    def test_dont_compare_empty_files(self):
+        empty_files = [no(size=0) for _ in range(2)] # `no` is the module-level alias of NamedObject
+        assert not getmatches_by_contents(empty_files)
+    
+
+class TCGroup(TestCase):
+    def test_empy(self):
+        g = Group()
+        self.assertEqual(None,g.ref)
+        self.assertEqual([],g.dupes)
+        self.assertEqual(0,len(g.matches))
+    
+    def test_add_match(self):
+        g = Group()
+        m = get_match(NamedObject("foo",True),NamedObject("bar",True))
+        g.add_match(m)
+        self.assert_(g.ref is m.first)
+        self.assertEqual([m.second],g.dupes)
+        self.assertEqual(1,len(g.matches))
+        self.assert_(m in g.matches)
+    
+    def test_multiple_add_match(self):
+        g = Group()
+        o1 = NamedObject("a",True)
+        o2 = NamedObject("b",True)
+        o3 = NamedObject("c",True)
+        o4 = NamedObject("d",True)
+        g.add_match(get_match(o1,o2))
+        self.assert_(g.ref is o1)
+        self.assertEqual([o2],g.dupes)
+        self.assertEqual(1,len(g.matches))
+        g.add_match(get_match(o1,o3))
+        self.assertEqual([o2],g.dupes)
+        self.assertEqual(2,len(g.matches))
+        g.add_match(get_match(o2,o3))
+        self.assertEqual([o2,o3],g.dupes)
+        self.assertEqual(3,len(g.matches))
+        g.add_match(get_match(o1,o4))
+        self.assertEqual([o2,o3],g.dupes)
+        self.assertEqual(4,len(g.matches))
+        g.add_match(get_match(o2,o4))
+        self.assertEqual([o2,o3],g.dupes)
+        self.assertEqual(5,len(g.matches))
+        g.add_match(get_match(o3,o4))
+        self.assertEqual([o2,o3,o4],g.dupes)
+        self.assertEqual(6,len(g.matches))
+    
+    def test_len(self):
+        g = Group()
+        self.assertEqual(0,len(g))
+        g.add_match(get_match(NamedObject("foo",True),NamedObject("bar",True)))
+        self.assertEqual(2,len(g))
+    
+    def test_add_same_match_twice(self):
+        g = Group()
+        m = get_match(NamedObject("foo",True),NamedObject("foo",True))
+        g.add_match(m)
+        self.assertEqual(2,len(g))
+        self.assertEqual(1,len(g.matches))
+        g.add_match(m)
+        self.assertEqual(2,len(g))
+        self.assertEqual(1,len(g.matches))
+    
+    def test_in(self):
+        g = Group()
+        o1 = NamedObject("foo",True)
+        o2 = NamedObject("bar",True)
+        self.assert_(o1 not in g)
+        g.add_match(get_match(o1,o2))
+        self.assert_(o1 in g)
+        self.assert_(o2 in g)
+    
+    def test_remove(self):
+        g = Group()
+        o1 = NamedObject("foo",True)
+        o2 = NamedObject("bar",True)
+        o3 = NamedObject("bleh",True)
+        g.add_match(get_match(o1,o2))
+        g.add_match(get_match(o1,o3))
+        g.add_match(get_match(o2,o3))
+        self.assertEqual(3,len(g.matches))
+        self.assertEqual(3,len(g))
+        g.remove_dupe(o3)
+        self.assertEqual(1,len(g.matches))
+        self.assertEqual(2,len(g))
+        g.remove_dupe(o1)
+        self.assertEqual(0,len(g.matches))
+        self.assertEqual(0,len(g))
+    
+    def test_remove_with_ref_dupes(self):
+        g = Group()
+        o1 = NamedObject("foo",True)
+        o2 = NamedObject("bar",True)
+        o3 = NamedObject("bleh",True)
+        g.add_match(get_match(o1,o2))
+        g.add_match(get_match(o1,o3))
+        g.add_match(get_match(o2,o3))
+        o1.is_ref = True
+        o2.is_ref = True
+        g.remove_dupe(o3)
+        self.assertEqual(0,len(g))
+    
+    def test_switch_ref(self):
+        o1 = NamedObject(with_words=True)
+        o2 = NamedObject(with_words=True)
+        g = Group()
+        g.add_match(get_match(o1,o2))
+        self.assert_(o1 is g.ref)
+        g.switch_ref(o2)
+        self.assert_(o2 is g.ref)
+        self.assertEqual([o1],g.dupes)
+        g.switch_ref(o2)
+        self.assert_(o2 is g.ref)
+        g.switch_ref(NamedObject('',True))
+        self.assert_(o2 is g.ref)
+    
+    def test_get_match_of(self):
+        g = Group()
+        for m in get_match_triangle():
+            g.add_match(m)
+        o = g.dupes[0]
+        m = g.get_match_of(o)
+        self.assert_(g.ref in m)
+        self.assert_(o in m)
+        self.assert_(g.get_match_of(NamedObject('',True)) is None)
+        self.assert_(g.get_match_of(g.ref) is None)
+    
+    def test_percentage(self):
+        #percentage should return the avg percentage in relation to the ref
+        m1,m2,m3 = get_match_triangle()
+        m1 = Match(m1[0], m1[1], 100)
+        m2 = Match(m2[0], m2[1], 50)
+        m3 = Match(m3[0], m3[1], 33)
+        g = Group()
+        g.add_match(m1)
+        g.add_match(m2)
+        g.add_match(m3)
+        self.assertEqual(75,g.percentage)
+        g.switch_ref(g.dupes[0])
+        self.assertEqual(66,g.percentage)
+        g.remove_dupe(g.dupes[0])
+        self.assertEqual(33,g.percentage)
+        g.add_match(m1)
+        g.add_match(m2)
+        self.assertEqual(66,g.percentage)
+    
+    def test_percentage_on_empty_group(self):
+        g = Group()
+        self.assertEqual(0,g.percentage)
+    
+    def test_prioritize(self):
+        m1,m2,m3 = get_match_triangle()
+        o1 = m1.first
+        o2 = m1.second
+        o3 = m2.second
+        o1.name = 'c'
+        o2.name = 'b'
+        o3.name = 'a'
+        g = Group()
+        g.add_match(m1)
+        g.add_match(m2)
+        g.add_match(m3)
+        self.assert_(o1 is g.ref)
+        g.prioritize(lambda x:x.name)
+        self.assert_(o3 is g.ref)
+    
+    def test_prioritize_with_tie_breaker(self):
+        # If the ref has the same key as one or more of the dupes, run the tie_breaker func among them
+        g = get_test_group()
+        o1, o2, o3 = g.ordered
+        tie_breaker = lambda ref, dupe: dupe is o3
+        g.prioritize(lambda x:0, tie_breaker)
+        self.assertTrue(g.ref is o3)
+    
+    def test_prioritize_with_tie_breaker_runs_on_all_dupes(self):
+        # Even if a dupe is chosen to switch with ref with a tie breaker, we still run the tie breaker 
+        # with other dupes and the newly chosen ref
+        g = get_test_group()
+        o1, o2, o3 = g.ordered
+        o1.foo = 1
+        o2.foo = 2
+        o3.foo = 3
+        tie_breaker = lambda ref, dupe: dupe.foo > ref.foo
+        g.prioritize(lambda x:0, tie_breaker)
+        self.assertTrue(g.ref is o3)
+    
+    def test_prioritize_with_tie_breaker_runs_only_on_tie_dupes(self):
+        # The tie breaker only runs on dupes that had the same value for the key_func
+        g = get_test_group()
+        o1, o2, o3 = g.ordered
+        o1.foo = 2
+        o2.foo = 2
+        o3.foo = 1
+        o1.bar = 1
+        o2.bar = 2
+        o3.bar = 3
+        key_func = lambda x: -x.foo
+        tie_breaker = lambda ref, dupe: dupe.bar > ref.bar
+        g.prioritize(key_func, tie_breaker)
+        self.assertTrue(g.ref is o2)
+    
+    def test_list_like(self):
+        g = Group()
+        o1,o2 = (NamedObject("foo",True),NamedObject("bar",True))
+        g.add_match(get_match(o1,o2))
+        self.assert_(g[0] is o1)
+        self.assert_(g[1] is o2)
+    
+    def test_discard_matches(self):
+        g = Group()
+        o1,o2,o3 = (NamedObject("foo",True),NamedObject("bar",True),NamedObject("baz",True))
+        g.add_match(get_match(o1,o2))
+        g.add_match(get_match(o1,o3))
+        g.discard_matches()
+        self.assertEqual(1,len(g.matches))
+        self.assertEqual(0,len(g.candidates))
+    
+
+class TCget_groups(TestCase):
+    def test_empty(self):
+        r = get_groups([])
+        self.assertEqual([],r)
+    
+    def test_simple(self):
+        l = [NamedObject("foo bar"),NamedObject("bar bleh")]
+        matches = getmatches(l)
+        m = matches[0]
+        r = get_groups(matches)
+        self.assertEqual(1,len(r))
+        g = r[0]
+        self.assert_(g.ref is m.first)
+        self.assertEqual([m.second],g.dupes)
+    
+    def test_group_with_multiple_matches(self):
+        #This results in 3 matches
+        l = [NamedObject("foo"),NamedObject("foo"),NamedObject("foo")]
+        matches = getmatches(l)
+        r = get_groups(matches)
+        self.assertEqual(1,len(r))
+        g = r[0]
+        self.assertEqual(3,len(g))
+    
+    def test_must_choose_a_group(self):
+        l = [NamedObject("a b"),NamedObject("a b"),NamedObject("b c"),NamedObject("c d"),NamedObject("c d")]
+        #There will be 2 groups here: group "a b" and group "c d"
+        #"b c" can go in either of them, but not in both.
+        matches = getmatches(l)
+        r = get_groups(matches)
+        self.assertEqual(2,len(r))
+        self.assertEqual(5,len(r[0])+len(r[1]))
+    
+    def test_should_all_go_in_the_same_group(self):
+        l = [NamedObject("a b"),NamedObject("a b"),NamedObject("a b"),NamedObject("a b")]
+        #All 4 objects have the same name, so there is nothing to choose between here:
+        #they must all end up in one single group.
+        matches = getmatches(l)
+        r = get_groups(matches)
+        self.assertEqual(1,len(r))
+    
+    def test_give_priority_to_matches_with_higher_percentage(self):
+        o1 = NamedObject(with_words=True)
+        o2 = NamedObject(with_words=True)
+        o3 = NamedObject(with_words=True)
+        m1 = Match(o1, o2, 1)
+        m2 = Match(o2, o3, 2)
+        r = get_groups([m1,m2])
+        self.assertEqual(1,len(r))
+        g = r[0]
+        self.assertEqual(2,len(g))
+        self.assert_(o1 not in g)
+        self.assert_(o2 in g)
+        self.assert_(o3 in g)
+    
+    def test_four_sized_group(self):
+        l = [NamedObject("foobar") for i in xrange(4)]
+        m = getmatches(l)
+        r = get_groups(m)
+        self.assertEqual(1,len(r))
+        self.assertEqual(4,len(r[0]))
+    
+    def test_referenced_by_ref2(self):
+        o1 = NamedObject(with_words=True)
+        o2 = NamedObject(with_words=True)
+        o3 = NamedObject(with_words=True)
+        m1 = get_match(o1,o2)
+        m2 = get_match(o3,o1)
+        m3 = get_match(o3,o2)
+        r = get_groups([m1,m2,m3])
+        self.assertEqual(3,len(r[0]))
+    
+    def test_job(self):
+        def do_progress(p,d=''):
+            self.log.append(p)
+            return True
+        
+        self.log = []
+        j = job.Job(1,do_progress)
+        m1,m2,m3 = get_match_triangle()
+        #101%: To make sure it is processed first so the job test works correctly
+        m4 = Match(NamedObject('a',True), NamedObject('a',True), 101)
+        get_groups([m1,m2,m3,m4],j)
+        self.assertEqual(0,self.log[0])
+        self.assertEqual(100,self.log[-1])
+    
+    def test_group_admissible_discarded_dupes(self):
+        # If, with a (A, B, C, D) set, all match with A, but C and D don't match with B and that the
+        # (A, B) match is the highest (thus resulting in an (A, B) group), still match C and D
+        # in a separate group instead of discarding them.
+        A, B, C, D = [NamedObject() for _ in range(4)]
+        m1 = Match(A, B, 90) # This is the strongest "A" match
+        m2 = Match(A, C, 80) # Because C doesn't match with B, it won't be in the group
+        m3 = Match(A, D, 80) # Same thing for D
+        m4 = Match(C, D, 70) # However, because C and D match, they should have their own group.
+        groups = get_groups([m1, m2, m3, m4])
+        eq_(len(groups), 2)
+        g1, g2 = groups
+        assert A in g1
+        assert B in g1
+        assert C in g2
+        assert D in g2
+    
--- a/core/tests/ignore_test.py
+++ b/core/tests/ignore_test.py
@@ -0,0 +1,152 @@
+# Created By: Virgil Dupras
+# Created On: 2006/05/02
+# $Id$
+# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
+# 
+# This software is licensed under the "HS" License as described in the "LICENSE" file, 
+# which should be included with this package. The terms are also available at 
+# http://www.hardcoded.net/licenses/hs_license
+
+import cStringIO
+import xml.dom.minidom
+
+from nose.tools import eq_
+
+from ..ignore import *
+
+def test_empty():
+    il = IgnoreList()
+    eq_(0,len(il))
+    assert not il.AreIgnored('foo','bar')
+
+def test_simple():
+    il = IgnoreList()
+    il.Ignore('foo','bar')
+    assert il.AreIgnored('foo','bar')
+    assert il.AreIgnored('bar','foo')
+    assert not il.AreIgnored('foo','bleh')
+    assert not il.AreIgnored('bleh','bar')
+    eq_(1,len(il))
+
+def test_multiple():
+    il = IgnoreList()
+    il.Ignore('foo','bar')
+    il.Ignore('foo','bleh')
+    il.Ignore('bleh','bar')
+    il.Ignore('aybabtu','bleh')
+    assert il.AreIgnored('foo','bar')
+    assert il.AreIgnored('bar','foo')
+    assert il.AreIgnored('foo','bleh')
+    assert il.AreIgnored('bleh','bar')
+    assert not il.AreIgnored('aybabtu','bar')
+    eq_(4,len(il))
+
+def test_clear():
+    il = IgnoreList()
+    il.Ignore('foo','bar')
+    il.Clear()
+    assert not il.AreIgnored('foo','bar')
+    assert not il.AreIgnored('bar','foo')
+    eq_(0,len(il))
+
+def test_add_same_twice():
+    il = IgnoreList()
+    il.Ignore('foo','bar')
+    il.Ignore('bar','foo')
+    eq_(1,len(il))
+
+def test_save_to_xml():
+    il = IgnoreList()
+    il.Ignore('foo','bar')
+    il.Ignore('foo','bleh')
+    il.Ignore('bleh','bar')
+    f = cStringIO.StringIO()
+    il.save_to_xml(f)
+    f.seek(0)
+    doc = xml.dom.minidom.parse(f)
+    root = doc.documentElement
+    eq_('ignore_list',root.nodeName)
+    children = [c for c in root.childNodes if c.localName]
+    eq_(2,len(children))
+    eq_(2,len([c for c in children if c.nodeName == 'file']))
+    f1,f2 = children
+    subchildren = [c for c in f1.childNodes if c.localName == 'file'] +\
+        [c for c in f2.childNodes if c.localName == 'file']
+    eq_(3,len(subchildren))
+
+def test_SaveThenLoad():
+    il = IgnoreList()
+    il.Ignore('foo','bar')
+    il.Ignore('foo','bleh')
+    il.Ignore('bleh','bar')
+    il.Ignore(u'\u00e9','bar')
+    f = cStringIO.StringIO()
+    il.save_to_xml(f)
+    f.seek(0)
+    il = IgnoreList()
+    il.load_from_xml(f)
+    eq_(4,len(il))
+    assert il.AreIgnored(u'\u00e9','bar')
+    
+def test_LoadXML_with_empty_file_tags():
+    f = cStringIO.StringIO()
+    f.write('<?xml version="1.0" encoding="utf-8"?><ignore_list><file><file/></file></ignore_list>')
+    f.seek(0)
+    il = IgnoreList()
+    il.load_from_xml(f)
+    eq_(0,len(il))
+    
+def test_AreIgnore_works_when_a_child_is_a_key_somewhere_else():
+    il = IgnoreList()
+    il.Ignore('foo','bar')
+    il.Ignore('bar','baz')
+    assert il.AreIgnored('bar','foo')
+
+
+def test_no_dupes_when_a_child_is_a_key_somewhere_else():
+    il = IgnoreList()
+    il.Ignore('foo','bar')
+    il.Ignore('bar','baz')
+    il.Ignore('bar','foo')
+    eq_(2,len(il))
+
+def test_iterate():
+    #It must be possible to iterate through ignore list
+    il = IgnoreList()
+    expected = [('foo','bar'),('bar','baz'),('foo','baz')]
+    for i in expected:
+        il.Ignore(i[0],i[1])
+    for i in il:
+        expected.remove(i) #No exception should be raised
+    assert not expected #expected should be empty
+
+def test_filter():
+    il = IgnoreList()
+    il.Ignore('foo','bar')
+    il.Ignore('bar','baz')
+    il.Ignore('foo','baz')
+    il.Filter(lambda f,s: f == 'bar')
+    eq_(1,len(il))
+    assert not il.AreIgnored('foo','bar')
+    assert il.AreIgnored('bar','baz')
+
+def test_save_with_non_ascii_non_unicode_items():
+    il = IgnoreList()
+    il.Ignore('\xac','\xbf')
+    f = cStringIO.StringIO()
+    try:
+        il.save_to_xml(f)
+    except Exception as e:
+        raise AssertionError(unicode(e))
+
+def test_len():
+    il = IgnoreList()
+    eq_(0,len(il))
+    il.Ignore('foo','bar')
+    eq_(1,len(il))
+
+def test_nonzero():
+    il = IgnoreList()
+    assert not il
+    il.Ignore('foo','bar')
+    assert il
--- a/core/tests/results_test.py
+++ b/core/tests/results_test.py
@@ -0,0 +1,717 @@
+# Created By: Virgil Dupras
+# Created On: 2006/02/23
+# $Id$                                  
+# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
+# 
+# This software is licensed under the "HS" License as described in the "LICENSE" file, 
+# which should be included with this package. The terms are also available at 
+# http://www.hardcoded.net/licenses/hs_license
+
+import unittest
+import StringIO
+import xml.dom.minidom
+import os.path as op
+
+from hsutil.path import Path
+from hsutil.testcase import TestCase
+from hsutil.misc import first
+
+from . import engine_test, data
+from .. import engine
+from ..results import *
+
+class NamedObject(engine_test.NamedObject):
+    path = property(lambda x:Path('basepath') + x.name)
+    is_ref = False
+    
+    def __nonzero__(self):
+        return False #Make sure that operations are made correctly when the bool value of files is false.
+
+# Returns a group set that looks like this:
+# "foo bar" (1)
+#   "bar bleh" (1024)
+#   "foo bleh" (1)
+# "ibabtu" (1)
+#   "ibabtu" (1)
+def GetTestGroups():
+    objects = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("foo bleh"),NamedObject("ibabtu"),NamedObject("ibabtu")]
+    objects[1].size = 1024
+    matches = engine.getmatches(objects) #we should have 4 matches (3 among the foo/bar/bleh files, 1 for the ibabtu pair)
+    groups = engine.get_groups(matches) #We should have 2 groups
+    for g in groups:
+        g.prioritize(lambda x:objects.index(x)) #We want the dupes to be in the same order as the list is
+    groups.sort(key=len, reverse=True) # We want the group with 3 members to be first.
+    return (objects,matches,groups)
+
+class TCResultsEmpty(TestCase):
+    def setUp(self):
+        self.results = Results(data)
+    
+    def test_apply_invalid_filter(self):
+        # If the applied filter is an invalid regexp, just ignore the filter.
+        self.results.apply_filter('[') # invalid
+        self.test_stat_line() # make sure that the stats line isn't saying we applied a '[' filter
+    
+    def test_stat_line(self):
+        self.assertEqual("0 / 0 (0.00 B / 0.00 B) duplicates marked.",self.results.stat_line)
+    
+    def test_groups(self):
+        self.assertEqual(0,len(self.results.groups))
+    
+    def test_get_group_of_duplicate(self):
+        self.assert_(self.results.get_group_of_duplicate('foo') is None)
+    
+    def test_save_to_xml(self):
+        f = StringIO.StringIO()
+        self.results.save_to_xml(f)
+        f.seek(0)
+        doc = xml.dom.minidom.parse(f)
+        root = doc.documentElement
+        self.assertEqual('results',root.nodeName)
+    
+
+class TCResultsWithSomeGroups(TestCase):
+    def setUp(self):
+        self.results = Results(data)
+        self.objects,self.matches,self.groups = GetTestGroups()
+        self.results.groups = self.groups
+    
+    def test_stat_line(self):
+        self.assertEqual("0 / 3 (0.00 B / 1.01 KB) duplicates marked.",self.results.stat_line)
+    
+    def test_groups(self):
+        self.assertEqual(2,len(self.results.groups))
+    
+    def test_get_group_of_duplicate(self):
+        for o in self.objects:
+            g = self.results.get_group_of_duplicate(o)
+            self.assert_(isinstance(g, engine.Group))
+            self.assert_(o in g)
+        self.assert_(self.results.get_group_of_duplicate(self.groups[0]) is None)
+    
+    def test_remove_duplicates(self):
+        g1,g2 = self.results.groups
+        self.results.remove_duplicates([g1.dupes[0]])
+        self.assertEqual(2,len(g1))
+        self.assert_(g1 in self.results.groups)
+        self.results.remove_duplicates([g1.ref])
+        self.assertEqual(2,len(g1))
+        self.assert_(g1 in self.results.groups)
+        self.results.remove_duplicates([g1.dupes[0]])
+        self.assertEqual(0,len(g1))
+        self.assert_(g1 not in self.results.groups)
+        self.results.remove_duplicates([g2.dupes[0]])
+        self.assertEqual(0,len(g2))
+        self.assert_(g2 not in self.results.groups)
+        self.assertEqual(0,len(self.results.groups))
+    
+    def test_remove_duplicates_with_ref_files(self):
+        g1,g2 = self.results.groups
+        self.objects[0].is_ref = True
+        self.objects[1].is_ref = True
+        self.results.remove_duplicates([self.objects[2]])
+        self.assertEqual(0,len(g1))
+        self.assert_(g1 not in self.results.groups)
+    
+    def test_make_ref(self):
+        g = self.results.groups[0]
+        d = g.dupes[0]
+        self.results.make_ref(d)
+        self.assert_(d is g.ref)
+    
+    def test_sort_groups(self):
+        self.results.make_ref(self.objects[1]) #We want to make the 1024 sized object to go ref.
+        g1,g2 = self.groups
+        self.results.sort_groups(2) #2 is the key for size
+        self.assert_(self.results.groups[0] is g2)
+        self.assert_(self.results.groups[1] is g1)
+        self.results.sort_groups(2,False)
+        self.assert_(self.results.groups[0] is g1)
+        self.assert_(self.results.groups[1] is g2)
+    
+    def test_set_groups_when_sorted(self):
+        self.results.make_ref(self.objects[1]) #We want to make the 1024 sized object to go ref.
+        self.results.sort_groups(2)
+        objects,matches,groups = GetTestGroups()
+        g1,g2 = groups
+        g1.switch_ref(objects[1])
+        self.results.groups = groups
+        self.assert_(self.results.groups[0] is g2)
+        self.assert_(self.results.groups[1] is g1)
+    
+    def test_get_dupe_list(self):
+        self.assertEqual([self.objects[1],self.objects[2],self.objects[4]],self.results.dupes)
+    
+    def test_dupe_list_is_cached(self):
+        self.assert_(self.results.dupes is self.results.dupes)
+    
+    def test_dupe_list_cache_is_invalidated_when_needed(self):
+        o1,o2,o3,o4,o5 = self.objects
+        self.assertEqual([o2,o3,o5],self.results.dupes)
+        self.results.make_ref(o2)
+        self.assertEqual([o1,o3,o5],self.results.dupes)
+        objects,matches,groups = GetTestGroups()
+        o1,o2,o3,o4,o5 = objects
+        self.results.groups = groups
+        self.assertEqual([o2,o3,o5],self.results.dupes)
+    
+    def test_dupe_list_sort(self):
+        o1,o2,o3,o4,o5 = self.objects
+        o1.size = 5
+        o2.size = 4
+        o3.size = 3
+        o4.size = 2
+        o5.size = 1
+        self.results.sort_dupes(2)
+        self.assertEqual([o5,o3,o2],self.results.dupes)
+        self.results.sort_dupes(2,False)
+        self.assertEqual([o2,o3,o5],self.results.dupes)
+    
+    def test_dupe_list_remember_sort(self):
+        o1,o2,o3,o4,o5 = self.objects
+        o1.size = 5
+        o2.size = 4
+        o3.size = 3
+        o4.size = 2
+        o5.size = 1
+        self.results.sort_dupes(2)
+        self.results.make_ref(o2)
+        self.assertEqual([o5,o3,o1],self.results.dupes)
+    
+    def test_dupe_list_sort_delta_values(self):
+        o1,o2,o3,o4,o5 = self.objects
+        o1.size = 10
+        o2.size = 2 #-8
+        o3.size = 3 #-7
+        o4.size = 20
+        o5.size = 1 #-19
+        self.results.sort_dupes(2,delta=True)
+        self.assertEqual([o5,o2,o3],self.results.dupes)
+    
+    def test_sort_empty_list(self):
+        #There was an infinite loop when sorting an empty list.
+        r = Results(data)
+        r.sort_dupes(0)
+        self.assertEqual([],r.dupes)
+    
+    def test_dupe_list_update_on_remove_duplicates(self):
+        o1,o2,o3,o4,o5 = self.objects
+        self.assertEqual(3,len(self.results.dupes))
+        self.results.remove_duplicates([o2])
+        self.assertEqual(2,len(self.results.dupes))
+    
+
+class TCResultsMarkings(TestCase):
+    def setUp(self):
+        self.results = Results(data)
+        self.objects,self.matches,self.groups = GetTestGroups()
+        self.results.groups = self.groups
+    
+    def test_stat_line(self):
+        self.assertEqual("0 / 3 (0.00 B / 1.01 KB) duplicates marked.",self.results.stat_line)
+        self.results.mark(self.objects[1])
+        self.assertEqual("1 / 3 (1.00 KB / 1.01 KB) duplicates marked.",self.results.stat_line)
+        self.results.mark_invert()
+        self.assertEqual("2 / 3 (2.00 B / 1.01 KB) duplicates marked.",self.results.stat_line)
+        self.results.mark_invert()
+        self.results.unmark(self.objects[1])
+        self.results.mark(self.objects[2])
+        self.results.mark(self.objects[4])
+        self.assertEqual("2 / 3 (2.00 B / 1.01 KB) duplicates marked.",self.results.stat_line)
+        self.results.mark(self.objects[0]) #this is a ref, it can't be counted
+        self.assertEqual("2 / 3 (2.00 B / 1.01 KB) duplicates marked.",self.results.stat_line)
+        self.results.groups = self.groups
+        self.assertEqual("0 / 3 (0.00 B / 1.01 KB) duplicates marked.",self.results.stat_line)
+    
+    def test_with_ref_duplicate(self):
+        self.objects[1].is_ref = True
+        self.results.groups = self.groups
+        self.assert_(not self.results.mark(self.objects[1]))
+        self.results.mark(self.objects[2])
+        self.assertEqual("1 / 2 (1.00 B / 2.00 B) duplicates marked.",self.results.stat_line)
+    
+    def test_perform_on_marked(self):
+        def log_object(o):
+            log.append(o)
+            return True
+        
+        log = []
+        self.results.mark_all()
+        self.results.perform_on_marked(log_object,False)
+        self.assert_(self.objects[1] in log)
+        self.assert_(self.objects[2] in log)
+        self.assert_(self.objects[4] in log)
+        self.assertEqual(3,len(log))
+        log = []
+        self.results.mark_none()
+        self.results.mark(self.objects[4])
+        self.results.perform_on_marked(log_object,True)
+        self.assertEqual(1,len(log))
+        self.assert_(self.objects[4] in log)
+        self.assertEqual(1,len(self.results.groups))
+    
+    def test_perform_on_marked_with_problems(self):
+        def log_object(o):
+            log.append(o)
+            return o is not self.objects[1]
+        
+        log = []
+        self.results.mark_all()
+        self.assert_(self.results.is_marked(self.objects[1]))
+        self.assertEqual(1,self.results.perform_on_marked(log_object, True))
+        self.assertEqual(3,len(log))
+        self.assertEqual(1,len(self.results.groups))
+        self.assertEqual(2,len(self.results.groups[0]))
+        self.assert_(self.objects[1] in self.results.groups[0])
+        self.assert_(not self.results.is_marked(self.objects[2]))
+        self.assert_(self.results.is_marked(self.objects[1]))
+    
+    def test_perform_on_marked_with_ref(self):
+        def log_object(o):
+            log.append(o)
+            return True
+        
+        log = []
+        self.objects[0].is_ref = True
+        self.objects[1].is_ref = True
+        self.results.mark_all()
+        self.results.perform_on_marked(log_object,True)
+        self.assert_(self.objects[1] not in log)
+        self.assert_(self.objects[2] in log)
+        self.assert_(self.objects[4] in log)
+        self.assertEqual(2,len(log))
+        self.assertEqual(0,len(self.results.groups))
+    
+    def test_perform_on_marked_remove_objects_only_at_the_end(self):
+        def check_groups(o):
+            self.assertEqual(3,len(g1))
+            self.assertEqual(2,len(g2))
+            return True
+        
+        g1,g2 = self.results.groups
+        self.results.mark_all()
+        self.results.perform_on_marked(check_groups,True)
+        self.assertEqual(0,len(g1))
+        self.assertEqual(0,len(g2))
+        self.assertEqual(0,len(self.results.groups))
+    
+    def test_remove_duplicates(self):
+        g1 = self.results.groups[0]
+        g2 = self.results.groups[1]
+        self.results.mark(g1.dupes[0])
+        self.assertEqual("1 / 3 (1.00 KB / 1.01 KB) duplicates marked.",self.results.stat_line)
+        self.results.remove_duplicates([g1.dupes[1]])
+        self.assertEqual("1 / 2 (1.00 KB / 1.01 KB) duplicates marked.",self.results.stat_line)
+        self.results.remove_duplicates([g1.dupes[0]])
+        self.assertEqual("0 / 1 (0.00 B / 1.00 B) duplicates marked.",self.results.stat_line)
+    
+    def test_make_ref(self):
+        g = self.results.groups[0]
+        d = g.dupes[0]
+        self.results.mark(d)
+        self.assertEqual("1 / 3 (1.00 KB / 1.01 KB) duplicates marked.",self.results.stat_line)
+        self.results.make_ref(d)
+        self.assertEqual("0 / 3 (0.00 B / 3.00 B) duplicates marked.",self.results.stat_line)
+        self.results.make_ref(d)
+        self.assertEqual("0 / 3 (0.00 B / 3.00 B) duplicates marked.",self.results.stat_line)
+    
+    def test_SaveXML(self):
+        self.results.mark(self.objects[1])
+        self.results.mark_invert()
+        f = StringIO.StringIO()
+        self.results.save_to_xml(f)
+        f.seek(0)
+        doc = xml.dom.minidom.parse(f)
+        root = doc.documentElement
+        g1,g2 = root.getElementsByTagName('group')
+        d1,d2,d3 = g1.getElementsByTagName('file')
+        self.assertEqual('n',d1.getAttributeNode('marked').nodeValue)
+        self.assertEqual('n',d2.getAttributeNode('marked').nodeValue)
+        self.assertEqual('y',d3.getAttributeNode('marked').nodeValue)
+        d1,d2 = g2.getElementsByTagName('file')
+        self.assertEqual('n',d1.getAttributeNode('marked').nodeValue)
+        self.assertEqual('y',d2.getAttributeNode('marked').nodeValue)
+    
+    def test_LoadXML(self):
+        def get_file(path):
+            return [f for f in self.objects if str(f.path) == path][0]
+        
+        self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
+        self.results.mark(self.objects[1])
+        self.results.mark_invert()
+        f = StringIO.StringIO()
+        self.results.save_to_xml(f)
+        f.seek(0)
+        r = Results(data)
+        r.load_from_xml(f,get_file)
+        self.assert_(not r.is_marked(self.objects[0]))
+        self.assert_(not r.is_marked(self.objects[1]))
+        self.assert_(r.is_marked(self.objects[2]))
+        self.assert_(not r.is_marked(self.objects[3]))
+        self.assert_(r.is_marked(self.objects[4]))
+    
+
+class TCResultsXML(TestCase):
+    def setUp(self):
+        self.results = Results(data)
+        self.objects, self.matches, self.groups = GetTestGroups()
+        self.results.groups = self.groups
+    
+    def get_file(self, path): # use this as a callback for load_from_xml
+        return [o for o in self.objects if o.path == path][0]
+    
+    def test_save_to_xml(self):
+        self.objects[0].is_ref = True
+        self.objects[0].words = [['foo','bar']]
+        f = StringIO.StringIO()
+        self.results.save_to_xml(f)
+        f.seek(0)
+        doc = xml.dom.minidom.parse(f)
+        root = doc.documentElement
+        self.assertEqual('results',root.nodeName)
+        children = [c for c in root.childNodes if c.localName]
+        self.assertEqual(2,len(children))
+        self.assertEqual(2,len([c for c in children if c.nodeName == 'group']))
+        g1,g2 = children
+        children = [c for c in g1.childNodes if c.localName]
+        self.assertEqual(6,len(children))
+        self.assertEqual(3,len([c for c in children if c.nodeName == 'file']))
+        self.assertEqual(3,len([c for c in children if c.nodeName == 'match']))
+        d1,d2,d3 = [c for c in children if c.nodeName == 'file']
+        self.assertEqual(op.join('basepath','foo bar'),d1.getAttributeNode('path').nodeValue)
+        self.assertEqual(op.join('basepath','bar bleh'),d2.getAttributeNode('path').nodeValue)
+        self.assertEqual(op.join('basepath','foo bleh'),d3.getAttributeNode('path').nodeValue)
+        self.assertEqual('y',d1.getAttributeNode('is_ref').nodeValue)
+        self.assertEqual('n',d2.getAttributeNode('is_ref').nodeValue)
+        self.assertEqual('n',d3.getAttributeNode('is_ref').nodeValue)
+        self.assertEqual('foo,bar',d1.getAttributeNode('words').nodeValue)
+        self.assertEqual('bar,bleh',d2.getAttributeNode('words').nodeValue)
+        self.assertEqual('foo,bleh',d3.getAttributeNode('words').nodeValue)
+        children = [c for c in g2.childNodes if c.localName]
+        self.assertEqual(3,len(children))
+        self.assertEqual(2,len([c for c in children if c.nodeName == 'file']))
+        self.assertEqual(1,len([c for c in children if c.nodeName == 'match']))
+        d1,d2 = [c for c in children if c.nodeName == 'file']
+        self.assertEqual(op.join('basepath','ibabtu'),d1.getAttributeNode('path').nodeValue)
+        self.assertEqual(op.join('basepath','ibabtu'),d2.getAttributeNode('path').nodeValue)
+        self.assertEqual('n',d1.getAttributeNode('is_ref').nodeValue)
+        self.assertEqual('n',d2.getAttributeNode('is_ref').nodeValue)
+        self.assertEqual('ibabtu',d1.getAttributeNode('words').nodeValue)
+        self.assertEqual('ibabtu',d2.getAttributeNode('words').nodeValue)
+    
+    def test_LoadXML(self):
+        # Save to an in-memory stream, reload into a fresh Results and check both groups come back.
+        def lookup(path):
+            return [obj for obj in self.objects if str(obj.path) == path][0]
+        
+        self.objects[0].is_ref = True
+        self.objects[4].name = 'ibabtu 2' # two files can't share the same path
+        stream = StringIO.StringIO()
+        self.results.save_to_xml(stream)
+        stream.seek(0)
+        loaded = Results(data)
+        loaded.load_from_xml(stream, lookup)
+        self.assertEqual(2, len(loaded.groups))
+        first, second = loaded.groups
+        self.assertEqual(3, len(first))
+        self.assert_(first[0].is_ref) # only the first file was flagged as ref before saving
+        for idx in (1, 2):
+            self.assert_(not first[idx].is_ref)
+        for idx in range(3): # loaded dupes are the very objects handed back by lookup
+            self.assert_(first[idx] is self.objects[idx])
+        expected_words = [['foo','bar'], ['bar','bleh'], ['foo','bleh']]
+        for idx, words in enumerate(expected_words):
+            self.assertEqual(words, first[idx].words)
+        self.assertEqual(2, len(second))
+        for idx in (0, 1):
+            self.assert_(not second[idx].is_ref)
+        for idx in (0, 1):
+            self.assert_(second[idx] is self.objects[3 + idx])
+        for idx in (0, 1):
+            self.assertEqual(['ibabtu'], second[idx].words)
+    
+    def test_LoadXML_with_filename(self):
+        def lookup(path): # here save/load are given a file name rather than a stream
+            return [obj for obj in self.objects if str(obj.path) == path][0]
+        
+        self.objects[4].name = 'ibabtu 2' # two files can't share the same path
+        xml_path = op.join(self.tmpdir(), 'dupeguru_results.xml')
+        self.results.save_to_xml(xml_path)
+        reloaded = Results(data)
+        reloaded.load_from_xml(xml_path, lookup)
+        self.assertEqual(2, len(reloaded.groups))
+    
+    def test_LoadXML_with_some_files_that_dont_exist_anymore(self):
+        def lookup(path):
+            # Returning None stands for a saved file that can no longer be found.
+            gone = path.endswith('ibabtu 2')
+            return None if gone else [obj for obj in self.objects if str(obj.path) == path][0]
+        
+        self.objects[4].name = 'ibabtu 2' # two files can't share the same path
+        stream = StringIO.StringIO()
+        self.results.save_to_xml(stream)
+        stream.seek(0)
+        reloaded = Results(data)
+        reloaded.load_from_xml(stream, lookup)
+        self.assertEqual(1, len(reloaded.groups))
+        self.assertEqual(3, len(reloaded.groups[0]))
+    
+    def test_LoadXML_missing_attributes_and_bogus_elements(self):
+        def get_file(path):
+            return [f for f in self.objects if str(f.path) == path][0]
+        
+        doc = xml.dom.minidom.Document()
+        root = doc.appendChild(doc.createElement('foobar')) # the root element's name shouldn't matter
+        group_node = root.appendChild(doc.createElement('group'))
+        dupe_node = group_node.appendChild(doc.createElement('file')) # fully valid file node
+        dupe_node.setAttribute('path',op.join('basepath','foo bar'))
+        dupe_node.setAttribute('is_ref','y')
+        dupe_node.setAttribute('words','foo,bar')
+        dupe_node = group_node.appendChild(doc.createElement('file')) # is_ref missing: defaults to 'n'
+        dupe_node.setAttribute('path',op.join('basepath','foo bleh'))
+        dupe_node.setAttribute('words','foo,bleh')
+        dupe_node = group_node.appendChild(doc.createElement('file')) # words missing: invalid
+        dupe_node.setAttribute('path',op.join('basepath','bar bleh'))
+        dupe_node = group_node.appendChild(doc.createElement('file')) # path missing: invalid
+        dupe_node.setAttribute('words','foo,bleh')
+        dupe_node = group_node.appendChild(doc.createElement('foobar')) # unknown element name
+        dupe_node.setAttribute('path',op.join('basepath','bar bleh'))
+        dupe_node.setAttribute('is_ref','y')
+        dupe_node.setAttribute('words','bar,bleh')
+        match_node = group_node.appendChild(doc.createElement('match')) # match pointing to bad indexes
+        match_node.setAttribute('first', '42')
+        match_node.setAttribute('second', '45')
+        match_node = group_node.appendChild(doc.createElement('match')) # match with no attributes at all
+        match_node = group_node.appendChild(doc.createElement('match')) # match with non-int values
+        match_node.setAttribute('first', 'foo')
+        match_node.setAttribute('second', 'bar')
+        match_node.setAttribute('percentage', 'baz')
+        group_node = root.appendChild(doc.createElement('foobar')) # not a 'group' element: invalid
+        group_node = root.appendChild(doc.createElement('group')) # empty group
+        f = StringIO.StringIO()
+        doc.writexml(f,'\t','\t','\n',encoding='utf-8')
+        f.seek(0) # rewind so the loader reads the document from the start
+        r = Results(data)
+        r.load_from_xml(f,get_file)
+        self.assertEqual(1,len(r.groups)) # the bogus and the empty group are expected to be dropped
+        self.assertEqual(2,len(r.groups[0])) # only the two well-formed 'file' nodes are expected to load
+    
+    def test_xml_non_ascii(self):
+        def lookup(path):
+            if path == op.join('basepath',u'\xe9foo bar'):
+                return files[0]
+            elif path == op.join('basepath',u'bar bleh'):
+                return files[1]
+        
+        files = [NamedObject(u"\xe9foo bar",True),NamedObject("bar bleh",True)]
+        found = engine.getmatches(files) # just two objects here, so at most a single match
+        grouped = engine.get_groups(found)
+        for group in grouped:
+            group.prioritize(files.index) # keep the dupes in the same order as the files list
+        saved = Results(data)
+        saved.groups = grouped
+        stream = StringIO.StringIO()
+        saved.save_to_xml(stream)
+        stream.seek(0)
+        reloaded = Results(data)
+        reloaded.load_from_xml(stream, lookup)
+        head = reloaded.groups[0][0]
+        self.assertEqual(u"\xe9foo bar", head.name)
+        self.assertEqual(['efoo','bar'], head.words)
+    
+    def test_load_invalid_xml(self):
+        broken = StringIO.StringIO()
+        broken.write('<this is invalid')
+        broken.seek(0) # rewind so the loader reads from the start
+        loaded = Results(data)
+        loaded.load_from_xml(broken, None) # malformed XML is expected to be swallowed, not raised
+        self.assertEqual(0, len(loaded.groups))
+    
+    def test_load_non_existant_xml(self):
+        loaded = Results(data)
+        try: # a missing file must not surface as an IOError
+            loaded.load_from_xml('does_not_exist.xml', None)
+        except IOError:
+            self.fail()
+        self.assertEqual(0, len(loaded.groups))
+    
+    def test_remember_match_percentage(self):
+        # Match percentages written to XML must come back unchanged after a reload.
+        original = self.groups[0]
+        a, b, c = original
+        original.matches = set([
+            engine.Match(a, b, 42),
+            engine.Match(a, c, 43),
+            engine.Match(b, c, 46),
+        ])
+        stream = StringIO.StringIO()
+        self.results.save_to_xml(stream)
+        stream.seek(0)
+        reloaded = Results(data)
+        reloaded.load_from_xml(stream, self.get_file)
+        restored = reloaded.groups[0]
+        a, b, c = restored
+        pair = restored.get_match_of(b) # a - b
+        self.assertEqual(42, pair[2])
+        pair = restored.get_match_of(c) # a - c
+        self.assertEqual(43, pair[2])
+        restored.switch_ref(b)
+        pair = restored.get_match_of(c) # b - c
+        self.assertEqual(46, pair[2])
+    
+    def test_save_and_load(self):
+        # Regression: matches reloaded from XML used to lose their namedtuple type.
+        stream = StringIO.StringIO()
+        self.results.save_to_xml(stream)
+        stream.seek(0)
+        self.results.load_from_xml(stream, self.get_file)
+        first(self.results.groups[0].matches).percentage # attribute access must not raise
+    
+
+class TCResultsFilter(TestCase):
+    def setUp(self):
+        self.results = Results(data)
+        self.objects, self.matches, self.groups = GetTestGroups()
+        self.results.groups = self.groups
+        self.results.apply_filter(r'foo') # every test below starts with the 'foo' filter applied
+    
+    def test_groups(self):
+        self.assertEqual(1, len(self.results.groups))
+        self.assert_(self.results.groups[0] is self.groups[0])
+    
+    def test_dupes(self):
+        # Two objects match the filter. The first one is the ref, so only the 3rd object is in dupes.
+        self.assertEqual(1, len(self.results.dupes))
+        self.assert_(self.results.dupes[0] is self.objects[2])
+    
+    def test_cancel_filter(self):
+        self.results.apply_filter(None) # None cancels the filter set in setUp
+        self.assertEqual(3, len(self.results.dupes))
+        self.assertEqual(2, len(self.results.groups))
+    
+    def test_dupes_reconstructed_filtered(self):
+        # make_ref resets self.__dupes to None. When it's reconstructed, we want it filtered
+        dupe = self.results.dupes[0] # 3rd object
+        self.results.make_ref(dupe)
+        self.assertEqual(1, len(self.results.dupes))
+        self.assert_(self.results.dupes[0] is self.objects[0])
+    
+    def test_include_ref_dupes_in_filter(self):
+        # When only the ref of a group matches the filter, the group is still included
+        self.results.apply_filter(None)
+        self.results.apply_filter(r'foo bar')
+        self.assertEqual(1, len(self.results.groups))
+        self.assertEqual(0, len(self.results.dupes))
+    
+    def test_filters_build_on_one_another(self):
+        self.results.apply_filter(r'bar') # applied on top of the 'foo' filter from setUp
+        self.assertEqual(1, len(self.results.groups))
+        self.assertEqual(0, len(self.results.dupes))
+    
+    def test_stat_line(self):
+        expected = '0 / 1 (0.00 B / 1.00 B) duplicates marked. filter: foo'
+        self.assertEqual(expected, self.results.stat_line)
+        self.results.apply_filter(r'bar') # stacked filters are shown chained with '-->'
+        expected = '0 / 0 (0.00 B / 0.00 B) duplicates marked. filter: foo --> bar'
+        self.assertEqual(expected, self.results.stat_line)
+        self.results.apply_filter(None) # with no filter left, the 'filter:' suffix disappears
+        expected = '0 / 3 (0.00 B / 1.01 KB) duplicates marked.'
+        self.assertEqual(expected, self.results.stat_line)
+    
+    def test_mark_count_is_filtered_as_well(self):
+        self.results.apply_filter(None)
+        # We don't want to perform mark_all() because we want the mark list to contain objects
+        for dupe in self.results.dupes:
+            self.results.mark(dupe)
+        self.results.apply_filter(r'foo')
+        expected = '1 / 1 (1.00 B / 1.00 B) duplicates marked. filter: foo'
+        self.assertEqual(expected, self.results.stat_line)
+    
+    def test_sort_groups(self):
+        self.results.apply_filter(None)
+        self.results.make_ref(self.objects[1]) # to have the 1024 B object as ref
+        g1,g2 = self.groups
+        self.results.apply_filter('a') # matches both groups
+        self.results.sort_groups(2) # 2 is the key for size
+        self.assert_(self.results.groups[0] is g2)
+        self.assert_(self.results.groups[1] is g1)
+        self.results.apply_filter(None) # the sort order must survive cancelling the filter
+        self.assert_(self.results.groups[0] is g2)
+        self.assert_(self.results.groups[1] is g1)
+        self.results.sort_groups(2, False) # same key, opposite direction
+        self.results.apply_filter('a')
+        self.assert_(self.results.groups[1] is g2)
+        self.assert_(self.results.groups[0] is g1)
+    
+    def test_set_group(self):
+        # We want newly assigned groups to be filtered too
+        self.objects, self.matches, self.groups = GetTestGroups()
+        self.results.groups = self.groups
+        self.assertEqual(1, len(self.results.groups))
+        self.assert_(self.results.groups[0] is self.groups[0])
+    
+    def test_load_cancels_filter(self):
+        def get_file(path):
+            return [f for f in self.objects if str(f.path) == path][0]
+        
+        filename = op.join(self.tmpdir(), 'dupeguru_results.xml')
+        self.objects[4].name = 'ibabtu 2' # we can't have 2 files with the same path
+        self.results.save_to_xml(filename)
+        r = Results(data)
+        r.apply_filter('foo')
+        r.load_from_xml(filename,get_file)
+        self.assertEqual(2,len(r.groups)) # both groups visible: the filter is gone after loading
+    
+    def test_remove_dupe(self):
+        self.results.remove_duplicates([self.results.dupes[0]])
+        self.results.apply_filter(None)
+        self.assertEqual(2,len(self.results.groups))
+        self.assertEqual(2,len(self.results.dupes))
+        self.results.apply_filter('ibabtu')
+        self.results.remove_duplicates([self.results.dupes[0]])
+        self.results.apply_filter(None)
+        self.assertEqual(1,len(self.results.groups))
+        self.assertEqual(1,len(self.results.dupes))
+    
+    def test_filter_is_case_insensitive(self):
+        self.results.apply_filter(None)
+        self.results.apply_filter('FOO')
+        self.assertEqual(1, len(self.results.dupes))
+    
+    def test_make_ref_on_filtered_out_doesnt_mess_stats(self):
+        # When filtered, a group containing filtered out dupes will display them as being reference.
+        # When calling make_ref on such a dupe, the total size and dupecount stats get messed up
+        # because they are *not* counted in the stats in the first place.
+        g1, g2 = self.groups
+        bar_bleh = g1[1] # The "bar bleh" dupe is filtered out
+        self.results.make_ref(bar_bleh)
+        # Now the stats should display *2* markable dupes (instead of 1)
+        expected = '0 / 2 (0.00 B / 2.00 B) duplicates marked. filter: foo'
+        self.assertEqual(expected, self.results.stat_line)
+        self.results.apply_filter(None) # Now let's make sure our unfiltered results aren't corrupted
+        expected = '0 / 3 (0.00 B / 3.00 B) duplicates marked.'
+        self.assertEqual(expected, self.results.stat_line)
+    
+
+class TCResultsRefFile(TestCase):
+    def setUp(self):
+        self.results = Results(data)
+        self.objects, self.matches, self.groups = GetTestGroups()
+        for ref_file in self.objects[:2]: # flag the first two files as reference files
+            ref_file.is_ref = True
+        self.results.groups = self.groups
+    
+    def test_stat_line(self):
+        # stat_line as reported with the first two files flagged as references
+        self.assertEqual('0 / 2 (0.00 B / 2.00 B) duplicates marked.', self.results.stat_line)
+    
+    def test_make_ref(self):
+        first_group = self.results.groups[0]
+        dupe, ref = first_group.dupes[1], first_group.ref # dupes[1] is a non-ref file
+        self.results.make_ref(dupe)
+        self.assertEqual('0 / 1 (0.00 B / 1.00 B) duplicates marked.', self.results.stat_line)
+        self.results.make_ref(ref)
+        # handing the ref role back is expected to restore the initial stats
+        expected = '0 / 2 (0.00 B / 2.00 B) duplicates marked.'
+        self.assertEqual(expected, self.results.stat_line)
+    
--- a/core/tests/scanner_test.py
+++ b/core/tests/scanner_test.py
@@ -0,0 +1,467 @@
+# Created By: Virgil Dupras
+# Created On: 2006/03/03
+# $Id$
+# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
+# 
+# This software is licensed under the "HS" License as described in the "LICENSE" file, 
+# which should be included with this package. The terms are also available at 
+# http://www.hardcoded.net/licenses/hs_license
+
+from nose.tools import eq_
+
+from hsutil import job, io
+from hsutil.path import Path
+from hsutil.testcase import TestCase
+
+from .. import fs
+from ..engine import getwords, Match
+from ..ignore import IgnoreList
+from ..scanner import *
+
+class NamedObject(object):
+    # Lightweight stand-in for a file, carrying only the attributes these tests set and read.
+    def __init__(self, name="foobar", size=1):
+        self.name, self.size = name, size
+        self.path = Path('') # tests that care about paths overwrite this
+        self.words = getwords(name)
+    
+
+no = NamedObject # short alias that keeps the file lists in the tests below compact
+
+#--- Scanner
+class ScannerTestFakeFiles(TestCase):
+    def setUp(self):
+        # This is a hack to avoid invalidating all previous tests since the scanner started to test
+        # for file existence before doing the match grouping.
+        self.mock(io, 'exists', lambda _: True) # every fake file is reported as existing
+    
+    def test_empty(self):
+        s = Scanner()
+        r = s.GetDupeGroups([])
+        eq_(r, [])
+    
+    def test_default_settings(self):
+        s = Scanner()
+        eq_(s.min_match_percentage, 80)
+        eq_(s.scan_type, SCAN_TYPE_FILENAME)
+        eq_(s.mix_file_kind, True)
+        eq_(s.word_weighting, False)
+        eq_(s.match_similar_words, False)
+        assert isinstance(s.ignore_list, IgnoreList)
+    
+    def test_simple_with_default_settings(self):
+        s = Scanner()
+        f = [no('foo bar'), no('foo bar'), no('foo bleh')]
+        r = s.GetDupeGroups(f)
+        eq_(len(r), 1)
+        g = r[0]
+        # 'foo bleh' cannot be in the group because the default min match % is 80
+        eq_(len(g), 2)
+        assert g.ref in f[:2]
+        assert g.dupes[0] in f[:2]
+    
+    def test_simple_with_lower_min_match(self):
+        s = Scanner()
+        s.min_match_percentage = 50
+        f = [no('foo bar'), no('foo bar'), no('foo bleh')]
+        r = s.GetDupeGroups(f)
+        eq_(len(r), 1)
+        g = r[0]
+        eq_(len(g), 3) # at 50%, 'foo bleh' is expected to join the group as well
+    
+    def test_trim_all_ref_groups(self):
+        # When all files of a group are ref, don't include that group in the results, but also don't
+        # count the files from that group as discarded.
+        s = Scanner()
+        f = [no('foo'), no('foo'), no('bar'), no('bar')]
+        f[2].is_ref = True
+        f[3].is_ref = True
+        r = s.GetDupeGroups(f)
+        eq_(len(r), 1)
+        eq_(s.discarded_file_count, 0)
+    
+    def test_priorize(self):
+        s = Scanner()
+        f = [no('foo'), no('foo'), no('bar'), no('bar')]
+        f[1].size = 2
+        f[2].size = 3
+        f[3].is_ref = True
+        r = s.GetDupeGroups(f)
+        g1, g2 = r # group order is not fixed, hence the membership checks below
+        assert f[1] in (g1.ref,g2.ref)
+        assert f[0] in (g1.dupes[0],g2.dupes[0])
+        assert f[3] in (g1.ref,g2.ref)
+        assert f[2] in (g1.dupes[0],g2.dupes[0])
+    
+    def test_content_scan(self):
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_CONTENT
+        f = [no('foo'), no('bar'), no('bleh')]
+        f[0].md5 = f[0].md5partial = 'foobar'
+        f[1].md5 = f[1].md5partial = 'foobar'
+        f[2].md5 = f[2].md5partial = 'bleh'
+        r = s.GetDupeGroups(f)
+        eq_(len(r), 1)
+        eq_(len(r[0]), 2)
+        eq_(s.discarded_file_count, 0) # don't count the different md5 as discarded!
+    
+    def test_content_scan_compare_sizes_first(self):
+        class MyFile(no):
+            @property
+            def md5(file):
+                raise AssertionError() # fails the test if md5 is read for files of different sizes
+    
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_CONTENT
+        f = [MyFile('foo', 1), MyFile('bar', 2)]
+        eq_(len(s.GetDupeGroups(f)), 0)
+    
+    def test_min_match_perc_doesnt_matter_for_content_scan(self):
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_CONTENT
+        f = [no('foo'), no('bar'), no('bleh')]
+        f[0].md5 = f[0].md5partial = 'foobar'
+        f[1].md5 = f[1].md5partial = 'foobar'
+        f[2].md5 = f[2].md5partial = 'bleh'
+        s.min_match_percentage = 101 # above any reachable percentage
+        r = s.GetDupeGroups(f)
+        eq_(len(r), 1)
+        eq_(len(r[0]), 2)
+        s.min_match_percentage = 0
+        r = s.GetDupeGroups(f)
+        eq_(len(r), 1)
+        eq_(len(r[0]), 2)
+    
+    def test_content_scan_doesnt_put_md5_in_words_at_the_end(self):
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_CONTENT
+        f = [no('foo'),no('bar')]
+        f[0].md5 = f[0].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
+        f[1].md5 = f[1].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
+        r = s.GetDupeGroups(f)
+        g = r[0] # NOTE(review): no assertion follows; the test only checks that a group exists
+    
+    def test_extension_is_not_counted_in_filename_scan(self):
+        s = Scanner()
+        s.min_match_percentage = 100
+        f = [no('foo.bar'), no('foo.bleh')]
+        r = s.GetDupeGroups(f)
+        eq_(len(r), 1)
+        eq_(len(r[0]), 2)
+    
+    def test_job(self):
+        def do_progress(progress, desc=''):
+            log.append(progress) # record every progress value reported by the job
+            return True
+    
+        s = Scanner()
+        log = []
+        f = [no('foo bar'), no('foo bar'), no('foo bleh')]
+        r = s.GetDupeGroups(f, job.Job(1, do_progress))
+        eq_(log[0], 0)
+        eq_(log[-1], 100)
+    
+    def test_mix_file_kind(self):
+        s = Scanner()
+        s.mix_file_kind = False
+        f = [no('foo.1'), no('foo.2')]
+        r = s.GetDupeGroups(f)
+        eq_(len(r), 0) # different extensions, so no group is expected
+    
+    def test_word_weighting(self):
+        s = Scanner()
+        s.min_match_percentage = 75
+        s.word_weighting = True
+        f = [no('foo bar'), no('foo bar bleh')]
+        r = s.GetDupeGroups(f)
+        eq_(len(r), 1)
+        g = r[0]
+        m = g.get_match_of(g.dupes[0])
+        eq_(m.percentage, 75) # 16 letters, 12 matching
+    
+    def test_similar_words(self):
+        s = Scanner()
+        s.match_similar_words = True
+        f = [no('The White Stripes'), no('The Whites Stripe'), no('Limp Bizkit'), no('Limp Bizkitt')]
+        r = s.GetDupeGroups(f)
+        eq_(len(r), 2)
+    
+    def test_fields(self):
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_FIELDS
+        f = [no('The White Stripes - Little Ghost'), no('The White Stripes - Little Acorn')]
+        r = s.GetDupeGroups(f)
+        eq_(len(r), 0)
+    
+    def test_fields_no_order(self):
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_FIELDS_NO_ORDER
+        f = [no('The White Stripes - Little Ghost'), no('Little Ghost - The White Stripes')]
+        r = s.GetDupeGroups(f)
+        eq_(len(r), 1)
+    
+    def test_tag_scan(self):
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_TAG
+        o1 = no('foo')
+        o2 = no('bar')
+        o1.artist = 'The White Stripes'
+        o1.title = 'The Air Near My Fingers'
+        o2.artist = 'The White Stripes'
+        o2.title = 'The Air Near My Fingers'
+        r = s.GetDupeGroups([o1,o2])
+        eq_(len(r), 1) # names differ; the files are grouped on their identical tags
+    
+    def test_tag_with_album_scan(self):
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_TAG
+        s.scanned_tags = set(['artist', 'album', 'title'])
+        o1 = no('foo')
+        o2 = no('bar')
+        o3 = no('bleh')
+        o1.artist = 'The White Stripes'
+        o1.title = 'The Air Near My Fingers'
+        o1.album = 'Elephant'
+        o2.artist = 'The White Stripes'
+        o2.title = 'The Air Near My Fingers'
+        o2.album = 'Elephant'
+        o3.artist = 'The White Stripes'
+        o3.title = 'The Air Near My Fingers'
+        o3.album = 'foobar' # only the album differs from o1 and o2
+        r = s.GetDupeGroups([o1,o2,o3])
+        eq_(len(r), 1)
+    
+    def test_that_dash_in_tags_dont_create_new_fields(self):
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_TAG
+        s.scanned_tags = set(['artist', 'album', 'title'])
+        s.min_match_percentage = 50
+        o1 = no('foo')
+        o2 = no('bar')
+        o1.artist = 'The White Stripes - a'
+        o1.title = 'The Air Near My Fingers - a'
+        o1.album = 'Elephant - a'
+        o2.artist = 'The White Stripes - b'
+        o2.title = 'The Air Near My Fingers - b'
+        o2.album = 'Elephant - b'
+        r = s.GetDupeGroups([o1,o2])
+        eq_(len(r), 1)
+    
+    def test_tag_scan_with_different_scanned(self):
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_TAG
+        s.scanned_tags = set(['track', 'year'])
+        o1 = no('foo')
+        o2 = no('bar')
+        o1.artist = 'The White Stripes'
+        o1.title = 'some title'
+        o1.track = 'foo'
+        o1.year = 'bar'
+        o2.artist = 'The White Stripes'
+        o2.title = 'another title' # titles differ, but 'title' is not among the scanned tags
+        o2.track = 'foo'
+        o2.year = 'bar'
+        r = s.GetDupeGroups([o1, o2])
+        eq_(len(r), 1)
+    
+    def test_tag_scan_only_scans_existing_tags(self):
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_TAG
+        s.scanned_tags = set(['artist', 'foo'])
+        o1 = no('foo')
+        o2 = no('bar')
+        o1.artist = 'The White Stripes'
+        o1.foo = 'foo'
+        o2.artist = 'The White Stripes'
+        o2.foo = 'bar'
+        r = s.GetDupeGroups([o1, o2])
+        eq_(len(r), 1) # Because 'foo' is not scanned, they match
+    
+    def test_tag_scan_converts_to_str(self):
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_TAG
+        s.scanned_tags = set(['track'])
+        o1 = no('foo')
+        o2 = no('bar')
+        o1.track = 42 # deliberately not a string
+        o2.track = 42
+        try:
+            r = s.GetDupeGroups([o1, o2])
+        except TypeError:
+            raise AssertionError()
+        eq_(len(r), 1)
+    
+    def test_tag_scan_non_ascii(self):
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_TAG
+        s.scanned_tags = set(['title'])
+        o1 = no('foo')
+        o2 = no('bar')
+        o1.title = u'foobar\u00e9'
+        o2.title = u'foobar\u00e9'
+        try:
+            r = s.GetDupeGroups([o1, o2])
+        except UnicodeEncodeError:
+            raise AssertionError()
+        eq_(len(r), 1)
+    
+    def test_audio_content_scan(self):
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_CONTENT_AUDIO
+        f = [no('foo'), no('bar'), no('bleh')]
+        f[0].md5 = 'foo'
+        f[1].md5 = 'bar'
+        f[2].md5 = 'bleh'
+        f[0].md5partial = 'foo' # f[0] and f[1] share md5partial only; their full md5 differs
+        f[1].md5partial = 'foo'
+        f[2].md5partial = 'bleh'
+        f[0].audiosize = 1
+        f[1].audiosize = 1
+        f[2].audiosize = 1
+        r = s.GetDupeGroups(f)
+        eq_(len(r), 1)
+        eq_(len(r[0]), 2)
+    
+    def test_audio_content_scan_compare_sizes_first(self):
+        class MyFile(no):
+            @property
+            def md5partial(file):
+                raise AssertionError() # fails the test if md5partial is read for differing audiosize
+    
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_CONTENT_AUDIO
+        f = [MyFile('foo'), MyFile('bar')]
+        f[0].audiosize = 1
+        f[1].audiosize = 2
+        eq_(len(s.GetDupeGroups(f)), 0)
+    
+    def test_ignore_list(self):
+        s = Scanner()
+        f1 = no('foobar')
+        f2 = no('foobar')
+        f3 = no('foobar')
+        f1.path = Path('dir1/foobar')
+        f2.path = Path('dir2/foobar')
+        f3.path = Path('dir3/foobar')
+        s.ignore_list.Ignore(str(f1.path),str(f2.path))
+        s.ignore_list.Ignore(str(f1.path),str(f3.path))
+        r = s.GetDupeGroups([f1,f2,f3])
+        eq_(len(r), 1)
+        g = r[0]
+        eq_(len(g.dupes), 1)
+        assert f1 not in g # both of f1's pairings were ignored
+        assert f2 in g
+        assert f3 in g
+        # Ignored matches are not counted as discarded
+        eq_(s.discarded_file_count, 0)
+    
+    def test_ignore_list_checks_for_unicode(self):
+        # The scanner was calling path_str for ignore list checks. Since the Path changes, it must
+        # be unicode(path).
+        s = Scanner()
+        f1 = no('foobar')
+        f2 = no('foobar')
+        f3 = no('foobar')
+        f1.path = Path(u'foo1\u00e9')
+        f2.path = Path(u'foo2\u00e9')
+        f3.path = Path(u'foo3\u00e9')
+        s.ignore_list.Ignore(unicode(f1.path),unicode(f2.path))
+        s.ignore_list.Ignore(unicode(f1.path),unicode(f3.path))
+        r = s.GetDupeGroups([f1,f2,f3])
+        eq_(len(r), 1)
+        g = r[0]
+        eq_(len(g.dupes), 1)
+        assert f1 not in g
+        assert f2 in g
+        assert f3 in g
+    
+    def test_file_evaluates_to_false(self):
+        # A very wrong way to use any() was added at some point, causing resulting group list
+        # to be empty.
+        class FalseNamedObject(NamedObject):
+            def __nonzero__(self):
+                return False
+        
+    
+        s = Scanner()
+        f1 = FalseNamedObject('foobar')
+        f2 = FalseNamedObject('foobar')
+        r = s.GetDupeGroups([f1, f2])
+        eq_(len(r), 1)
+    
+    def test_size_threshold(self):
+        # Only files whose size is equal to or higher than size_threshold are scanned
+        s = Scanner()
+        f1 = no('foo', 1)
+        f2 = no('foo', 2)
+        f3 = no('foo', 3)
+        s.size_threshold = 2
+        groups = s.GetDupeGroups([f1,f2,f3])
+        eq_(len(groups), 1)
+        [group] = groups
+        eq_(len(group), 2)
+        assert f1 not in group
+        assert f2 in group
+        assert f3 in group
+    
+    def test_tie_breaker_path_deepness(self):
+        # If there is a tie in prioritization, path deepness is used as a tie breaker
+        s = Scanner()
+        o1, o2 = no('foo'), no('foo')
+        o1.path = Path('foo')
+        o2.path = Path('foo/bar')
+        [group] = s.GetDupeGroups([o1, o2])
+        assert group.ref is o2
+    
+    def test_tie_breaker_copy(self):
+        # if copy is in the words used (even if it has a deeper path), it becomes a dupe
+        s = Scanner()
+        o1, o2 = no('foo bar Copy'), no('foo bar')
+        o1.path = Path('deeper/path')
+        o2.path = Path('foo')
+        [group] = s.GetDupeGroups([o1, o2])
+        assert group.ref is o2
+    
+    def test_tie_breaker_same_name_plus_digit(self):
+        # if ref has the same words as dupe, but has just one extra word which is a digit, it
+        # becomes a dupe
+        s = Scanner()
+        o1, o2 = no('foo bar 42'), no('foo bar')
+        o1.path = Path('deeper/path')
+        o2.path = Path('foo')
+        [group] = s.GetDupeGroups([o1, o2])
+        assert group.ref is o2
+    
+    def test_partial_group_match(self):
+        # Count the number of discarded matches (when a file doesn't match all other dupes of the 
+        # group) in Scanner.discarded_file_count
+        s = Scanner()
+        o1, o2, o3 = no('a b'), no('a'), no('b')
+        s.min_match_percentage = 50
+        [group] = s.GetDupeGroups([o1, o2, o3])
+        eq_(len(group), 2)
+        assert o1 in group
+        assert o2 in group
+        assert o3 not in group
+        eq_(s.discarded_file_count, 1)
+    
+
+class ScannerTest(TestCase):
+    def test_dont_group_files_that_dont_exist(self):
+        # Files can be moved or deleted by the user while a scan is running, so groups must only
+        # be built from files that still exist. file2 is removed between matching and grouping.
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_CONTENT
+        p = self.tmppath()
+        for name in ('file1', 'file2'):
+            with io.open(p + name, 'w') as fp: # closed (and flushed) before the files are read
+                fp.write('foo')
+        file1, file2 = fs.get_files(p)
+        def getmatches(*args, **kw):
+            # Stand-in for Scanner._getmatches: delete file2, then still report it as a match.
+            io.remove(file2.path)
+            return [Match(file1, file2, 100)]
+        s._getmatches = getmatches
+        
+        assert not s.GetDupeGroups([file1, file2])
+