Mirror of https://github.com/arsenetar/dupeguru.git, synced 2026-01-25 16:11:39 +00:00

Compare commits: me5.6.6...before-tig (22 commits)

911521d8e0, b25c1c3a3b, 37a40040b3, 25dadc83eb, b8c11b5aae, a3ab314378,
794192835d, 385768a69b, a281931b16, 085311d559, 4d7f032889, cf44c93013,
787cbcd01f, b2b316b642, 49165125e4, 54ac0fd19e, 0aff7f16e5, f9abc3b35d,
b167a51243, 371cdda911, 11977c6533, 7228adf433
```diff
@@ -14,13 +14,13 @@ import os
 import os.path as op
 import logging
 
-from hsutil import job, io, files
+from hsutil import io, files
 from hsutil.path import Path
 from hsutil.reg import RegistrableApplication, RegistrationRequired
 from hsutil.misc import flatten, first
 from hsutil.str import escape
 
-from . import directories, results, scanner, export
+from . import directories, results, scanner, export, fs
 
 JOB_SCAN = 'job_scan'
 JOB_LOAD = 'job_load'
@@ -98,13 +98,8 @@ class DupeGuru(RegistrableApplication):
         return ['---'] * len(self.data.COLUMNS)
 
     def _get_file(self, str_path):
-        p = Path(str_path)
-        for d in self.directories:
-            if p not in d.path:
-                continue
-            result = d.find_path(p[d.path:])
-            if result is not None:
-                return result
+        path = Path(str_path)
+        return fs.get_file(path, self.directories.fileclasses)
 
     @staticmethod
     def _recycle_dupe(dupe):
@@ -150,7 +145,7 @@ class DupeGuru(RegistrableApplication):
         2 = absolute re-creation.
         """
         source_path = dupe.path
-        location_path = dupe.root.path
+        location_path = first(p for p in self.directories if dupe.path in p)
         dest_path = Path(destination)
         if dest_type == 2:
             dest_path = dest_path + source_path[1:-1] #Remove drive letter and filename
```
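The `_get_file` hunk above is the core move of this changeset: string paths now resolve directly to `File` objects through the new `fs` module instead of being looked up in an hsfs `Directory` tree. A minimal usage sketch of the new helper; the sample path is hypothetical and the `dupeguru` package name is taken from the test imports further down:

```python
from hsutil.path import Path
from dupeguru import fs

# get_file() asks each class in `fileclasses`, in order, whether it
# can_handle() the path, and instantiates the first one that accepts it;
# it returns None when nothing matches (e.g. a directory or a symlink).
f = fs.get_file(Path('/tmp/example.txt'), fileclasses=[fs.File])
if f is not None:
    print f.name, f.size  # metadata is read lazily on first attribute access
```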
```diff
@@ -12,13 +12,12 @@ from AppKit import *
 import logging
 import os.path as op
 
-import hsfs as fs
 from hsutil import io, cocoa, job
 from hsutil.cocoa import install_exception_hook
 from hsutil.misc import stripnone
 from hsutil.reg import RegistrationRequired
 
-import app, data
+from . import app, fs
 
 JOBID2TITLE = {
     app.JOB_SCAN: "Scanning for duplicates",
@@ -43,8 +42,6 @@ class DupeGuru(app.DupeGuru):
         logging.basicConfig(level=LOGGING_LEVEL, format='%(levelname)s %(message)s')
         logging.debug('started in debug mode')
         install_exception_hook()
-        if data_module is None:
-            data_module = data
         appsupport = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDomainMask, True)[0]
         appdata = op.join(appsupport, appdata_subdir)
         app.DupeGuru.__init__(self, data_module, appdata, appid)
@@ -91,15 +88,15 @@ class DupeGuru(app.DupeGuru):
         except IndexError:
             return (None,None)
 
-    def GetDirectory(self,node_path,curr_dir=None):
+    def get_folder_path(self, node_path, curr_path=None):
         if not node_path:
-            return curr_dir
-        if curr_dir is not None:
-            l = curr_dir.dirs
+            return curr_path
+        current_index = node_path[0]
+        if curr_path is None:
+            curr_path = self.directories[current_index]
         else:
-            l = self.directories
-        d = l[node_path[0]]
-        return self.GetDirectory(node_path[1:],d)
+            curr_path = self.directories.get_subfolders(curr_path)[current_index]
+        return self.get_folder_path(node_path[1:], curr_path)
 
     def RefreshDetailsTable(self,dupe,group):
         l1 = self._get_display_info(dupe, group, False)
@@ -146,13 +143,13 @@ class DupeGuru(app.DupeGuru):
     def RemoveSelected(self):
        self.results.remove_duplicates(self.selected_dupes)
 
-    def RenameSelected(self,newname):
+    def RenameSelected(self, newname):
        try:
            d = self.selected_dupes[0]
-            d = d.move(d.parent,newname)
+            d.rename(newname)
            return True
-        except (IndexError,fs.FSError),e:
-            logging.warning("dupeGuru Warning: %s" % str(e))
+        except (IndexError, fs.FSError) as e:
+            logging.warning("dupeGuru Warning: %s" % unicode(e))
            return False
 
     def RevealSelected(self):
@@ -214,9 +211,9 @@ class DupeGuru(app.DupeGuru):
             self.results.dupes[row] for row in rows if row in xrange(len(self.results.dupes))
         ]
 
-    def SetDirectoryState(self,node_path,state):
-        d = self.GetDirectory(node_path)
-        self.directories.set_state(d.path,state)
+    def SetDirectoryState(self, node_path, state):
+        p = self.get_folder_path(node_path)
+        self.directories.set_state(p, state)
 
     def sort_dupes(self,key,asc):
         self.results.sort_dupes(key,asc,self.display_delta_values)
@@ -245,8 +242,12 @@ class DupeGuru(app.DupeGuru):
             return [len(g.dupes) for g in self.results.groups]
         elif tag == 1: #Directories
             try:
-                dirs = self.GetDirectory(node_path).dirs if node_path else self.directories
-                return [d.dircount for d in dirs]
+                if node_path:
+                    path = self.get_folder_path(node_path)
+                    subfolders = self.directories.get_subfolders(path)
+                else:
+                    subfolders = self.directories
+                return [len(self.directories.get_subfolders(path)) for path in subfolders]
             except IndexError: # node_path out of range
                 return []
         else: #Power Marker
@@ -270,8 +271,9 @@ class DupeGuru(app.DupeGuru):
             return result
         elif tag == 1: #Directories
             try:
-                d = self.GetDirectory(node_path)
-                return [d.name, self.directories.get_state(d.path)]
+                path = self.get_folder_path(node_path)
+                name = unicode(path) if len(node_path) == 1 else path[-1]
+                return [name, self.directories.get_state(path)]
             except IndexError: # node_path out of range
                 return []
```
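`GetDirectory` becomes `get_folder_path`: a tree node path (a list of integer indexes) is resolved recursively, indexing the root list first and the sorted subfolder list at each deeper level, and the result is a plain `Path` rather than an hsfs node. A hypothetical walk-through, assuming two root folders were already added to the app:

```python
# node_path [1, 0] means: second root folder, then its first subfolder
# (Directories.get_subfolders() sorts names case-insensitively).
dirs = app.directories                 # the app's Directories instance
root = dirs[1]                         # Path of the second root
child = dirs.get_subfolders(root)[0]   # its first sorted subfolder
assert app.get_folder_path([1, 0]) == child
```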
```diff
@@ -40,63 +40,3 @@ def format_dupe_count(c):
 
 def cmp_value(value):
     return value.lower() if isinstance(value, basestring) else value
-
-COLUMNS = [
-    {'attr':'name','display':'Filename'},
-    {'attr':'path','display':'Directory'},
-    {'attr':'size','display':'Size (KB)'},
-    {'attr':'extension','display':'Kind'},
-    {'attr':'ctime','display':'Creation'},
-    {'attr':'mtime','display':'Modification'},
-    {'attr':'percentage','display':'Match %'},
-    {'attr':'words','display':'Words Used'},
-    {'attr':'dupe_count','display':'Dupe Count'},
-]
-
-METADATA_TO_READ = ['size', 'ctime', 'mtime']
-
-def GetDisplayInfo(dupe, group, delta):
-    size = dupe.size
-    ctime = dupe.ctime
-    mtime = dupe.mtime
-    m = group.get_match_of(dupe)
-    if m:
-        percentage = m.percentage
-        dupe_count = 0
-        if delta:
-            r = group.ref
-            size -= r.size
-            ctime -= r.ctime
-            mtime -= r.mtime
-    else:
-        percentage = group.percentage
-        dupe_count = len(group.dupes)
-    return [
-        dupe.name,
-        format_path(dupe.path),
-        format_size(size, 0, 1, False),
-        dupe.extension,
-        format_timestamp(ctime, delta and m),
-        format_timestamp(mtime, delta and m),
-        format_perc(percentage),
-        format_words(dupe.words),
-        format_dupe_count(dupe_count)
-    ]
-
-def GetDupeSortKey(dupe, get_group, key, delta):
-    if key == 6:
-        m = get_group().get_match_of(dupe)
-        return m.percentage
-    if key == 8:
-        return 0
-    r = cmp_value(getattr(dupe, COLUMNS[key]['attr']))
-    if delta and (key in (2, 4, 5)):
-        r -= cmp_value(getattr(get_group().ref, COLUMNS[key]['attr']))
-    return r
-
-def GetGroupSortKey(group, key):
-    if key == 6:
-        return group.percentage
-    if key == 8:
-        return len(group)
-    return cmp_value(getattr(group.ref, COLUMNS[key]['attr']))
```
```diff
@@ -9,11 +9,12 @@
 
 import xml.dom.minidom
 
-from hsfs import phys
-import hsfs as fs
+from hsutil import io
 from hsutil.files import FileOrPath
 from hsutil.path import Path
 
+from . import fs
+
 (STATE_NORMAL,
 STATE_REFERENCE,
 STATE_EXCLUDED) = range(3)
@@ -26,15 +27,14 @@ class InvalidPathError(Exception):
 
 class Directories(object):
     #---Override
-    def __init__(self):
+    def __init__(self, fileclasses=[fs.File]):
         self._dirs = []
         self.states = {}
-        self.dirclass = phys.Directory
-        self.special_dirclasses = {}
+        self.fileclasses = fileclasses
 
-    def __contains__(self,path):
-        for d in self._dirs:
-            if path in d.path:
+    def __contains__(self, path):
+        for p in self._dirs:
+            if path in p:
                 return True
         return False
 
@@ -53,8 +53,7 @@ class Directories(object):
         if path[-1].startswith('.'): # hidden
             return STATE_EXCLUDED
 
-    def _get_files(self, from_dir):
-        from_path = from_dir.path
+    def _get_files(self, from_path):
         state = self.get_state(from_path)
         if state == STATE_EXCLUDED:
             # Recursively get files from folders with lots of subfolder is expensive. However, there
@@ -62,14 +61,21 @@ class Directories(object):
             # through self.states and see if we must continue, or we can stop right here to save time
             if not any(p[:len(from_path)] == from_path for p in self.states):
                 return
-        result = []
-        for subdir in from_dir.dirs:
-            for file in self._get_files(subdir):
-                yield file
-        if state != STATE_EXCLUDED:
-            for file in from_dir.files:
-                file.is_ref = state == STATE_REFERENCE
-                yield file
+        try:
+            filepaths = set()
+            if state != STATE_EXCLUDED:
+                for file in fs.get_files(from_path, fileclasses=self.fileclasses):
+                    file.is_ref = state == STATE_REFERENCE
+                    filepaths.add(file.path)
+                    yield file
+            subpaths = [from_path + name for name in io.listdir(from_path)]
+            # it's possible that a folder (bundle) gets into the file list. in that case, we don't want to recurse into it
+            subfolders = [p for p in subpaths if not io.islink(p) and io.isdir(p) and p not in filepaths]
+            for subfolder in subfolders:
+                for file in self._get_files(subfolder):
+                    yield file
+        except (EnvironmentError, fs.InvalidPath):
+            pass
 
     #---Public
     def add_path(self, path):
@@ -80,29 +86,30 @@ class Directories(object):
         under it will be removed. Can also raise InvalidPathError if 'path' does not exist.
         """
         if path in self:
-            raise AlreadyThereError
-        self._dirs = [d for d in self._dirs if d.path not in path]
-        try:
-            dirclass = self.special_dirclasses.get(path, self.dirclass)
-            d = dirclass(None, unicode(path))
-            d[:] #If an InvalidPath exception has to be raised, it will be raised here
-            self._dirs.append(d)
-            return d
-        except fs.InvalidPath:
+            raise AlreadyThereError()
+        if not io.exists(path):
             raise InvalidPathError()
+        self._dirs = [p for p in self._dirs if p not in path]
+        self._dirs.append(path)
+
+    @staticmethod
+    def get_subfolders(path):
+        """returns a sorted list of paths corresponding to subfolders in `path`"""
+        try:
+            names = [name for name in io.listdir(path) if io.isdir(path + name)]
+            names.sort(key=lambda x:x.lower())
+            return [path + name for name in names]
+        except EnvironmentError:
+            return []
 
     def get_files(self):
         """Returns a list of all files that are not excluded.
 
         Returned files also have their 'is_ref' attr set.
         """
-        for d in self._dirs:
-            d.force_update()
-            try:
-                for file in self._get_files(d):
-                    yield file
-            except fs.InvalidPath:
-                pass
+        for path in self._dirs:
+            for file in self._get_files(path):
+                yield file
 
     def get_state(self, path):
         """Returns the state of 'path' (One of the STATE_* const.)
@@ -123,8 +130,8 @@ class Directories(object):
             doc = xml.dom.minidom.parse(infile)
         except:
             return
-        root_dir_nodes = doc.getElementsByTagName('root_directory')
-        for rdn in root_dir_nodes:
+        root_path_nodes = doc.getElementsByTagName('root_directory')
+        for rdn in root_path_nodes:
             if not rdn.getAttributeNode('path'):
                 continue
             path = rdn.getAttributeNode('path').nodeValue
@@ -144,9 +151,9 @@ class Directories(object):
         with FileOrPath(outfile, 'wb') as fp:
             doc = xml.dom.minidom.Document()
             root = doc.appendChild(doc.createElement('directories'))
-            for root_dir in self:
-                root_dir_node = root.appendChild(doc.createElement('root_directory'))
-                root_dir_node.setAttribute('path', unicode(root_dir.path).encode('utf-8'))
+            for root_path in self:
+                root_path_node = root.appendChild(doc.createElement('root_directory'))
+                root_path_node.setAttribute('path', unicode(root_path).encode('utf-8'))
             for path, state in self.states.iteritems():
                 state_node = root.appendChild(doc.createElement('state'))
                 state_node.setAttribute('path', unicode(path).encode('utf-8'))
```
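With the hunks above, `Directories` stores bare `hsutil` `Path` objects, validates them with `io.exists()` at `add_path()` time, and walks the filesystem through the new `fs` module on every `get_files()` call. A hedged usage sketch; the paths are hypothetical:

```python
from hsutil.path import Path
from dupeguru import directories, fs

d = directories.Directories(fileclasses=[fs.File])
d.add_path(Path('/Users/me/Music'))   # raises InvalidPathError if missing
d.set_state(Path('/Users/me/Music/backup'), directories.STATE_EXCLUDED)
for f in d.get_files():               # fs.File objects with is_ref already set
    print f.path, f.is_ref
print d.get_subfolders(Path('/Users/me/Music'))  # sorted list of Path objects
```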
```diff
@@ -9,6 +9,7 @@
 
 from __future__ import division
 import difflib
+import itertools
 import logging
 import string
 from collections import defaultdict, namedtuple
@@ -156,58 +157,69 @@ def get_match(first, second, flags=()):
     percentage = compare(first.words, second.words, flags)
     return Match(first, second, percentage)
 
-class MatchFactory(object):
-    common_word_threshold = 50
-    match_similar_words = False
-    min_match_percentage = 0
-    weight_words = False
-    no_field_order = False
-    limit = 5000000
-
-    def getmatches(self, objects, j=job.nulljob):
-        j = j.start_subjob(2)
-        sj = j.start_subjob(2)
-        for o in objects:
-            if not hasattr(o, 'words'):
-                o.words = getwords(o.name)
-        word_dict = build_word_dict(objects, sj)
-        reduce_common_words(word_dict, self.common_word_threshold)
-        if self.match_similar_words:
-            merge_similar_words(word_dict)
-        match_flags = []
-        if self.weight_words:
-            match_flags.append(WEIGHT_WORDS)
-        if self.match_similar_words:
-            match_flags.append(MATCH_SIMILAR_WORDS)
-        if self.no_field_order:
-            match_flags.append(NO_FIELD_ORDER)
-        j.start_job(len(word_dict), '0 matches found')
-        compared = defaultdict(set)
-        result = []
-        try:
-            # This whole 'popping' thing is there to avoid taking too much memory at the same time.
-            while word_dict:
-                items = word_dict.popitem()[1]
-                while items:
-                    ref = items.pop()
-                    compared_already = compared[ref]
-                    to_compare = items - compared_already
-                    compared_already |= to_compare
-                    for other in to_compare:
-                        m = get_match(ref, other, match_flags)
-                        if m.percentage >= self.min_match_percentage:
-                            result.append(m)
-                            if len(result) >= self.limit:
-                                return result
-                j.add_progress(desc='%d matches found' % len(result))
-        except MemoryError:
-            # This is the place where the memory usage is at its peak during the scan.
-            # Just continue the process with an incomplete list of matches.
-            del compared # This should give us enough room to call logging.
-            logging.warning('Memory Overflow. Matches: %d. Word dict: %d' % (len(result), len(word_dict)))
-            return result
-        return result
+def getmatches(objects, min_match_percentage=0, match_similar_words=False, weight_words=False,
+        no_field_order=False, j=job.nulljob):
+    COMMON_WORD_THRESHOLD = 50
+    LIMIT = 5000000
+    j = j.start_subjob(2)
+    sj = j.start_subjob(2)
+    for o in objects:
+        if not hasattr(o, 'words'):
+            o.words = getwords(o.name)
+    word_dict = build_word_dict(objects, sj)
+    reduce_common_words(word_dict, COMMON_WORD_THRESHOLD)
+    if match_similar_words:
+        merge_similar_words(word_dict)
+    match_flags = []
+    if weight_words:
+        match_flags.append(WEIGHT_WORDS)
+    if match_similar_words:
+        match_flags.append(MATCH_SIMILAR_WORDS)
+    if no_field_order:
+        match_flags.append(NO_FIELD_ORDER)
+    j.start_job(len(word_dict), '0 matches found')
+    compared = defaultdict(set)
+    result = []
+    try:
+        # This whole 'popping' thing is there to avoid taking too much memory at the same time.
+        while word_dict:
+            items = word_dict.popitem()[1]
+            while items:
+                ref = items.pop()
+                compared_already = compared[ref]
+                to_compare = items - compared_already
+                compared_already |= to_compare
+                for other in to_compare:
+                    m = get_match(ref, other, match_flags)
+                    if m.percentage >= min_match_percentage:
+                        result.append(m)
+                        if len(result) >= LIMIT:
+                            return result
+            j.add_progress(desc='%d matches found' % len(result))
+    except MemoryError:
+        # This is the place where the memory usage is at its peak during the scan.
+        # Just continue the process with an incomplete list of matches.
+        del compared # This should give us enough room to call logging.
+        logging.warning('Memory Overflow. Matches: %d. Word dict: %d' % (len(result), len(word_dict)))
+        return result
+    return result
+
+def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob):
+    j = j.start_subjob([2, 8])
+    size2files = defaultdict(set)
+    for file in j.iter_with_progress(files, 'Read size of %d/%d files'):
+        size2files[getattr(file, sizeattr)].add(file)
+    possible_matches = [files for files in size2files.values() if len(files) > 1]
+    del size2files
+    result = []
+    j.start_job(len(possible_matches), '0 matches found')
+    for group in possible_matches:
+        for first, second in itertools.combinations(group, 2):
+            if first.md5partial == second.md5partial:
+                if partial or first.md5 == second.md5:
+                    result.append(Match(first, second, 100))
+        j.add_progress(desc='%d matches found' % len(result))
+    return result
 
 class Group(object):
     #---Override
```
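The `MatchFactory` class dissolves into a module-level `getmatches()` whose keyword arguments replace the old instance attributes, and content scans gain a dedicated `getmatches_by_contents()` that buckets files by size before comparing digests. A sketch of calling both, assuming `files` holds objects with the attributes `fs.File` provides (`name`, `size`, `md5partial`, `md5`):

```python
from dupeguru import engine

# Word scan: what used to be MatchFactory attributes are now plain kwargs.
matches = engine.getmatches(files, min_match_percentage=80, weight_words=True)

# Content scan: only same-sized files are compared; md5partial must match,
# and unless partial=True the full md5 must match too.
content_matches = engine.getmatches_by_contents(files, sizeattr='size', partial=False)
```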
base/py/fs.py (new file, 178 lines)

```python
# -*- coding: utf-8 -*-
# Created By: Virgil Dupras
# Created On: 2009-10-22
# $Id$
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "HS" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license

# This is a fork from hsfs. The reason for this fork is that hsfs has been designed for musicGuru
# and was re-used for dupeGuru. The problem is that hsfs is way over-engineered for dupeGuru,
# resulting needless complexity and memory usage. It's been a while since I wanted to do that fork,
# and I'm doing it now.

from __future__ import unicode_literals

import hashlib
import logging

from hsutil import io
from hsutil.misc import nonone, flatten
from hsutil.str import get_file_ext

class FSError(Exception):
    cls_message = "An error has occured on '{name}' in '{parent}'"
    def __init__(self, fsobject, parent=None):
        message = self.cls_message
        if isinstance(fsobject, basestring):
            name = fsobject
        elif isinstance(fsobject, File):
            name = fsobject.name
        else:
            name = ''
        parentname = unicode(parent) if parent is not None else ''
        Exception.__init__(self, message.format(name=name, parent=parentname))


class AlreadyExistsError(FSError):
    "The directory or file name we're trying to add already exists"
    cls_message = "'{name}' already exists in '{parent}'"

class InvalidPath(FSError):
    "The path of self is invalid, and cannot be worked with."
    cls_message = "'{name}' is invalid."

class InvalidDestinationError(FSError):
    """A copy/move operation has been called, but the destination is invalid."""
    cls_message = "'{name}' is an invalid destination for this operation."

class OperationError(FSError):
    """A copy/move/delete operation has been called, but the checkup after the
    operation shows that it didn't work."""
    cls_message = "Operation on '{name}' failed."

class File(object):
    INITIAL_INFO = {
        'size': 0,
        'ctime': 0,
        'mtime': 0,
        'md5': '',
        'md5partial': '',
    }

    def __init__(self, path):
        self.path = path
        #This offset is where we should start reading the file to get a partial md5
        #For audio file, it should be where audio data starts
        self._md5partial_offset = 0x4000 #16Kb
        self._md5partial_size = 0x4000 #16Kb

    def __getattr__(self, attrname):
        # Only called when attr is not there
        if attrname in self.INITIAL_INFO:
            try:
                self._read_info(attrname)
            except Exception as e:
                logging.warning("An error '%s' was raised while decoding '%s'", e, repr(self.path))
            try:
                return self.__dict__[attrname]
            except KeyError:
                return self.INITIAL_INFO[attrname]
        raise AttributeError()

    def _read_info(self, field):
        if field in ('size', 'ctime', 'mtime'):
            stats = io.stat(self.path)
            self.size = nonone(stats.st_size, 0)
            self.ctime = nonone(stats.st_ctime, 0)
            self.mtime = nonone(stats.st_mtime, 0)
        elif field == 'md5partial':
            try:
                fp = io.open(self.path, 'rb')
                offset = self._md5partial_offset
                size = self._md5partial_size
                fp.seek(offset)
                partialdata = fp.read(size)
                md5 = hashlib.md5(partialdata)
                self.md5partial = md5.digest()
                fp.close()
            except Exception:
                pass
        elif field == 'md5':
            try:
                fp = io.open(self.path, 'rb')
                filedata = fp.read()
                md5 = hashlib.md5(filedata)
                self.md5 = md5.digest()
                fp.close()
            except Exception:
                pass

    def _read_all_info(self, attrnames=None):
        """Cache all possible info.

        If `attrnames` is not None, caches only attrnames.
        """
        if attrnames is None:
            attrnames = self.INITIAL_INFO.keys()
        for attrname in attrnames:
            if attrname not in self.__dict__:
                self._read_info(attrname)

    #--- Public
    @classmethod
    def can_handle(cls, path):
        return not io.islink(path) and io.isfile(path)

    def rename(self, newname):
        if newname == self.name:
            return
        destpath = self.path[:-1] + newname
        if io.exists(destpath):
            raise AlreadyExistsError(newname, self.path[:-1])
        try:
            io.rename(self.path, destpath)
        except EnvironmentError:
            raise OperationError(self)
        if not io.exists(destpath):
            raise OperationError(self)
        self.path = destpath

    #--- Properties
    @property
    def extension(self):
        return get_file_ext(self.name)

    @property
    def name(self):
        return self.path[-1]


def get_file(path, fileclasses=[File]):
    for fileclass in fileclasses:
        if fileclass.can_handle(path):
            return fileclass(path)

def get_files(path, fileclasses=[File]):
    assert all(issubclass(fileclass, File) for fileclass in fileclasses)
    try:
        paths = [path + name for name in io.listdir(path)]
        result = []
        for path in paths:
            file = get_file(path, fileclasses=fileclasses)
            if file is not None:
                result.append(file)
        return result
    except EnvironmentError:
        raise InvalidPath(path)

def get_all_files(path, fileclasses=[File]):
    files = get_files(path, fileclasses=fileclasses)
    filepaths = set(f.path for f in files)
    subpaths = [path + name for name in io.listdir(path)]
    # it's possible that a folder (bundle) gets into the file list. in that case, we don't want to recurse into it
    subfolders = [p for p in subpaths if not io.islink(p) and io.isdir(p) and p not in filepaths]
    subfiles = flatten(get_all_files(subpath, fileclasses=fileclasses) for subpath in subfolders)
    return subfiles + files
```
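`File` reads its metadata lazily: any attribute named in `INITIAL_INFO` is computed on first access through `__getattr__`, stored in the instance `__dict__`, and served as a plain attribute from then on. A short sketch of that protocol, with a hypothetical path:

```python
from hsutil.path import Path
from dupeguru import fs

f = fs.File(Path('/tmp/song.mp3'))
f.size        # first access: one io.stat() call fills size, ctime and mtime
f.md5partial  # hashes 16KB starting at offset 0x4000 (audio-friendly default)
f.md5         # hashes the whole file; both digests are cached afterwards
```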
```diff
@@ -32,40 +32,32 @@ class Scanner(object):
         self.ignore_list = IgnoreList()
         self.discarded_file_count = 0
 
-    @staticmethod
-    def _filter_matches_by_content(matches, partial, j):
-        matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
-        md5attrname = 'md5partial' if partial else 'md5'
-        md5 = lambda f: getattr(f, md5attrname)
-        for matched_file in j.iter_with_progress(matched_files, 'Analyzed %d/%d matching files'):
-            md5(matched_file)
-        j.set_progress(100, 'Removing false matches')
-        return [m for m in matches if md5(m.first) == md5(m.second)]
-
     def _getmatches(self, files, j):
-        j = j.start_subjob(2)
-        mf = engine.MatchFactory()
-        if self.scan_type != SCAN_TYPE_CONTENT:
-            mf.match_similar_words = self.match_similar_words
-            mf.weight_words = self.word_weighting
-            mf.min_match_percentage = self.min_match_percentage
-        if self.scan_type == SCAN_TYPE_FIELDS_NO_ORDER:
-            self.scan_type = SCAN_TYPE_FIELDS
-            mf.no_field_order = True
-        func = {
-            SCAN_TYPE_FILENAME: lambda f: engine.getwords(rem_file_ext(f.name)),
-            SCAN_TYPE_FIELDS: lambda f: engine.getfields(rem_file_ext(f.name)),
-            SCAN_TYPE_TAG: lambda f: [engine.getwords(unicode(getattr(f, attrname))) for attrname in SCANNABLE_TAGS if attrname in self.scanned_tags],
-            SCAN_TYPE_CONTENT: lambda f: [str(f.size)],
-            SCAN_TYPE_CONTENT_AUDIO: lambda f: [str(f.audiosize)]
-        }[self.scan_type]
-        for f in j.iter_with_progress(files, 'Read metadata of %d/%d files'):
-            if self.size_threshold:
-                f.size # pre-read, makes a smoother progress if read here (especially for bundles)
-            f.words = func(f)
         if self.size_threshold:
+            j = j.start_subjob([2, 8])
+            for f in j.iter_with_progress(files, 'Read size of %d/%d files'):
+                f.size # pre-read, makes a smoother progress if read here (especially for bundles)
             files = [f for f in files if f.size >= self.size_threshold]
-        return mf.getmatches(files, j)
+        if self.scan_type in (SCAN_TYPE_CONTENT, SCAN_TYPE_CONTENT_AUDIO):
+            sizeattr = 'size' if self.scan_type == SCAN_TYPE_CONTENT else 'audiosize'
+            return engine.getmatches_by_contents(files, sizeattr, partial=self.scan_type==SCAN_TYPE_CONTENT_AUDIO, j=j)
+        else:
+            j = j.start_subjob([2, 8])
+            kw = {}
+            kw['match_similar_words'] = self.match_similar_words
+            kw['weight_words'] = self.word_weighting
+            kw['min_match_percentage'] = self.min_match_percentage
+            if self.scan_type == SCAN_TYPE_FIELDS_NO_ORDER:
+                self.scan_type = SCAN_TYPE_FIELDS
+                kw['no_field_order'] = True
+            func = {
+                SCAN_TYPE_FILENAME: lambda f: engine.getwords(rem_file_ext(f.name)),
+                SCAN_TYPE_FIELDS: lambda f: engine.getfields(rem_file_ext(f.name)),
+                SCAN_TYPE_TAG: lambda f: [engine.getwords(unicode(getattr(f, attrname))) for attrname in SCANNABLE_TAGS if attrname in self.scanned_tags],
+            }[self.scan_type]
+            for f in j.iter_with_progress(files, 'Read metadata of %d/%d files'):
+                f.words = func(f)
+            return engine.getmatches(files, j=j, **kw)
 
     @staticmethod
     def _key_func(dupe):
@@ -86,10 +78,7 @@ class Scanner(object):
         for f in [f for f in files if not hasattr(f, 'is_ref')]:
             f.is_ref = False
         logging.info('Getting matches')
-        if self.match_factory is None:
-            matches = self._getmatches(files, j)
-        else:
-            matches = self.match_factory.getmatches(files, j)
+        matches = self._getmatches(files, j)
         logging.info('Found %d matches' % len(matches))
         if not self.mix_file_kind:
             j.set_progress(100, 'Removing false matches')
@@ -99,14 +88,6 @@ class Scanner(object):
         iter_matches = j.iter_with_progress(matches, 'Processed %d/%d matches against the ignore list')
         matches = [m for m in iter_matches
             if not self.ignore_list.AreIgnored(unicode(m.first.path), unicode(m.second.path))]
-        if self.scan_type in (SCAN_TYPE_CONTENT, SCAN_TYPE_CONTENT_AUDIO):
-            j = j.start_subjob(3 if self.scan_type == SCAN_TYPE_CONTENT else 2)
-            matches = self._filter_matches_by_content(matches, partial=True, j=j)
-            if self.scan_type == SCAN_TYPE_CONTENT:
-                matches = self._filter_matches_by_content(matches, partial=False, j=j)
-            # We compared md5. No words were involved.
-            for m in matches:
-                m.first.words = m.second.words = ['--']
         logging.info('Grouping matches')
         groups = engine.get_groups(matches, j)
         matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
@@ -118,7 +99,6 @@ class Scanner(object):
         g.prioritize(self._key_func, self._tie_breaker)
         return groups
 
-    match_factory = None
     match_similar_words = False
     min_match_percentage = 80
     mix_file_kind = True
@@ -126,9 +106,3 @@ class Scanner(object):
     scanned_tags = set(['artist', 'title'])
     size_threshold = 0
     word_weighting = False
-
-class ScannerME(Scanner): # Scanner for Music Edition
-    @staticmethod
-    def _key_func(dupe):
-        return (not dupe.is_ref, -dupe.bitrate, -dupe.size)
-
```
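With `_filter_matches_by_content()` gone, `_getmatches()` is now a two-way dispatch: content scan types go straight to digest comparison, while everything else builds word lists and calls `engine.getmatches()`. The same dispatch, flattened into a standalone sketch with the names used in the diff above:

```python
# Sketch only: mirrors Scanner._getmatches() without the job-progress
# plumbing; scan_type and the option values come from the Scanner instance.
if scan_type in (SCAN_TYPE_CONTENT, SCAN_TYPE_CONTENT_AUDIO):
    sizeattr = 'size' if scan_type == SCAN_TYPE_CONTENT else 'audiosize'
    matches = engine.getmatches_by_contents(files, sizeattr,
        partial=(scan_type == SCAN_TYPE_CONTENT_AUDIO))
else:
    matches = engine.getmatches(files,
        min_match_percentage=min_match_percentage,
        match_similar_words=match_similar_words,
        weight_words=word_weighting,
        no_field_order=(scan_type == SCAN_TYPE_FIELDS_NO_ORDER))
```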
```diff
@@ -18,10 +18,10 @@ from hsutil.path import Path
 from hsutil.testcase import TestCase
 from hsutil.decorators import log_calls
 from hsutil import io
-import hsfs.phys
 
+from . import data
 from .results_test import GetTestGroups
-from .. import engine, data
+from .. import engine, fs
 try:
     from ..app_cocoa import DupeGuru as DupeGuruBase
 except ImportError:
@@ -35,7 +35,6 @@ class DupeGuru(DupeGuruBase):
     def _start_job(self, jobid, func):
         func(nulljob)
 
-
 def r2np(rows):
     #Transforms a list of rows [1,2,3] into a list of node paths [[1],[2],[3]]
     return [[i] for i in rows]
@@ -310,15 +309,15 @@ class TCDupeGuru(TestCase):
 
 class TCDupeGuru_renameSelected(TestCase):
     def setUp(self):
-        p = Path(tempfile.mkdtemp())
-        fp = open(str(p + 'foo bar 1'),mode='w')
+        p = self.tmppath()
+        fp = open(unicode(p + 'foo bar 1'),mode='w')
         fp.close()
-        fp = open(str(p + 'foo bar 2'),mode='w')
+        fp = open(unicode(p + 'foo bar 2'),mode='w')
         fp.close()
-        fp = open(str(p + 'foo bar 3'),mode='w')
+        fp = open(unicode(p + 'foo bar 3'),mode='w')
         fp.close()
-        refdir = hsfs.phys.Directory(None,str(p))
-        matches = engine.MatchFactory().getmatches(refdir.files)
+        files = fs.get_files(p)
+        matches = engine.getmatches(files)
         groups = engine.get_groups(matches)
         g = groups[0]
         g.prioritize(lambda x:x.name)
@@ -327,45 +326,41 @@ class TCDupeGuru_renameSelected(TestCase):
         self.app = app
         self.groups = groups
         self.p = p
-        self.refdir = refdir
+        self.files = files
 
-    def tearDown(self):
-        shutil.rmtree(str(self.p))
-
     def test_simple(self):
         app = self.app
-        refdir = self.refdir
         g = self.groups[0]
         app.SelectPowerMarkerNodePaths(r2np([0]))
-        self.assert_(app.RenameSelected('renamed'))
-        self.assert_('renamed' in refdir)
-        self.assert_('foo bar 2' not in refdir)
-        self.assert_(g.dupes[0] is refdir['renamed'])
-        self.assert_(g.dupes[0] in refdir)
+        assert app.RenameSelected('renamed')
+        names = io.listdir(self.p)
+        assert 'renamed' in names
+        assert 'foo bar 2' not in names
+        eq_(g.dupes[0].name, 'renamed')
 
     def test_none_selected(self):
         app = self.app
-        refdir = self.refdir
         g = self.groups[0]
         app.SelectPowerMarkerNodePaths([])
         self.mock(logging, 'warning', log_calls(lambda msg: None))
-        self.assert_(not app.RenameSelected('renamed'))
+        assert not app.RenameSelected('renamed')
         msg = logging.warning.calls[0]['msg']
-        self.assertEqual('dupeGuru Warning: list index out of range', msg)
-        self.assert_('renamed' not in refdir)
-        self.assert_('foo bar 2' in refdir)
-        self.assert_(g.dupes[0] is refdir['foo bar 2'])
+        eq_('dupeGuru Warning: list index out of range', msg)
+        names = io.listdir(self.p)
+        assert 'renamed' not in names
+        assert 'foo bar 2' in names
+        eq_(g.dupes[0].name, 'foo bar 2')
 
     def test_name_already_exists(self):
         app = self.app
-        refdir = self.refdir
         g = self.groups[0]
         app.SelectPowerMarkerNodePaths(r2np([0]))
         self.mock(logging, 'warning', log_calls(lambda msg: None))
-        self.assert_(not app.RenameSelected('foo bar 1'))
+        assert not app.RenameSelected('foo bar 1')
         msg = logging.warning.calls[0]['msg']
-        self.assert_(msg.startswith('dupeGuru Warning: \'foo bar 2\' already exists in'))
-        self.assert_('foo bar 1' in refdir)
-        self.assert_('foo bar 2' in refdir)
-        self.assert_(g.dupes[0] is refdir['foo bar 2'])
+        assert msg.startswith('dupeGuru Warning: \'foo bar 1\' already exists in')
+        names = io.listdir(self.p)
+        assert 'foo bar 1' in names
+        assert 'foo bar 2' in names
+        eq_(g.dupes[0].name, 'foo bar 2')
```
```diff
@@ -13,12 +13,11 @@ from hsutil.testcase import TestCase
 from hsutil import io
 from hsutil.path import Path
 from hsutil.decorators import log_calls
-import hsfs as fs
-import hsfs.phys
 import hsutil.files
 from hsutil.job import nulljob
 
-from .. import data, app
+from . import data
+from .. import app, fs
 from ..app import DupeGuru as DupeGuruBase
 
 class DupeGuru(DupeGuruBase):
@@ -59,27 +58,27 @@ class TCDupeGuru(TestCase):
         # The goal here is just to have a test for a previous blowup I had. I know my test coverage
         # for this unit is pathetic. What's done is done. My approach now is to add tests for
         # every change I want to make. The blowup was caused by a missing import.
-        dupe_parent = fs.Directory(None, 'foo')
-        dupe = fs.File(dupe_parent, 'bar')
-        dupe.copy = log_calls(lambda dest, newname: None)
+        p = self.tmppath()
+        io.open(p + 'foo', 'w').close()
         self.mock(hsutil.files, 'copy', log_calls(lambda source_path, dest_path: None))
         self.mock(os, 'makedirs', lambda path: None) # We don't want the test to create that fake directory
-        self.mock(fs.phys, 'Directory', fs.Directory) # We don't want an error because makedirs didn't work
         app = DupeGuru()
-        app.copy_or_move(dupe, True, 'some_destination', 0)
+        app.directories.add_path(p)
+        [f] = app.directories.get_files()
+        app.copy_or_move(f, True, 'some_destination', 0)
         self.assertEqual(1, len(hsutil.files.copy.calls))
         call = hsutil.files.copy.calls[0]
         self.assertEqual('some_destination', call['dest_path'])
-        self.assertEqual(dupe.path, call['source_path'])
+        self.assertEqual(f.path, call['source_path'])
 
     def test_copy_or_move_clean_empty_dirs(self):
         tmppath = Path(self.tmpdir())
         sourcepath = tmppath + 'source'
         io.mkdir(sourcepath)
         io.open(sourcepath + 'myfile', 'w')
-        tmpdir = hsfs.phys.Directory(None, unicode(tmppath))
-        myfile = tmpdir['source']['myfile']
         app = DupeGuru()
+        app.directories.add_path(tmppath)
+        [myfile] = app.directories.get_files()
         self.mock(app, 'clean_empty_dirs', log_calls(lambda path: None))
         app.copy_or_move(myfile, False, tmppath + 'dest', 0)
         calls = app.clean_empty_dirs.calls
@@ -87,9 +86,14 @@ class TCDupeGuru(TestCase):
         self.assertEqual(sourcepath, calls[0]['path'])
 
     def test_Scan_with_objects_evaluating_to_false(self):
+        class FakeFile(fs.File):
+            def __nonzero__(self):
+                return False
+
+
         # At some point, any() was used in a wrong way that made Scan() wrongly return 1
         app = DupeGuru()
-        f1, f2 = [fs.File(None, 'foo') for i in range(2)]
+        f1, f2 = [FakeFile('foo') for i in range(2)]
         f1.is_ref, f2.is_ref = (False, False)
         assert not (bool(f1) and bool(f2))
         app.directories.get_files = lambda: [f1, f2]
```
base/py/tests/data.py (new file, 45 lines)

```python
# -*- coding: utf-8 -*-
# Created By: Virgil Dupras
# Created On: 2009-10-23
# $Id$
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "HS" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license

# data module for tests

from hsutil.str import format_size
from dupeguru.data import format_path, cmp_value

COLUMNS = [
    {'attr':'name','display':'Filename'},
    {'attr':'path','display':'Directory'},
    {'attr':'size','display':'Size (KB)'},
    {'attr':'extension','display':'Kind'},
]

METADATA_TO_READ = ['size']

def GetDisplayInfo(dupe, group, delta):
    size = dupe.size
    m = group.get_match_of(dupe)
    if m and delta:
        r = group.ref
        size -= r.size
    return [
        dupe.name,
        format_path(dupe.path),
        format_size(size, 0, 1, False),
        dupe.extension,
    ]

def GetDupeSortKey(dupe, get_group, key, delta):
    r = cmp_value(getattr(dupe, COLUMNS[key]['attr']))
    if delta and (key == 2):
        r -= cmp_value(getattr(get_group().ref, COLUMNS[key]['attr']))
    return r

def GetGroupSortKey(group, key):
    return cmp_value(getattr(group.ref, COLUMNS[key]['attr']))
```
```diff
@@ -10,20 +10,43 @@
 import os.path as op
 import os
 import time
-import shutil
 
 from nose.tools import eq_
 
-from hsutil import job, io
+from hsutil import io
 from hsutil.path import Path
 from hsutil.testcase import TestCase
-import hsfs.phys
-from hsfs.tests import phys_test
 
 from ..directories import *
 
 testpath = Path(TestCase.datadirpath())
 
+def create_fake_fs(rootpath):
+    rootpath = rootpath + 'fs'
+    io.mkdir(rootpath)
+    io.mkdir(rootpath + 'dir1')
+    io.mkdir(rootpath + 'dir2')
+    io.mkdir(rootpath + 'dir3')
+    fp = io.open(rootpath + 'file1.test', 'w')
+    fp.write('1')
+    fp.close()
+    fp = io.open(rootpath + 'file2.test', 'w')
+    fp.write('12')
+    fp.close()
+    fp = io.open(rootpath + 'file3.test', 'w')
+    fp.write('123')
+    fp.close()
+    fp = io.open(rootpath + ('dir1', 'file1.test'), 'w')
+    fp.write('1')
+    fp.close()
+    fp = io.open(rootpath + ('dir2', 'file2.test'), 'w')
+    fp.write('12')
+    fp.close()
+    fp = io.open(rootpath + ('dir3', 'file3.test'), 'w')
+    fp.write('123')
+    fp.close()
+    return rootpath
+
 class TCDirectories(TestCase):
     def test_empty(self):
         d = Directories()
@@ -33,13 +56,11 @@ class TCDirectories(TestCase):
     def test_add_path(self):
         d = Directories()
         p = testpath + 'utils'
-        added = d.add_path(p)
+        d.add_path(p)
         self.assertEqual(1,len(d))
         self.assert_(p in d)
         self.assert_((p + 'foobar') in d)
         self.assert_(p[:-1] not in d)
-        self.assertEqual(p,added.path)
-        self.assert_(d[0] is added)
         p = self.tmppath()
         d.add_path(p)
         self.assertEqual(2,len(d))
@@ -53,13 +74,13 @@ class TCDirectories(TestCase):
         self.assertRaises(AlreadyThereError, d.add_path, p + 'foobar')
         self.assertEqual(1, len(d))
 
-    def test_AddPath_containing_paths_already_there(self):
+    def test_add_path_containing_paths_already_there(self):
         d = Directories()
         d.add_path(testpath + 'utils')
         self.assertEqual(1, len(d))
-        added = d.add_path(testpath)
-        self.assertEqual(1, len(d))
-        self.assert_(added is d[0])
+        d.add_path(testpath)
+        eq_(len(d), 1)
+        eq_(d[0], testpath)
 
     def test_AddPath_non_latin(self):
         p = Path(self.tmpdir())
@@ -114,7 +135,7 @@ class TCDirectories(TestCase):
 
     def test_set_state_keep_state_dict_size_to_minimum(self):
         d = Directories()
-        p = Path(phys_test.create_fake_fs(self.tmpdir()))
+        p = create_fake_fs(self.tmppath())
         d.add_path(p)
         d.set_state(p,STATE_REFERENCE)
         d.set_state(p + 'dir1',STATE_REFERENCE)
@@ -129,7 +150,7 @@ class TCDirectories(TestCase):
 
     def test_get_files(self):
         d = Directories()
-        p = Path(phys_test.create_fake_fs(self.tmpdir()))
+        p = create_fake_fs(self.tmppath())
         d.add_path(p)
         d.set_state(p + 'dir1',STATE_REFERENCE)
         d.set_state(p + 'dir2',STATE_EXCLUDED)
@@ -177,52 +198,28 @@ class TCDirectories(TestCase):
         except LookupError:
             self.fail()
 
-    def test_default_dirclass(self):
-        self.assert_(Directories().dirclass is hsfs.phys.Directory)
-
-    def test_dirclass(self):
-        class MySpecialDirclass(hsfs.phys.Directory): pass
-        d = Directories()
-        d.dirclass = MySpecialDirclass
-        d.add_path(testpath)
-        self.assert_(isinstance(d[0], MySpecialDirclass))
-
     def test_load_from_file_with_invalid_path(self):
         #This test simulates a load from file resulting in a
         #InvalidPath raise. Other directories must be loaded.
         d1 = Directories()
         d1.add_path(testpath + 'utils')
         #Will raise InvalidPath upon loading
-        d1.add_path(self.tmppath()).name = 'does_not_exist'
+        p = self.tmppath()
+        d1.add_path(p)
+        io.rmdir(p)
         tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
         d1.save_to_file(tmpxml)
         d2 = Directories()
         d2.load_from_file(tmpxml)
         self.assertEqual(1, len(d2))
 
-    def test_load_from_file_with_same_paths(self):
-        #This test simulates a load from file resulting in a
-        #AlreadyExists raise. Other directories must be loaded.
-        d1 = Directories()
-        p1 = self.tmppath()
-        p2 = self.tmppath()
-        d1.add_path(p1)
-        d1.add_path(p2)
-        #Will raise AlreadyExists upon loading
-        d1.add_path(self.tmppath()).name = unicode(p1)
-        tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
-        d1.save_to_file(tmpxml)
-        d2 = Directories()
-        d2.load_from_file(tmpxml)
-        self.assertEqual(2, len(d2))
-
     def test_unicode_save(self):
         d = Directories()
         p1 = self.tmppath() + u'hello\xe9'
         io.mkdir(p1)
         io.mkdir(p1 + u'foo\xe9')
         d.add_path(p1)
-        d.set_state(d[0][0].path, STATE_EXCLUDED)
+        d.set_state(p1 + u'foo\xe9', STATE_EXCLUDED)
         tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
         try:
             d.save_to_file(tmpxml)
@@ -231,7 +228,7 @@ class TCDirectories(TestCase):
 
     def test_get_files_refreshes_its_directories(self):
         d = Directories()
-        p = Path(phys_test.create_fake_fs(self.tmpdir()))
+        p = create_fake_fs(self.tmppath())
         d.add_path(p)
         files = d.get_files()
         self.assertEqual(6, len(list(files)))
@@ -258,16 +255,6 @@ class TCDirectories(TestCase):
         d.set_state(hidden_dir_path, STATE_NORMAL)
         self.assertEqual(d.get_state(hidden_dir_path), STATE_NORMAL)
 
-    def test_special_dirclasses(self):
-        # if a path is in special_dirclasses, use this class instead
-        class MySpecialDirclass(hsfs.phys.Directory): pass
-        d = Directories()
-        p1 = self.tmppath()
-        p2 = self.tmppath()
-        d.special_dirclasses[p1] = MySpecialDirclass
-        self.assert_(isinstance(d.add_path(p2), hsfs.phys.Directory))
-        self.assert_(isinstance(d.add_path(p1), MySpecialDirclass))
-
     def test_default_path_state_override(self):
         # It's possible for a subclass to override the default state of a path
         class MyDirectories(Directories):
```
@@ -340,21 +340,13 @@ class TCget_match(TestCase):
|
|||||||
self.assertEqual(int((6.0 / 13.0) * 100),get_match(NamedObject("foo bar",True),NamedObject("bar bleh",True),(WEIGHT_WORDS,)).percentage)
|
self.assertEqual(int((6.0 / 13.0) * 100),get_match(NamedObject("foo bar",True),NamedObject("bar bleh",True),(WEIGHT_WORDS,)).percentage)
|
||||||
|
|
||||||
|
|
||||||
class TCMatchFactory(TestCase):
|
class GetMatches(TestCase):
|
||||||
def test_empty(self):
|
def test_empty(self):
|
||||||
self.assertEqual([],MatchFactory().getmatches([]))
|
eq_(getmatches([]), [])
|
||||||
|
|
||||||
def test_defaults(self):
|
|
||||||
mf = MatchFactory()
|
|
||||||
self.assertEqual(50,mf.common_word_threshold)
|
|
||||||
self.assertEqual(False,mf.weight_words)
|
|
||||||
self.assertEqual(False,mf.match_similar_words)
|
|
||||||
self.assertEqual(False,mf.no_field_order)
|
|
||||||
self.assertEqual(0,mf.min_match_percentage)
|
|
||||||
|
|
||||||
def test_simple(self):
|
def test_simple(self):
|
||||||
l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("a b c foo")]
|
l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("a b c foo")]
|
||||||
r = MatchFactory().getmatches(l)
|
r = getmatches(l)
|
||||||
self.assertEqual(2,len(r))
|
self.assertEqual(2,len(r))
|
||||||
seek = [m for m in r if m.percentage == 50] #"foo bar" and "bar bleh"
|
seek = [m for m in r if m.percentage == 50] #"foo bar" and "bar bleh"
|
||||||
m = seek[0]
|
m = seek[0]
|
||||||
@@ -367,7 +359,7 @@ class TCMatchFactory(TestCase):
|
|||||||
|
|
||||||
def test_null_and_unrelated_objects(self):
|
def test_null_and_unrelated_objects(self):
|
||||||
l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject(""),NamedObject("unrelated object")]
|
l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject(""),NamedObject("unrelated object")]
|
||||||
r = MatchFactory().getmatches(l)
|
r = getmatches(l)
|
||||||
self.assertEqual(1,len(r))
|
self.assertEqual(1,len(r))
|
||||||
m = r[0]
|
m = r[0]
|
||||||
self.assertEqual(50,m.percentage)
|
self.assertEqual(50,m.percentage)
|
||||||
@@ -376,34 +368,33 @@ class TCMatchFactory(TestCase):
|
|||||||
|
|
||||||
def test_twice_the_same_word(self):
|
def test_twice_the_same_word(self):
|
||||||
l = [NamedObject("foo foo bar"),NamedObject("bar bleh")]
|
l = [NamedObject("foo foo bar"),NamedObject("bar bleh")]
|
||||||
r = MatchFactory().getmatches(l)
|
r = getmatches(l)
|
||||||
self.assertEqual(1,len(r))
|
self.assertEqual(1,len(r))
|
||||||
|
|
||||||
def test_twice_the_same_word_when_preworded(self):
|
def test_twice_the_same_word_when_preworded(self):
|
||||||
l = [NamedObject("foo foo bar",True),NamedObject("bar bleh",True)]
|
l = [NamedObject("foo foo bar",True),NamedObject("bar bleh",True)]
|
||||||
r = MatchFactory().getmatches(l)
|
r = getmatches(l)
|
||||||
self.assertEqual(1,len(r))
|
self.assertEqual(1,len(r))
|
||||||
|
|
||||||
def test_two_words_match(self):
|
def test_two_words_match(self):
|
||||||
l = [NamedObject("foo bar"),NamedObject("foo bar bleh")]
|
l = [NamedObject("foo bar"),NamedObject("foo bar bleh")]
|
||||||
r = MatchFactory().getmatches(l)
|
r = getmatches(l)
|
||||||
self.assertEqual(1,len(r))
|
self.assertEqual(1,len(r))
|
||||||
|
|
||||||
def test_match_files_with_only_common_words(self):
|
def test_match_files_with_only_common_words(self):
|
||||||
#If a word occurs more than 50 times, it is excluded from the matching process
|
#If a word occurs more than 50 times, it is excluded from the matching process
|
||||||
#The problem with the common_word_threshold is that the files containing only common
|
#The problem with the common_word_threshold is that the files containing only common
|
||||||
#words will never be matched together. We *should* match them.
|
#words will never be matched together. We *should* match them.
|
||||||
mf = MatchFactory()
|
# This test assumes that the common word threshold constant is 50
|
||||||
mf.common_word_threshold = 50
|
|
||||||
l = [NamedObject("foo") for i in range(50)]
|
l = [NamedObject("foo") for i in range(50)]
|
||||||
r = mf.getmatches(l)
|
r = getmatches(l)
|
||||||
self.assertEqual(1225,len(r))
|
self.assertEqual(1225,len(r))
|
||||||
|
|
||||||
def test_use_words_already_there_if_there(self):
|
def test_use_words_already_there_if_there(self):
|
||||||
o1 = NamedObject('foo')
|
o1 = NamedObject('foo')
|
||||||
o2 = NamedObject('bar')
|
o2 = NamedObject('bar')
|
||||||
o2.words = ['foo']
|
o2.words = ['foo']
|
||||||
self.assertEqual(1,len(MatchFactory().getmatches([o1,o2])))
|
eq_(1, len(getmatches([o1,o2])))
|
||||||
|
|
||||||
def test_job(self):
|
def test_job(self):
|
||||||
def do_progress(p,d=''):
|
def do_progress(p,d=''):
|
||||||
@@ -413,75 +404,62 @@ class TCMatchFactory(TestCase):
|
|||||||
j = job.Job(1,do_progress)
|
j = job.Job(1,do_progress)
|
||||||
self.log = []
|
self.log = []
|
||||||
s = "foo bar"
|
s = "foo bar"
|
||||||
MatchFactory().getmatches([NamedObject(s),NamedObject(s),NamedObject(s)],j)
|
getmatches([NamedObject(s), NamedObject(s), NamedObject(s)], j=j)
|
||||||
self.assert_(len(self.log) > 2)
|
self.assert_(len(self.log) > 2)
|
||||||
self.assertEqual(0,self.log[0])
|
self.assertEqual(0,self.log[0])
|
||||||
self.assertEqual(100,self.log[-1])
|
self.assertEqual(100,self.log[-1])
|
||||||
|
|
||||||
def test_weight_words(self):
|
def test_weight_words(self):
|
||||||
mf = MatchFactory()
|
|
||||||
mf.weight_words = True
|
|
||||||
l = [NamedObject("foo bar"),NamedObject("bar bleh")]
|
l = [NamedObject("foo bar"),NamedObject("bar bleh")]
|
||||||
m = mf.getmatches(l)[0]
|
m = getmatches(l, weight_words=True)[0]
|
||||||
self.assertEqual(int((6.0 / 13.0) * 100),m.percentage)
|
self.assertEqual(int((6.0 / 13.0) * 100),m.percentage)
|
||||||
|
|
||||||
def test_similar_word(self):
|
def test_similar_word(self):
|
||||||
mf = MatchFactory()
|
|
||||||
mf.match_similar_words = True
|
|
||||||
l = [NamedObject("foobar"),NamedObject("foobars")]
|
l = [NamedObject("foobar"),NamedObject("foobars")]
|
||||||
self.assertEqual(1,len(mf.getmatches(l)))
|
eq_(len(getmatches(l, match_similar_words=True)), 1)
|
||||||
self.assertEqual(100,mf.getmatches(l)[0].percentage)
|
eq_(getmatches(l, match_similar_words=True)[0].percentage, 100)
|
||||||
l = [NamedObject("foobar"),NamedObject("foo")]
|
l = [NamedObject("foobar"),NamedObject("foo")]
|
||||||
self.assertEqual(0,len(mf.getmatches(l))) #too far
|
eq_(len(getmatches(l, match_similar_words=True)), 0) #too far
|
||||||
l = [NamedObject("bizkit"),NamedObject("bizket")]
|
l = [NamedObject("bizkit"),NamedObject("bizket")]
|
||||||
self.assertEqual(1,len(mf.getmatches(l)))
|
eq_(len(getmatches(l, match_similar_words=True)), 1)
|
||||||
l = [NamedObject("foobar"),NamedObject("foosbar")]
|
l = [NamedObject("foobar"),NamedObject("foosbar")]
|
||||||
self.assertEqual(1,len(mf.getmatches(l)))
|
eq_(len(getmatches(l, match_similar_words=True)), 1)
|
||||||
|
|
||||||
def test_single_object_with_similar_words(self):
|
def test_single_object_with_similar_words(self):
|
||||||
mf = MatchFactory()
|
|
||||||
mf.match_similar_words = True
|
|
||||||
l = [NamedObject("foo foos")]
|
l = [NamedObject("foo foos")]
|
||||||
self.assertEqual(0,len(mf.getmatches(l)))
|
eq_(len(getmatches(l, match_similar_words=True)), 0)
|
||||||
|
|
||||||
def test_double_words_get_counted_only_once(self):
|
def test_double_words_get_counted_only_once(self):
|
||||||
mf = MatchFactory()
|
|
||||||
l = [NamedObject("foo bar foo bleh"),NamedObject("foo bar bleh bar")]
|
l = [NamedObject("foo bar foo bleh"),NamedObject("foo bar bleh bar")]
|
||||||
m = mf.getmatches(l)[0]
|
m = getmatches(l)[0]
|
||||||
self.assertEqual(75,m.percentage)
|
self.assertEqual(75,m.percentage)
|
||||||
|
|
||||||
def test_with_fields(self):
|
def test_with_fields(self):
|
||||||
mf = MatchFactory()
|
|
||||||
o1 = NamedObject("foo bar - foo bleh")
|
o1 = NamedObject("foo bar - foo bleh")
|
||||||
o2 = NamedObject("foo bar - bleh bar")
|
o2 = NamedObject("foo bar - bleh bar")
|
||||||
o1.words = getfields(o1.name)
|
o1.words = getfields(o1.name)
|
||||||
o2.words = getfields(o2.name)
|
o2.words = getfields(o2.name)
|
||||||
m = mf.getmatches([o1, o2])[0]
|
m = getmatches([o1, o2])[0]
|
||||||
self.assertEqual(50, m.percentage)
|
self.assertEqual(50, m.percentage)
|
||||||
|
|
||||||
def test_with_fields_no_order(self):
|
def test_with_fields_no_order(self):
|
||||||
mf = MatchFactory()
|
|
||||||
mf.no_field_order = True
|
|
||||||
o1 = NamedObject("foo bar - foo bleh")
|
o1 = NamedObject("foo bar - foo bleh")
|
||||||
o2 = NamedObject("bleh bang - foo bar")
|
o2 = NamedObject("bleh bang - foo bar")
|
||||||
o1.words = getfields(o1.name)
|
o1.words = getfields(o1.name)
|
||||||
o2.words = getfields(o2.name)
|
o2.words = getfields(o2.name)
|
||||||
m = mf.getmatches([o1, o2])[0]
|
m = getmatches([o1, o2], no_field_order=True)[0]
|
||||||
self.assertEqual(50 ,m.percentage)
|
eq_(m.percentage, 50)
|
||||||
|
|
||||||
def test_only_match_similar_when_the_option_is_set(self):
|
def test_only_match_similar_when_the_option_is_set(self):
|
||||||
mf = MatchFactory()
|
|
||||||
mf.match_similar_words = False
|
|
||||||
l = [NamedObject("foobar"),NamedObject("foobars")]
|
l = [NamedObject("foobar"),NamedObject("foobars")]
|
||||||
self.assertEqual(0,len(mf.getmatches(l)))
|
eq_(len(getmatches(l, match_similar_words=False)), 0)
|
||||||
|
|
||||||
def test_dont_recurse_do_match(self):
|
def test_dont_recurse_do_match(self):
|
||||||
# with nosetests, the stack is increased. The number has to be high enough not to fail falsely
|
# with nosetests, the stack is increased. The number has to be high enough not to fail falsely
|
||||||
sys.setrecursionlimit(100)
|
sys.setrecursionlimit(100)
|
||||||
mf = MatchFactory()
|
|
||||||
files = [NamedObject('foo bar') for i in range(101)]
|
files = [NamedObject('foo bar') for i in range(101)]
|
||||||
try:
|
try:
|
||||||
mf.getmatches(files)
|
getmatches(files)
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
self.fail()
|
self.fail()
|
||||||
finally:
|
finally:
|
||||||
@@ -489,18 +467,9 @@ class TCMatchFactory(TestCase):
|
|||||||
|
|
||||||
def test_min_match_percentage(self):
|
def test_min_match_percentage(self):
|
||||||
l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("a b c foo")]
|
l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("a b c foo")]
|
||||||
mf = MatchFactory()
|
r = getmatches(l, min_match_percentage=50)
|
||||||
mf.min_match_percentage = 50
|
|
||||||
r = mf.getmatches(l)
|
|
||||||
self.assertEqual(1,len(r)) #Only "foo bar" / "bar bleh" should match
|
self.assertEqual(1,len(r)) #Only "foo bar" / "bar bleh" should match
|
||||||
|
|
||||||
def test_limit(self):
|
|
||||||
l = [NamedObject(),NamedObject(),NamedObject()]
|
|
||||||
mf = MatchFactory()
|
|
||||||
mf.limit = 2
|
|
||||||
r = mf.getmatches(l)
|
|
||||||
self.assertEqual(2,len(r))
|
|
||||||
|
|
||||||
def test_MemoryError(self):
|
def test_MemoryError(self):
|
||||||
@log_calls
|
@log_calls
|
||||||
def mocked_match(first, second, flags):
|
def mocked_match(first, second, flags):
|
||||||
@@ -510,9 +479,8 @@ class TCMatchFactory(TestCase):
|
|||||||
|
|
||||||
objects = [NamedObject() for i in range(10)] # results in 45 matches
|
objects = [NamedObject() for i in range(10)] # results in 45 matches
|
||||||
self.mock(engine, 'get_match', mocked_match)
|
self.mock(engine, 'get_match', mocked_match)
|
||||||
mf = MatchFactory()
|
|
||||||
try:
|
try:
|
||||||
r = mf.getmatches(objects)
|
r = getmatches(objects)
|
||||||
except MemoryError:
|
except MemoryError:
|
||||||
self.fail('MemoryError must be handled')
|
self.fail('MemoryError must be handled')
|
||||||
self.assertEqual(42, len(r))
|
self.assertEqual(42, len(r))
|
||||||
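The hunks above all make the same move: the MatchFactory class disappears and its attributes become keyword arguments of a module-level getmatches() function. A minimal migration sketch, assuming only the keyword options exercised by the tests above (the Song stand-in is hypothetical; any object with a name attribute works):

    from dupeguru import engine

    class Song(object):            # hypothetical stand-in for scanned objects
        def __init__(self, name):
            self.name = name

    objects = [Song("foo bar"), Song("bar bleh")]

    # Old API (removed by this changeset):
    #   mf = MatchFactory(); mf.min_match_percentage = 50; matches = mf.getmatches(objects)
    # New API -- the factory's attributes become keyword arguments:
    matches = engine.getmatches(
        objects,
        min_match_percentage=50,    # drop matches scoring below 50%
        weight_words=True,          # weight the match score by word length
        match_similar_words=False,  # no fuzzy word matching
        no_field_order=False,       # field order stays significant for field-based names
    )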
@@ -738,7 +706,7 @@ class TCget_groups(TestCase):
|
|||||||
|
|
||||||
def test_simple(self):
|
def test_simple(self):
|
||||||
l = [NamedObject("foo bar"),NamedObject("bar bleh")]
|
l = [NamedObject("foo bar"),NamedObject("bar bleh")]
|
||||||
matches = MatchFactory().getmatches(l)
|
matches = getmatches(l)
|
||||||
m = matches[0]
|
m = matches[0]
|
||||||
r = get_groups(matches)
|
r = get_groups(matches)
|
||||||
self.assertEqual(1,len(r))
|
self.assertEqual(1,len(r))
|
||||||
@@ -749,7 +717,7 @@ class TCget_groups(TestCase):
|
|||||||
def test_group_with_multiple_matches(self):
|
def test_group_with_multiple_matches(self):
|
||||||
#This results in 3 matches
|
#This results in 3 matches
|
||||||
l = [NamedObject("foo"),NamedObject("foo"),NamedObject("foo")]
|
l = [NamedObject("foo"),NamedObject("foo"),NamedObject("foo")]
|
||||||
matches = MatchFactory().getmatches(l)
|
matches = getmatches(l)
|
||||||
r = get_groups(matches)
|
r = get_groups(matches)
|
||||||
self.assertEqual(1,len(r))
|
self.assertEqual(1,len(r))
|
||||||
g = r[0]
|
g = r[0]
|
||||||
@@ -759,7 +727,7 @@ class TCget_groups(TestCase):
|
|||||||
l = [NamedObject("a b"),NamedObject("a b"),NamedObject("b c"),NamedObject("c d"),NamedObject("c d")]
|
l = [NamedObject("a b"),NamedObject("a b"),NamedObject("b c"),NamedObject("c d"),NamedObject("c d")]
|
||||||
#There will be 2 groups here: group "a b" and group "c d"
|
#There will be 2 groups here: group "a b" and group "c d"
|
||||||
#"b c" can go either of them, but not both.
|
#"b c" can go either of them, but not both.
|
||||||
matches = MatchFactory().getmatches(l)
|
matches = getmatches(l)
|
||||||
r = get_groups(matches)
|
r = get_groups(matches)
|
||||||
self.assertEqual(2,len(r))
|
self.assertEqual(2,len(r))
|
||||||
self.assertEqual(5,len(r[0])+len(r[1]))
|
self.assertEqual(5,len(r[0])+len(r[1]))
|
||||||
@@ -768,7 +736,7 @@ class TCget_groups(TestCase):
|
|||||||
l = [NamedObject("a b"),NamedObject("a b"),NamedObject("a b"),NamedObject("a b")]
|
l = [NamedObject("a b"),NamedObject("a b"),NamedObject("a b"),NamedObject("a b")]
|
||||||
#All four objects here are "a b", so every pair matches
|
#All four objects here are "a b", so every pair matches
|
||||||
#They must all be merged into a single group
|
#They must all be merged into a single group
|
||||||
matches = MatchFactory().getmatches(l)
|
matches = getmatches(l)
|
||||||
r = get_groups(matches)
|
r = get_groups(matches)
|
||||||
self.assertEqual(1,len(r))
|
self.assertEqual(1,len(r))
|
||||||
|
|
||||||
@@ -788,7 +756,7 @@ class TCget_groups(TestCase):
|
|||||||
|
|
||||||
def test_four_sized_group(self):
|
def test_four_sized_group(self):
|
||||||
l = [NamedObject("foobar") for i in xrange(4)]
|
l = [NamedObject("foobar") for i in xrange(4)]
|
||||||
m = MatchFactory().getmatches(l)
|
m = getmatches(l)
|
||||||
r = get_groups(m)
|
r = get_groups(m)
|
||||||
self.assertEqual(1,len(r))
|
self.assertEqual(1,len(r))
|
||||||
self.assertEqual(4,len(r[0]))
|
self.assertEqual(4,len(r[0]))
|
||||||
|
|||||||
@@ -16,8 +16,8 @@ from hsutil.path import Path
|
|||||||
from hsutil.testcase import TestCase
|
from hsutil.testcase import TestCase
|
||||||
from hsutil.misc import first
|
from hsutil.misc import first
|
||||||
|
|
||||||
from . import engine_test
|
from . import engine_test, data
|
||||||
from .. import data, engine
|
from .. import engine
|
||||||
from ..results import *
|
from ..results import *
|
||||||
|
|
||||||
class NamedObject(engine_test.NamedObject):
|
class NamedObject(engine_test.NamedObject):
|
||||||
@@ -37,7 +37,7 @@ class NamedObject(engine_test.NamedObject):
|
|||||||
def GetTestGroups():
|
def GetTestGroups():
|
||||||
objects = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("foo bleh"),NamedObject("ibabtu"),NamedObject("ibabtu")]
|
objects = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("foo bleh"),NamedObject("ibabtu"),NamedObject("ibabtu")]
|
||||||
objects[1].size = 1024
|
objects[1].size = 1024
|
||||||
matches = engine.MatchFactory().getmatches(objects) #we should have 5 matches
|
matches = engine.getmatches(objects) #we should have 5 matches
|
||||||
groups = engine.get_groups(matches) #We should have 2 groups
|
groups = engine.get_groups(matches) #We should have 2 groups
|
||||||
for g in groups:
|
for g in groups:
|
||||||
g.prioritize(lambda x:objects.index(x)) #We want the dupes to be in the same order as the list is
|
g.prioritize(lambda x:objects.index(x)) #We want the dupes to be in the same order as the list is
|
||||||
@@ -505,7 +505,7 @@ class TCResultsXML(TestCase):
|
|||||||
return objects[1]
|
return objects[1]
|
||||||
|
|
||||||
objects = [NamedObject(u"\xe9foo bar",True),NamedObject("bar bleh",True)]
|
objects = [NamedObject(u"\xe9foo bar",True),NamedObject("bar bleh",True)]
|
||||||
matches = engine.MatchFactory().getmatches(objects) #we should have 5 matches
|
matches = engine.getmatches(objects) #we should have 5 matches
|
||||||
groups = engine.get_groups(matches) #We should have 2 groups
|
groups = engine.get_groups(matches) #We should have 2 groups
|
||||||
for g in groups:
|
for g in groups:
|
||||||
g.prioritize(lambda x:objects.index(x)) #We want the dupes to be in the same order as the list is
|
g.prioritize(lambda x:objects.index(x)) #We want the dupes to be in the same order as the list is
|
||||||
|
|||||||
@@ -132,8 +132,6 @@ def test_content_scan_doesnt_put_md5_in_words_at_the_end():
|
|||||||
f[1].md5 = f[1].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
|
f[1].md5 = f[1].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
|
||||||
r = s.GetDupeGroups(f)
|
r = s.GetDupeGroups(f)
|
||||||
g = r[0]
|
g = r[0]
|
||||||
eq_(g.ref.words, ['--'])
|
|
||||||
eq_(g.dupes[0].words, ['--'])
|
|
||||||
|
|
||||||
def test_extension_is_not_counted_in_filename_scan():
|
def test_extension_is_not_counted_in_filename_scan():
|
||||||
s = Scanner()
|
s = Scanner()
|
||||||
@@ -369,23 +367,6 @@ def test_ignore_list_checks_for_unicode():
|
|||||||
assert f2 in g
|
assert f2 in g
|
||||||
assert f3 in g
|
assert f3 in g
|
||||||
|
|
||||||
def test_custom_match_factory():
|
|
||||||
class MatchFactory(object):
|
|
||||||
def getmatches(self, objects, j=None):
|
|
||||||
return [Match(objects[0], objects[1], 420)]
|
|
||||||
|
|
||||||
|
|
||||||
s = Scanner()
|
|
||||||
s.match_factory = MatchFactory()
|
|
||||||
o1, o2 = no('foo'), no('bar')
|
|
||||||
groups = s.GetDupeGroups([o1, o2])
|
|
||||||
eq_(len(groups), 1)
|
|
||||||
g = groups[0]
|
|
||||||
eq_(len(g), 2)
|
|
||||||
g.switch_ref(o1)
|
|
||||||
m = g.get_match_of(o2)
|
|
||||||
eq_(m, (o1, o2, 420))
|
|
||||||
|
|
||||||
def test_file_evaluates_to_false():
|
def test_file_evaluates_to_false():
|
||||||
# A very wrong way to use any() was added at some point, causing resulting group list
|
# A very wrong way to use any() was added at some point, causing resulting group list
|
||||||
# to be empty.
|
# to be empty.
|
||||||
@@ -455,15 +436,3 @@ def test_partial_group_match():
|
|||||||
assert o2 in group
|
assert o2 in group
|
||||||
assert o3 not in group
|
assert o3 not in group
|
||||||
eq_(s.discarded_file_count, 1)
|
eq_(s.discarded_file_count, 1)
|
||||||
|
|
||||||
|
|
||||||
#--- Scanner ME
|
|
||||||
def test_priorize_me():
|
|
||||||
# in ScannerME, bitrate goes first (right after is_ref) in prioritization
|
|
||||||
s = ScannerME()
|
|
||||||
o1, o2 = no('foo'), no('foo')
|
|
||||||
o1.bitrate = 1
|
|
||||||
o2.bitrate = 2
|
|
||||||
[group] = s.GetDupeGroups([o1, o2])
|
|
||||||
assert group.ref is o2
|
|
||||||
|
|
||||||
|
|||||||
@@ -16,10 +16,10 @@ import os.path as op
|
|||||||
from PyQt4.QtCore import Qt, QTimer, QObject, QCoreApplication, QUrl, SIGNAL
|
from PyQt4.QtCore import Qt, QTimer, QObject, QCoreApplication, QUrl, SIGNAL
|
||||||
from PyQt4.QtGui import QProgressDialog, QDesktopServices, QFileDialog, QDialog, QMessageBox
|
from PyQt4.QtGui import QProgressDialog, QDesktopServices, QFileDialog, QDialog, QMessageBox
|
||||||
|
|
||||||
import hsfs as fs
|
|
||||||
from hsutil import job
|
from hsutil import job
|
||||||
from hsutil.reg import RegistrationRequired
|
from hsutil.reg import RegistrationRequired
|
||||||
|
|
||||||
|
from dupeguru import fs
|
||||||
from dupeguru.app import (DupeGuru as DupeGuruBase, JOB_SCAN, JOB_LOAD, JOB_MOVE, JOB_COPY,
|
from dupeguru.app import (DupeGuru as DupeGuruBase, JOB_SCAN, JOB_LOAD, JOB_MOVE, JOB_COPY,
|
||||||
JOB_DELETE)
|
JOB_DELETE)
|
||||||
|
|
||||||
@@ -145,6 +145,7 @@ class DupeGuru(DupeGuruBase, QObject):
|
|||||||
|
|
||||||
def ask_for_reg_code(self):
|
def ask_for_reg_code(self):
|
||||||
if self.reg.ask_for_code():
|
if self.reg.ask_for_code():
|
||||||
|
#XXX bug???
|
||||||
self._setup_ui_as_registered()
|
self._setup_ui_as_registered()
|
||||||
|
|
||||||
@demo_method
|
@demo_method
|
||||||
|
|||||||
@@ -47,7 +47,14 @@ class DirectoryNode(TreeNode):
|
|||||||
return DirectoryNode(self.model, self, ref, row)
|
return DirectoryNode(self.model, self, ref, row)
|
||||||
|
|
||||||
def _getChildren(self):
|
def _getChildren(self):
|
||||||
return self.ref.dirs
|
return self.model._dirs.get_subfolders(self.ref)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def name(self):
|
||||||
|
if self.parent is not None:
|
||||||
|
return self.ref[-1]
|
||||||
|
else:
|
||||||
|
return unicode(self.ref)
|
||||||
|
|
||||||
|
|
||||||
class DirectoriesModel(TreeModel):
|
class DirectoriesModel(TreeModel):
|
||||||
@@ -70,13 +77,13 @@ class DirectoriesModel(TreeModel):
|
|||||||
node = index.internalPointer()
|
node = index.internalPointer()
|
||||||
if role == Qt.DisplayRole:
|
if role == Qt.DisplayRole:
|
||||||
if index.column() == 0:
|
if index.column() == 0:
|
||||||
return node.ref.name
|
return node.name
|
||||||
else:
|
else:
|
||||||
return STATES[self._dirs.get_state(node.ref.path)]
|
return STATES[self._dirs.get_state(node.ref)]
|
||||||
elif role == Qt.EditRole and index.column() == 1:
|
elif role == Qt.EditRole and index.column() == 1:
|
||||||
return self._dirs.get_state(node.ref.path)
|
return self._dirs.get_state(node.ref)
|
||||||
elif role == Qt.ForegroundRole:
|
elif role == Qt.ForegroundRole:
|
||||||
state = self._dirs.get_state(node.ref.path)
|
state = self._dirs.get_state(node.ref)
|
||||||
if state == 1:
|
if state == 1:
|
||||||
return QBrush(Qt.blue)
|
return QBrush(Qt.blue)
|
||||||
elif state == 2:
|
elif state == 2:
|
||||||
@@ -101,6 +108,6 @@ class DirectoriesModel(TreeModel):
|
|||||||
if not index.isValid() or role != Qt.EditRole or index.column() != 1:
|
if not index.isValid() or role != Qt.EditRole or index.column() != 1:
|
||||||
return False
|
return False
|
||||||
node = index.internalPointer()
|
node = index.internalPointer()
|
||||||
self._dirs.set_state(node.ref.path, value)
|
self._dirs.set_state(node.ref, value)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
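Note the shape of this change: DirectoryNode.ref now holds a plain hsutil Path instead of an hsfs node, so the model asks the Directories instance for states and subfolders rather than asking the node. A small sketch of the labeling rule the new name property encodes (the path value is hypothetical; Path indexing behavior assumed from this hunk):

    from hsutil.path import Path

    ref = Path('/Users/me/Music/Albums')
    ref[-1]        # 'Albums' -- the label shown for child nodes
    unicode(ref)   # u'/Users/me/Music/Albums' -- the label shown for root nodes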
|
|||||||
@@ -8,12 +8,13 @@
|
|||||||
import objc
|
import objc
|
||||||
from AppKit import *
|
from AppKit import *
|
||||||
|
|
||||||
from dupeguru import app_me_cocoa, scanner
|
from dupeguru_me.app_cocoa import DupeGuruME
|
||||||
|
from dupeguru.scanner import (SCAN_TYPE_FILENAME, SCAN_TYPE_FIELDS, SCAN_TYPE_FIELDS_NO_ORDER,
|
||||||
|
SCAN_TYPE_TAG, SCAN_TYPE_CONTENT, SCAN_TYPE_CONTENT_AUDIO)
|
||||||
|
|
||||||
# Fix py2app imports; py2app chokes on relative imports
|
# Fix py2app imports; py2app chokes on relative imports
|
||||||
from dupeguru import app, app_cocoa, data, directories, engine, export, ignore, results, scanner
|
from dupeguru_me import app_cocoa, data, fs, scanner
|
||||||
from hsfs import auto, stats, tree, music
|
from dupeguru import app, app_cocoa, data, directories, engine, export, ignore, results, scanner, fs
|
||||||
from hsfs.phys import music
|
|
||||||
from hsmedia import aiff, flac, genres, id3v1, id3v2, mp4, mpeg, ogg, wma
|
from hsmedia import aiff, flac, genres, id3v1, id3v2, mp4, mpeg, ogg, wma
|
||||||
from hsutil import conflict
|
from hsutil import conflict
|
||||||
|
|
||||||
@@ -23,7 +24,7 @@ class PyApp(NSObject):
|
|||||||
class PyDupeGuru(PyApp):
|
class PyDupeGuru(PyApp):
|
||||||
def init(self):
|
def init(self):
|
||||||
self = super(PyDupeGuru,self).init()
|
self = super(PyDupeGuru,self).init()
|
||||||
self.app = app_me_cocoa.DupeGuruME()
|
self.app = DupeGuruME()
|
||||||
return self
|
return self
|
||||||
|
|
||||||
#---Directories
|
#---Directories
|
||||||
@@ -180,12 +181,12 @@ class PyDupeGuru(PyApp):
|
|||||||
def setScanType_(self, scan_type):
|
def setScanType_(self, scan_type):
|
||||||
try:
|
try:
|
||||||
self.app.scanner.scan_type = [
|
self.app.scanner.scan_type = [
|
||||||
scanner.SCAN_TYPE_FILENAME,
|
SCAN_TYPE_FILENAME,
|
||||||
scanner.SCAN_TYPE_FIELDS,
|
SCAN_TYPE_FIELDS,
|
||||||
scanner.SCAN_TYPE_FIELDS_NO_ORDER,
|
SCAN_TYPE_FIELDS_NO_ORDER,
|
||||||
scanner.SCAN_TYPE_TAG,
|
SCAN_TYPE_TAG,
|
||||||
scanner.SCAN_TYPE_CONTENT,
|
SCAN_TYPE_CONTENT,
|
||||||
scanner.SCAN_TYPE_CONTENT_AUDIO
|
SCAN_TYPE_CONTENT_AUDIO
|
||||||
][scan_type]
|
][scan_type]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
pass
|
pass
|
||||||
|
|||||||
0
me/py/__init__.py
Normal file
@@ -7,29 +7,29 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.hardcoded.net/licenses/hs_license
|
# http://www.hardcoded.net/licenses/hs_license
|
||||||
|
|
||||||
import os.path as op
|
|
||||||
import logging
|
import logging
|
||||||
from appscript import app, k, CommandError
|
from appscript import app, k, CommandError
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from hsutil.cocoa import as_fetch
|
from hsutil.cocoa import as_fetch
|
||||||
import hsfs.phys.music
|
|
||||||
|
|
||||||
import app_cocoa, data_me, scanner
|
from dupeguru.app_cocoa import JOBID2TITLE, DupeGuru as DupeGuruBase
|
||||||
|
|
||||||
|
from . import data, scanner, fs
|
||||||
|
|
||||||
JOB_REMOVE_DEAD_TRACKS = 'jobRemoveDeadTracks'
|
JOB_REMOVE_DEAD_TRACKS = 'jobRemoveDeadTracks'
|
||||||
JOB_SCAN_DEAD_TRACKS = 'jobScanDeadTracks'
|
JOB_SCAN_DEAD_TRACKS = 'jobScanDeadTracks'
|
||||||
|
|
||||||
app_cocoa.JOBID2TITLE.update({
|
JOBID2TITLE.update({
|
||||||
JOB_REMOVE_DEAD_TRACKS: "Removing dead tracks from your iTunes Library",
|
JOB_REMOVE_DEAD_TRACKS: "Removing dead tracks from your iTunes Library",
|
||||||
JOB_SCAN_DEAD_TRACKS: "Scanning the iTunes Library",
|
JOB_SCAN_DEAD_TRACKS: "Scanning the iTunes Library",
|
||||||
})
|
})
|
||||||
|
|
||||||
class DupeGuruME(app_cocoa.DupeGuru):
|
class DupeGuruME(DupeGuruBase):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
app_cocoa.DupeGuru.__init__(self, data_me, 'dupeGuru Music Edition', appid=1)
|
DupeGuruBase.__init__(self, data, 'dupeGuru Music Edition', appid=1)
|
||||||
self.scanner = scanner.ScannerME()
|
self.scanner = scanner.ScannerME()
|
||||||
self.directories.dirclass = hsfs.phys.music.Directory
|
self.directories.fileclasses = [fs.Mp3File, fs.Mp4File, fs.WmaFile, fs.OggFile, fs.FlacFile, fs.AiffFile]
|
||||||
self.dead_tracks = []
|
self.dead_tracks = []
|
||||||
|
|
||||||
def remove_dead_tracks(self):
|
def remove_dead_tracks(self):
|
||||||
@@ -8,7 +8,7 @@
|
|||||||
# http://www.hardcoded.net/licenses/hs_license
|
# http://www.hardcoded.net/licenses/hs_license
|
||||||
|
|
||||||
from hsutil.str import format_time, FT_MINUTES, format_size
|
from hsutil.str import format_time, FT_MINUTES, format_size
|
||||||
from .data import (format_path, format_timestamp, format_words, format_perc,
|
from dupeguru.data import (format_path, format_timestamp, format_words, format_perc,
|
||||||
format_dupe_count, cmp_value)
|
format_dupe_count, cmp_value)
|
||||||
|
|
||||||
COLUMNS = [
|
COLUMNS = [
|
||||||
@@ -76,7 +76,7 @@ def GetDisplayInfo(dupe, group, delta):
|
|||||||
str(dupe.track),
|
str(dupe.track),
|
||||||
dupe.comment,
|
dupe.comment,
|
||||||
format_perc(percentage),
|
format_perc(percentage),
|
||||||
format_words(dupe.words),
|
format_words(dupe.words) if hasattr(dupe, 'words') else '',
|
||||||
format_dupe_count(dupe_count)
|
format_dupe_count(dupe_count)
|
||||||
]
|
]
|
||||||
|
|
||||||
183
me/py/fs.py
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Created By: Virgil Dupras
|
||||||
|
# Created On: 2009-10-23
|
||||||
|
# $Id$
|
||||||
|
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
|
||||||
|
#
|
||||||
|
# This software is licensed under the "HS" License as described in the "LICENSE" file,
|
||||||
|
# which should be included with this package. The terms are also available at
|
||||||
|
# http://www.hardcoded.net/licenses/hs_license
|
||||||
|
|
||||||
|
from hsmedia import mpeg, wma, mp4, ogg, flac, aiff
|
||||||
|
from hsutil.str import get_file_ext
|
||||||
|
from dupeguru import fs
|
||||||
|
|
||||||
|
TAG_FIELDS = ['audiosize', 'duration', 'bitrate', 'samplerate', 'title', 'artist',
|
||||||
|
'album', 'genre', 'year', 'track', 'comment']
|
||||||
|
|
||||||
|
class MusicFile(fs.File):
|
||||||
|
INITIAL_INFO = fs.File.INITIAL_INFO.copy()
|
||||||
|
INITIAL_INFO.update({
|
||||||
|
'audiosize': 0,
|
||||||
|
'bitrate' : 0,
|
||||||
|
'duration' : 0,
|
||||||
|
'samplerate' : 0,
|
||||||
|
'artist' : '',
|
||||||
|
'album' : '',
|
||||||
|
'title' : '',
|
||||||
|
'genre' : '',
|
||||||
|
'comment' : '',
|
||||||
|
'year' : '',
|
||||||
|
'track' : 0,
|
||||||
|
})
|
||||||
|
HANDLED_EXTS = set()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def can_handle(cls, path):
|
||||||
|
if not fs.File.can_handle(path):
|
||||||
|
return False
|
||||||
|
return get_file_ext(path[-1]) in cls.HANDLED_EXTS
|
||||||
|
|
||||||
|
|
||||||
|
class Mp3File(MusicFile):
|
||||||
|
HANDLED_EXTS = set(['mp3'])
|
||||||
|
def _read_info(self, field):
|
||||||
|
if field == 'md5partial':
|
||||||
|
fileinfo = mpeg.Mpeg(unicode(self.path))
|
||||||
|
self._md5partial_offset = fileinfo.audio_offset
|
||||||
|
self._md5partial_size = fileinfo.audio_size
|
||||||
|
MusicFile._read_info(self, field)
|
||||||
|
if field in TAG_FIELDS:
|
||||||
|
fileinfo = mpeg.Mpeg(unicode(self.path))
|
||||||
|
self.audiosize = fileinfo.audio_size
|
||||||
|
self.bitrate = fileinfo.bitrate
|
||||||
|
self.duration = fileinfo.duration
|
||||||
|
self.samplerate = fileinfo.sample_rate
|
||||||
|
i1 = fileinfo.id3v1
|
||||||
|
# id3v1, even when non-existent, gives empty values; id3v2 does not. If id3v2 doesn't exist,
|
||||||
|
# just replace it with id3v1
|
||||||
|
i2 = fileinfo.id3v2
|
||||||
|
if not i2.exists:
|
||||||
|
i2 = i1
|
||||||
|
self.artist = i2.artist or i1.artist
|
||||||
|
self.album = i2.album or i1.album
|
||||||
|
self.title = i2.title or i1.title
|
||||||
|
self.genre = i2.genre or i1.genre
|
||||||
|
self.comment = i2.comment or i1.comment
|
||||||
|
self.year = i2.year or i1.year
|
||||||
|
self.track = i2.track or i1.track
|
||||||
|
|
||||||
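A note on the per-field `or` above: substituting i2 = i1 covers a completely missing id3v2 tag, but an id3v2 tag that exists with some empty fields still falls back to id3v1 field by field. A tiny illustration with hypothetical values:

    v2_artist = ''                    # id3v2 tag present, artist field left empty
    v1_artist = 'Some Artist'         # id3v1 value
    artist = v2_artist or v1_artist   # -> 'Some Artist'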
|
class WmaFile(MusicFile):
|
||||||
|
HANDLED_EXTS = set(['wma'])
|
||||||
|
def _read_info(self, field):
|
||||||
|
if field == 'md5partial':
|
||||||
|
dec = wma.WMADecoder(unicode(self.path))
|
||||||
|
self._md5partial_offset = dec.audio_offset
|
||||||
|
self._md5partial_size = dec.audio_size
|
||||||
|
MusicFile._read_info(self, field)
|
||||||
|
if field in TAG_FIELDS:
|
||||||
|
dec = wma.WMADecoder(unicode(self.path))
|
||||||
|
self.audiosize = dec.audio_size
|
||||||
|
self.bitrate = dec.bitrate
|
||||||
|
self.duration = dec.duration
|
||||||
|
self.samplerate = dec.sample_rate
|
||||||
|
self.artist = dec.artist
|
||||||
|
self.album = dec.album
|
||||||
|
self.title = dec.title
|
||||||
|
self.genre = dec.genre
|
||||||
|
self.comment = dec.comment
|
||||||
|
self.year = dec.year
|
||||||
|
self.track = dec.track
|
||||||
|
|
||||||
|
class Mp4File(MusicFile):
|
||||||
|
HANDLED_EXTS = set(['m4a', 'm4p'])
|
||||||
|
def _read_info(self, field):
|
||||||
|
if field == 'md5partial':
|
||||||
|
dec = mp4.File(unicode(self.path))
|
||||||
|
self._md5partial_offset = dec.audio_offset
|
||||||
|
self._md5partial_size = dec.audio_size
|
||||||
|
dec.close()
|
||||||
|
MusicFile._read_info(self, field)
|
||||||
|
if field in TAG_FIELDS:
|
||||||
|
dec = mp4.File(unicode(self.path))
|
||||||
|
self.audiosize = dec.audio_size
|
||||||
|
self.bitrate = dec.bitrate
|
||||||
|
self.duration = dec.duration
|
||||||
|
self.samplerate = dec.sample_rate
|
||||||
|
self.artist = dec.artist
|
||||||
|
self.album = dec.album
|
||||||
|
self.title = dec.title
|
||||||
|
self.genre = dec.genre
|
||||||
|
self.comment = dec.comment
|
||||||
|
self.year = dec.year
|
||||||
|
self.track = dec.track
|
||||||
|
dec.close()
|
||||||
|
|
||||||
|
class OggFile(MusicFile):
|
||||||
|
HANDLED_EXTS = set(['ogg'])
|
||||||
|
def _read_info(self, field):
|
||||||
|
if field == 'md5partial':
|
||||||
|
dec = ogg.Vorbis(unicode(self.path))
|
||||||
|
self._md5partial_offset = dec.audio_offset
|
||||||
|
self._md5partial_size = dec.audio_size
|
||||||
|
MusicFile._read_info(self, field)
|
||||||
|
if field in TAG_FIELDS:
|
||||||
|
dec = ogg.Vorbis(unicode(self.path))
|
||||||
|
self.audiosize = dec.audio_size
|
||||||
|
self.bitrate = dec.bitrate
|
||||||
|
self.duration = dec.duration
|
||||||
|
self.samplerate = dec.sample_rate
|
||||||
|
self.artist = dec.artist
|
||||||
|
self.album = dec.album
|
||||||
|
self.title = dec.title
|
||||||
|
self.genre = dec.genre
|
||||||
|
self.comment = dec.comment
|
||||||
|
self.year = dec.year
|
||||||
|
self.track = dec.track
|
||||||
|
|
||||||
|
class FlacFile(MusicFile):
|
||||||
|
HANDLED_EXTS = set(['flac'])
|
||||||
|
def _read_info(self, field):
|
||||||
|
if field == 'md5partial':
|
||||||
|
dec = flac.FLAC(unicode(self.path))
|
||||||
|
self._md5partial_offset = dec.audio_offset
|
||||||
|
self._md5partial_size = dec.audio_size
|
||||||
|
MusicFile._read_info(self, field)
|
||||||
|
if field in TAG_FIELDS:
|
||||||
|
dec = flac.FLAC(unicode(self.path))
|
||||||
|
self.audiosize = dec.audio_size
|
||||||
|
self.bitrate = dec.bitrate
|
||||||
|
self.duration = dec.duration
|
||||||
|
self.samplerate = dec.sample_rate
|
||||||
|
self.artist = dec.artist
|
||||||
|
self.album = dec.album
|
||||||
|
self.title = dec.title
|
||||||
|
self.genre = dec.genre
|
||||||
|
self.comment = dec.comment
|
||||||
|
self.year = dec.year
|
||||||
|
self.track = dec.track
|
||||||
|
|
||||||
|
class AiffFile(MusicFile):
|
||||||
|
HANDLED_EXTS = set(['aif', 'aiff', 'aifc'])
|
||||||
|
def _read_info(self, field):
|
||||||
|
if field == 'md5partial':
|
||||||
|
dec = aiff.File(unicode(self.path))
|
||||||
|
self._md5partial_offset = dec.audio_offset
|
||||||
|
self._md5partial_size = dec.audio_size
|
||||||
|
MusicFile._read_info(self, field)
|
||||||
|
if field in TAG_FIELDS:
|
||||||
|
dec = aiff.File(unicode(self.path))
|
||||||
|
self.audiosize = dec.audio_size
|
||||||
|
self.bitrate = dec.bitrate
|
||||||
|
self.duration = dec.duration
|
||||||
|
self.samplerate = dec.sample_rate
|
||||||
|
tag = dec.tag
|
||||||
|
if tag is not None:
|
||||||
|
self.artist = tag.artist
|
||||||
|
self.album = tag.album
|
||||||
|
self.title = tag.title
|
||||||
|
self.genre = tag.genre
|
||||||
|
self.comment = tag.comment
|
||||||
|
self.year = tag.year
|
||||||
|
self.track = tag.track
|
||||||
|
|
||||||
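The classes above are meant to be plugged into Directories.fileclasses, as the ME app setup later in this changeset does. The dispatch helper itself is not shown in this diff; a plausible sketch, assuming dupeguru.fs.get_file simply asks each class in turn (an assumption, not the verified implementation):

    def get_file(path, fileclasses):
        # Return an instance of the first file class that can handle `path`,
        # or None when no class claims it (e.g. an unknown extension).
        for fileclass in fileclasses:
            if fileclass.can_handle(path):   # MusicFile checks HANDLED_EXTS
                return fileclass(path)
        return None

    # Hypothetical usage mirroring the ME setup:
    # f = get_file(Path('/music/song.mp3'),
    #              [Mp3File, Mp4File, WmaFile, OggFile, FlacFile, AiffFile])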
16
me/py/scanner.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
# Created By: Virgil Dupras
|
||||||
|
# Created On: 2006/03/03
|
||||||
|
# $Id$
|
||||||
|
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
|
||||||
|
#
|
||||||
|
# This software is licensed under the "HS" License as described in the "LICENSE" file,
|
||||||
|
# which should be included with this package. The terms are also available at
|
||||||
|
# http://www.hardcoded.net/licenses/hs_license
|
||||||
|
|
||||||
|
from dupeguru.scanner import Scanner as ScannerBase
|
||||||
|
|
||||||
|
class ScannerME(ScannerBase):
|
||||||
|
@staticmethod
|
||||||
|
def _key_func(dupe):
|
||||||
|
return (not dupe.is_ref, -dupe.bitrate, -dupe.size)
|
||||||
|
|
||||||
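The sort key above is easy to misread: since False sorts before True and the numeric fields are negated, reference files come first, then higher bitrate, then larger size. A quick sanity check consistent with the test_priorize_me test below (the Dupe stand-in is hypothetical):

    class Dupe(object):
        def __init__(self, is_ref, bitrate, size):
            self.is_ref, self.bitrate, self.size = is_ref, bitrate, size

    dupes = [Dupe(False, 128, 5), Dupe(False, 320, 5), Dupe(True, 96, 5)]
    dupes.sort(key=ScannerME._key_func)
    # the is_ref file sorts first, then the 320 kbps file
    assert dupes[0].is_ref and dupes[1].bitrate == 320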
0
me/py/tests/__init__.py
Normal file
33
me/py/tests/scanner_test.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Created By: Virgil Dupras
|
||||||
|
# Created On: 2009-10-23
|
||||||
|
# $Id$
|
||||||
|
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
|
||||||
|
#
|
||||||
|
# This software is licensed under the "HS" License as described in the "LICENSE" file,
|
||||||
|
# which should be included with this package. The terms are also available at
|
||||||
|
# http://www.hardcoded.net/licenses/hs_license
|
||||||
|
|
||||||
|
from hsutil.path import Path
|
||||||
|
|
||||||
|
from dupeguru.engine import getwords
|
||||||
|
from ..scanner import *
|
||||||
|
|
||||||
|
class NamedObject(object):
|
||||||
|
def __init__(self, name="foobar", size=1):
|
||||||
|
self.name = name
|
||||||
|
self.size = size
|
||||||
|
self.path = Path('')
|
||||||
|
self.words = getwords(name)
|
||||||
|
|
||||||
|
|
||||||
|
no = NamedObject
|
||||||
|
|
||||||
|
def test_priorize_me():
|
||||||
|
# in ScannerME, bitrate goes first (right after is_ref) in prioritization
|
||||||
|
s = ScannerME()
|
||||||
|
o1, o2 = no('foo'), no('foo')
|
||||||
|
o1.bitrate = 1
|
||||||
|
o2.bitrate = 2
|
||||||
|
[group] = s.GetDupeGroups([o1, o2])
|
||||||
|
assert group.ref is o2
|
||||||
@@ -7,9 +7,7 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.hardcoded.net/licenses/hs_license
|
# http://www.hardcoded.net/licenses/hs_license
|
||||||
|
|
||||||
import hsfs.phys.music
|
from dupeguru_me import data, scanner, fs
|
||||||
|
|
||||||
from dupeguru import data_me, scanner
|
|
||||||
|
|
||||||
from base.app import DupeGuru as DupeGuruBase
|
from base.app import DupeGuru as DupeGuruBase
|
||||||
from details_dialog import DetailsDialog
|
from details_dialog import DetailsDialog
|
||||||
@@ -23,11 +21,11 @@ class DupeGuru(DupeGuruBase):
|
|||||||
DELTA_COLUMNS = frozenset([2, 3, 4, 5, 7, 8])
|
DELTA_COLUMNS = frozenset([2, 3, 4, 5, 7, 8])
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
DupeGuruBase.__init__(self, data_me, appid=1)
|
DupeGuruBase.__init__(self, data, appid=1)
|
||||||
|
|
||||||
def _setup(self):
|
def _setup(self):
|
||||||
self.scanner = scanner.ScannerME()
|
self.scanner = scanner.ScannerME()
|
||||||
self.directories.dirclass = hsfs.phys.music.Directory
|
self.directories.fileclasses = [fs.Mp3File, fs.Mp4File, fs.WmaFile, fs.OggFile, fs.FlacFile, fs.AiffFile]
|
||||||
DupeGuruBase._setup(self)
|
DupeGuruBase._setup(self)
|
||||||
|
|
||||||
def _update_options(self):
|
def _update_options(self):
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ from dupeguru_pe import app_cocoa as app_pe_cocoa
|
|||||||
# Fix py2app imports; py2app chokes on relative imports
|
# Fix py2app imports; py2app chokes on relative imports
|
||||||
from dupeguru import app, app_cocoa, data, directories, engine, export, ignore, results, scanner
|
from dupeguru import app, app_cocoa, data, directories, engine, export, ignore, results, scanner
|
||||||
from dupeguru_pe import block, cache, matchbase, data
|
from dupeguru_pe import block, cache, matchbase, data
|
||||||
from hsfs import auto, stats, tree
|
|
||||||
from hsutil import conflict
|
from hsutil import conflict
|
||||||
|
|
||||||
class PyApp(NSObject):
|
class PyApp(NSObject):
|
||||||
@@ -39,7 +38,7 @@ class PyDupeGuru(PyApp):
|
|||||||
self.app.scanner.ignore_list.Clear()
|
self.app.scanner.ignore_list.Clear()
|
||||||
|
|
||||||
def clearPictureCache(self):
|
def clearPictureCache(self):
|
||||||
self.app.scanner.match_factory.cached_blocks.clear()
|
self.app.scanner.cached_blocks.clear()
|
||||||
|
|
||||||
def doScan(self):
|
def doScan(self):
|
||||||
return self.app.start_scanning()
|
return self.app.start_scanning()
|
||||||
@@ -172,10 +171,10 @@ class PyDupeGuru(PyApp):
|
|||||||
|
|
||||||
#---Properties
|
#---Properties
|
||||||
def setMatchScaled_(self,match_scaled):
|
def setMatchScaled_(self,match_scaled):
|
||||||
self.app.scanner.match_factory.match_scaled = match_scaled
|
self.app.scanner.match_scaled = match_scaled
|
||||||
|
|
||||||
def setMinMatchPercentage_(self,percentage):
|
def setMinMatchPercentage_(self,percentage):
|
||||||
self.app.scanner.match_factory.threshold = int(percentage)
|
self.app.scanner.threshold = int(percentage)
|
||||||
|
|
||||||
def setMixFileKind_(self,mix_file_kind):
|
def setMixFileKind_(self,mix_file_kind):
|
||||||
self.app.scanner.mix_file_kind = mix_file_kind
|
self.app.scanner.mix_file_kind = mix_file_kind
|
||||||
|
|||||||
@@ -1,3 +1,7 @@
|
|||||||
|
- date: 2009-10-24
|
||||||
|
version: 1.7.8
|
||||||
|
description: |
|
||||||
|
* Fixed a bug that sometimes caused duplicates to be ignored during scans. (#73)
|
||||||
- date: 2009-10-14
|
- date: 2009-10-14
|
||||||
version: 1.7.7
|
version: 1.7.7
|
||||||
description: |
|
description: |
|
||||||
|
|||||||
@@ -7,41 +7,43 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.hardcoded.net/licenses/hs_license
|
# http://www.hardcoded.net/licenses/hs_license
|
||||||
|
|
||||||
import os
|
|
||||||
import os.path as op
|
import os.path as op
|
||||||
import logging
|
import logging
|
||||||
import plistlib
|
import plistlib
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import objc
|
|
||||||
from Foundation import *
|
from Foundation import *
|
||||||
from AppKit import *
|
from AppKit import *
|
||||||
from appscript import app, k
|
from appscript import app, k
|
||||||
|
|
||||||
from hsutil import job, io
|
from hsutil import io
|
||||||
import hsfs as fs
|
|
||||||
from hsfs import phys, InvalidPath
|
|
||||||
from hsutil import files
|
|
||||||
from hsutil.str import get_file_ext
|
from hsutil.str import get_file_ext
|
||||||
from hsutil.path import Path
|
from hsutil.path import Path
|
||||||
from hsutil.cocoa import as_fetch
|
from hsutil.cocoa import as_fetch
|
||||||
|
|
||||||
|
from dupeguru import fs
|
||||||
from dupeguru import app_cocoa, directories
|
from dupeguru import app_cocoa, directories
|
||||||
from . import data, matchbase
|
from . import data
|
||||||
from .cache import string_to_colors, Cache
|
from .cache import string_to_colors, Cache
|
||||||
|
from .scanner import ScannerPE
|
||||||
|
|
||||||
mainBundle = NSBundle.mainBundle()
|
mainBundle = NSBundle.mainBundle()
|
||||||
PictureBlocks = mainBundle.classNamed_('PictureBlocks')
|
PictureBlocks = mainBundle.classNamed_('PictureBlocks')
|
||||||
assert PictureBlocks is not None
|
assert PictureBlocks is not None
|
||||||
|
|
||||||
class Photo(phys.File):
|
class Photo(fs.File):
|
||||||
INITIAL_INFO = phys.File.INITIAL_INFO.copy()
|
INITIAL_INFO = fs.File.INITIAL_INFO.copy()
|
||||||
INITIAL_INFO.update({
|
INITIAL_INFO.update({
|
||||||
'dimensions': (0,0),
|
'dimensions': (0,0),
|
||||||
})
|
})
|
||||||
|
HANDLED_EXTS = set(['png', 'jpg', 'jpeg', 'gif', 'psd', 'bmp', 'tiff', 'tif', 'nef', 'cr2'])
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def can_handle(cls, path):
|
||||||
|
return fs.File.can_handle(path) and get_file_ext(path[-1]) in cls.HANDLED_EXTS
|
||||||
|
|
||||||
def _read_info(self, field):
|
def _read_info(self, field):
|
||||||
super(Photo, self)._read_info(field)
|
fs.File._read_info(self, field)
|
||||||
if field == 'dimensions':
|
if field == 'dimensions':
|
||||||
size = PictureBlocks.getImageSize_(unicode(self.path))
|
size = PictureBlocks.getImageSize_(unicode(self.path))
|
||||||
self.dimensions = (size.width, size.height)
|
self.dimensions = (size.width, size.height)
|
||||||
@@ -49,7 +51,7 @@ class Photo(phys.File):
|
|||||||
def get_blocks(self, block_count_per_side):
|
def get_blocks(self, block_count_per_side):
|
||||||
try:
|
try:
|
||||||
blocks = PictureBlocks.getBlocksFromImagePath_blockCount_(unicode(self.path), block_count_per_side)
|
blocks = PictureBlocks.getBlocksFromImagePath_blockCount_(unicode(self.path), block_count_per_side)
|
||||||
except Exception, e:
|
except Exception as e:
|
||||||
raise IOError('The reading of "%s" failed with "%s"' % (unicode(self.path), unicode(e)))
|
raise IOError('The reading of "%s" failed with "%s"' % (unicode(self.path), unicode(e)))
|
||||||
if not blocks:
|
if not blocks:
|
||||||
raise IOError('The picture %s could not be read' % unicode(self.path))
|
raise IOError('The picture %s could not be read' % unicode(self.path))
|
||||||
@@ -57,89 +59,79 @@ class Photo(phys.File):
|
|||||||
|
|
||||||
|
|
||||||
class IPhoto(Photo):
|
class IPhoto(Photo):
|
||||||
def __init__(self, parent, whole_path):
|
|
||||||
super(IPhoto, self).__init__(parent, whole_path[-1])
|
|
||||||
self.whole_path = whole_path
|
|
||||||
|
|
||||||
def _build_path(self):
|
|
||||||
return self.whole_path
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def display_path(self):
|
def display_path(self):
|
||||||
return super(IPhoto, self)._build_path()
|
return Path(('iPhoto Library', self.name))
|
||||||
|
|
||||||
|
def get_iphoto_database_path():
|
||||||
|
ud = NSUserDefaults.standardUserDefaults()
|
||||||
|
prefs = ud.persistentDomainForName_('com.apple.iApps')
|
||||||
|
if 'iPhotoRecentDatabases' not in prefs:
|
||||||
|
raise directories.InvalidPathError()
|
||||||
|
plisturl = NSURL.URLWithString_(prefs['iPhotoRecentDatabases'][0])
|
||||||
|
return Path(plisturl.path())
|
||||||
|
|
||||||
class Directory(phys.Directory):
|
def get_iphoto_pictures(plistpath):
|
||||||
cls_file_class = Photo
|
if not io.exists(plistpath):
|
||||||
cls_supported_exts = ('png', 'jpg', 'jpeg', 'gif', 'psd', 'bmp', 'tiff', 'nef', 'cr2')
|
raise directories.InvalidPathError()
|
||||||
|
s = io.open(plistpath).read()
|
||||||
def _fetch_subitems(self):
|
# There was a case where a guy had 0x10 chars in his plist, causing expat errors on loading
|
||||||
subdirs, subfiles = super(Directory,self)._fetch_subitems()
|
s = s.replace('\x10', '')
|
||||||
return subdirs, [name for name in subfiles if get_file_ext(name) in self.cls_supported_exts]
|
# It seems that iPhoto sometimes doesn't properly escape & chars. The regexp below is to find
|
||||||
|
# any & char that is not a &-based entity (&, ", etc.). based on TextMate's XML
|
||||||
|
# bundle's regexp
|
||||||
class IPhotoLibrary(fs.Directory):
|
s, count = re.subn(r'&(?![a-zA-Z0-9_-]+|#[0-9]+|#x[0-9a-fA-F]+;)', '', s)
|
||||||
def __init__(self, plistpath):
|
if count:
|
||||||
self.plistpath = plistpath
|
logging.warning("%d invalid XML entities replacement made", count)
|
||||||
self.refpath = plistpath[:-1]
|
plist = plistlib.readPlistFromString(s)
|
||||||
# the AlbumData.xml file lives right in the library path
|
result = []
|
||||||
super(IPhotoLibrary, self).__init__(None, 'iPhoto Library')
|
for photo_data in plist['Master Image List'].values():
|
||||||
if not io.exists(plistpath):
|
|
||||||
raise InvalidPath(self)
|
|
||||||
|
|
||||||
def _update_photo(self, photo_data):
|
|
||||||
if photo_data['MediaType'] != 'Image':
|
if photo_data['MediaType'] != 'Image':
|
||||||
return
|
continue
|
||||||
photo_path = Path(photo_data['ImagePath'])
|
photo_path = Path(photo_data['ImagePath'])
|
||||||
subpath = photo_path[len(self.refpath):-1]
|
photo = IPhoto(photo_path)
|
||||||
subdir = self
|
result.append(photo)
|
||||||
for element in subpath:
|
return result
|
||||||
try:
|
|
||||||
subdir = subdir[element]
|
class Directories(directories.Directories):
|
||||||
except KeyError:
|
def __init__(self):
|
||||||
subdir = fs.Directory(subdir, element)
|
directories.Directories.__init__(self, fileclasses=[Photo])
|
||||||
try:
|
self.iphoto_libpath = get_iphoto_database_path()
|
||||||
IPhoto(subdir, photo_path)
|
self.set_state(self.iphoto_libpath[:-1], directories.STATE_EXCLUDED)
|
||||||
except fs.AlreadyExistsError:
|
|
||||||
# it's possible for 2 entries in the plist to point to the same path. Ignore one of them.
|
|
||||||
pass
|
|
||||||
|
|
||||||
def update(self):
|
def _get_files(self, from_path):
|
||||||
self.clear()
|
if from_path == Path('iPhoto Library'):
|
||||||
s = open(unicode(self.plistpath)).read()
|
is_ref = self.get_state(from_path) == directories.STATE_REFERENCE
|
||||||
# There was a case where a guy had 0x10 chars in his plist, causing expat errors on loading
|
photos = get_iphoto_pictures(self.iphoto_libpath)
|
||||||
s = s.replace('\x10', '')
|
for photo in photos:
|
||||||
# It seems that iPhoto sometimes doesn't properly escape & chars. The regexp below is to find
|
photo.is_ref = is_ref
|
||||||
# any & char that is not a &-based entity (&, ", etc.). based on TextMate's XML
|
return photos
|
||||||
# bundle's regexp
|
else:
|
||||||
s, count = re.subn(r'&(?![a-zA-Z0-9_-]+|#[0-9]+|#x[0-9a-fA-F]+;)', '', s)
|
return directories.Directories._get_files(self, from_path)
|
||||||
if count:
|
|
||||||
logging.warning("%d invalid XML entities replacement made", count)
|
|
||||||
plist = plistlib.readPlistFromString(s)
|
|
||||||
for photo_data in plist['Master Image List'].values():
|
|
||||||
self._update_photo(photo_data)
|
|
||||||
|
|
||||||
def force_update(self): # Don't update
|
@staticmethod
|
||||||
pass
|
def get_subfolders(path):
|
||||||
|
if path == Path('iPhoto Library'):
|
||||||
|
return []
|
||||||
|
else:
|
||||||
|
return directories.Directories.get_subfolders(path)
|
||||||
|
|
||||||
|
def add_path(self, path):
|
||||||
|
if path == Path('iPhoto Library'):
|
||||||
|
if path in self:
|
||||||
|
raise AlreadyThereError()
|
||||||
|
self._dirs.append(path)
|
||||||
|
else:
|
||||||
|
directories.Directories.add_path(self, path)
|
||||||
|
|
||||||
|
|
||||||
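To summarize the new PE Directories behavior in one place, here is a hedged usage sketch; the method names come from this hunk, but the flow is inferred rather than taken from a test:

    dirs = Directories()                    # fileclasses=[Photo]; physical iPhoto path excluded
    dirs.add_path(Path('iPhoto Library'))   # magic entry accepted by the add_path override
    for f in dirs.get_files():              # the library entry yields IPhoto objects built
        f.path                              # from AlbumData.xml; regular paths yield Photos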
class DupeGuruPE(app_cocoa.DupeGuru):
|
class DupeGuruPE(app_cocoa.DupeGuru):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
app_cocoa.DupeGuru.__init__(self, data, 'dupeGuru Picture Edition', appid=5)
|
app_cocoa.DupeGuru.__init__(self, data, 'dupeGuru Picture Edition', appid=5)
|
||||||
self.scanner.match_factory = matchbase.AsyncMatchFactory()
|
self.scanner = ScannerPE()
|
||||||
self.directories.dirclass = Directory
|
self.directories = Directories()
|
||||||
self.directories.special_dirclasses[Path('iPhoto Library')] = lambda _, __: self._create_iphoto_library()
|
|
||||||
p = op.join(self.appdata, 'cached_pictures.db')
|
p = op.join(self.appdata, 'cached_pictures.db')
|
||||||
self.scanner.match_factory.cached_blocks = Cache(p)
|
self.scanner.cached_blocks = Cache(p)
|
||||||
|
|
||||||
def _create_iphoto_library(self):
|
|
||||||
ud = NSUserDefaults.standardUserDefaults()
|
|
||||||
prefs = ud.persistentDomainForName_('com.apple.iApps')
|
|
||||||
if 'iPhotoRecentDatabases' not in prefs:
|
|
||||||
raise directories.InvalidPathError
|
|
||||||
plisturl = NSURL.URLWithString_(prefs['iPhotoRecentDatabases'][0])
|
|
||||||
plistpath = Path(plisturl.path())
|
|
||||||
return IPhotoLibrary(plistpath)
|
|
||||||
|
|
||||||
def _do_delete(self, j):
|
def _do_delete(self, j):
|
||||||
def op(dupe):
|
def op(dupe):
|
||||||
@@ -174,40 +166,19 @@ class DupeGuruPE(app_cocoa.DupeGuru):
|
|||||||
|
|
||||||
def _do_load(self, j):
|
def _do_load(self, j):
|
||||||
self.directories.load_from_file(op.join(self.appdata, 'last_directories.xml'))
|
self.directories.load_from_file(op.join(self.appdata, 'last_directories.xml'))
|
||||||
for d in self.directories:
|
|
||||||
if isinstance(d, IPhotoLibrary):
|
|
||||||
d.update()
|
|
||||||
self.results.load_from_xml(op.join(self.appdata, 'last_results.xml'), self._get_file, j)
|
self.results.load_from_xml(op.join(self.appdata, 'last_results.xml'), self._get_file, j)
|
||||||
|
|
||||||
def _get_file(self, str_path):
|
def _get_file(self, str_path):
|
||||||
p = Path(str_path)
|
p = Path(str_path)
|
||||||
for d in self.directories:
|
if p in self.directories.iphoto_libpath[:-1]:
|
||||||
result = None
|
return IPhoto(p)
|
||||||
if p in d.path:
|
return app_cocoa.DupeGuru._get_file(self, str_path)
|
||||||
result = d.find_path(p[d.path:])
|
|
||||||
if isinstance(d, IPhotoLibrary) and p in d.refpath:
|
|
||||||
result = d.find_path(p[d.refpath:])
|
|
||||||
if result is not None:
|
|
||||||
return result
|
|
||||||
|
|
||||||
def add_directory(self, d):
|
|
||||||
result = app_cocoa.DupeGuru.add_directory(self, d)
|
|
||||||
if (result == 0) and (d == 'iPhoto Library'):
|
|
||||||
[iphotolib] = [dir for dir in self.directories if dir.path == d]
|
|
||||||
iphotolib.update()
|
|
||||||
return result
|
|
||||||
|
|
||||||
def copy_or_move(self, dupe, copy, destination, dest_type):
|
def copy_or_move(self, dupe, copy, destination, dest_type):
|
||||||
if isinstance(dupe, IPhoto):
|
if isinstance(dupe, IPhoto):
|
||||||
copy = True
|
copy = True
|
||||||
return app_cocoa.DupeGuru.copy_or_move(self, dupe, copy, destination, dest_type)
|
return app_cocoa.DupeGuru.copy_or_move(self, dupe, copy, destination, dest_type)
|
||||||
|
|
||||||
def start_scanning(self):
|
|
||||||
for directory in self.directories:
|
|
||||||
if isinstance(directory, IPhotoLibrary):
|
|
||||||
self.directories.set_state(directory.refpath, directories.STATE_EXCLUDED)
|
|
||||||
return app_cocoa.DupeGuru.start_scanning(self)
|
|
||||||
|
|
||||||
def selected_dupe_path(self):
|
def selected_dupe_path(self):
|
||||||
if not self.selected_dupes:
|
if not self.selected_dupes:
|
||||||
return None
|
return None
|
||||||
|
|||||||
@@ -20,58 +20,42 @@ from .block import avgdiff, DifferentBlockCountError, NoBlocksError
 from .cache import Cache
 
 MIN_ITERATIONS = 3
+BLOCK_COUNT_PER_SIDE = 15
 
 # Enough so that we're sure that the main thread will not wait after a result.get() call
 # cpucount*2 should be enough to be sure that the spawned process will not wait after the results
 # collection made by the main process.
 RESULTS_QUEUE_LIMIT = multiprocessing.cpu_count() * 2
 
-def get_match(first,second,percentage):
+def prepare_pictures(pictures, cached_blocks, j=job.nulljob):
+    # The MemoryError handlers in there use logging without first caring about whether or not
+    # there is enough memory left to carry on the operation because it is assumed that the
+    # MemoryError happens when trying to read an image file, which is freed from memory by the
+    # time that MemoryError is raised.
+    prepared = [] # only pictures for which there was no error getting blocks
+    try:
+        for picture in j.iter_with_progress(pictures, 'Analyzed %d/%d pictures'):
+            picture.dimensions
+            picture.unicode_path = unicode(picture.path)
+            try:
+                if picture.unicode_path not in cached_blocks:
+                    blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
+                    cached_blocks[picture.unicode_path] = blocks
+                prepared.append(picture)
+            except IOError as e:
+                logging.warning(unicode(e))
+            except MemoryError:
+                logging.warning(u'Ran out of memory while reading %s of size %d' % (picture.unicode_path, picture.size))
+                if picture.size < 10 * 1024 * 1024: # We're really running out of memory
+                    raise
+    except MemoryError:
+        logging.warning('Ran out of memory while preparing pictures')
+    return prepared
 
+def get_match(first, second, percentage):
     if percentage < 0:
         percentage = 0
-    return Match(first,second,percentage)
+    return Match(first, second, percentage)
 
-class MatchFactory(object):
-    cached_blocks = None
-    block_count_per_side = 15
-    threshold = 75
-    match_scaled = False
-    
-    def _do_getmatches(self, files, j):
-        raise NotImplementedError()
-    
-    def getmatches(self, files, j=job.nulljob):
-        # The MemoryError handlers in there use logging without first caring about whether or not
-        # there is enough memory left to carry on the operation because it is assumed that the
-        # MemoryError happens when trying to read an image file, which is freed from memory by the
-        # time that MemoryError is raised.
-        j = j.start_subjob([3, 7])
-        logging.info('Preparing %d files' % len(files))
-        prepared = self.prepare_files(files, j)
-        logging.info('Finished preparing %d files' % len(prepared))
-        return self._do_getmatches(prepared, j)
-    
-    def prepare_files(self, files, j=job.nulljob):
-        prepared = [] # only files for which there was no error getting blocks
-        try:
-            for picture in j.iter_with_progress(files, 'Analyzed %d/%d pictures'):
-                picture.dimensions
-                picture.unicode_path = unicode(picture.path)
-                try:
-                    if picture.unicode_path not in self.cached_blocks:
-                        blocks = picture.get_blocks(self.block_count_per_side)
-                        self.cached_blocks[picture.unicode_path] = blocks
-                    prepared.append(picture)
-                except IOError as e:
-                    logging.warning(unicode(e))
-                except MemoryError:
-                    logging.warning(u'Ran out of memory while reading %s of size %d' % (picture.unicode_path, picture.size))
-                    if picture.size < 10 * 1024 * 1024: # We're really running out of memory
-                        raise
-        except MemoryError:
-            logging.warning('Ran out of memory while preparing files')
-        return prepared
-
 def async_compare(ref_id, other_ids, dbname, threshold):
     cache = Cache(dbname, threaded=False)
@@ -89,53 +73,55 @@ def async_compare(ref_id, other_ids, dbname, threshold):
     results.append((ref_id, other_id, percentage))
     cache.con.close()
     return results
 
-class AsyncMatchFactory(MatchFactory):
-    def _do_getmatches(self, pictures, j):
-        def empty_out_queue(queue, into):
-            try:
-                while True:
-                    into.append(queue.get(block=False))
-            except Empty:
-                pass
-        
-        j = j.start_subjob([9, 1], 'Preparing for matching')
-        cache = self.cached_blocks
-        id2picture = {}
-        dimensions2pictures = defaultdict(set)
-        for picture in pictures:
-            try:
-                picture.cache_id = cache.get_id(picture.unicode_path)
-                id2picture[picture.cache_id] = picture
-                if not self.match_scaled:
-                    dimensions2pictures[picture.dimensions].add(picture)
-            except ValueError:
-                pass
-        pictures = [p for p in pictures if hasattr(p, 'cache_id')]
-        pool = multiprocessing.Pool()
-        async_results = []
-        matches = []
-        pictures_copy = set(pictures)
-        for ref in j.iter_with_progress(pictures, 'Matched %d/%d pictures'):
-            others = pictures_copy if self.match_scaled else dimensions2pictures[ref.dimensions]
-            others.remove(ref)
-            if others:
-                cache_ids = [f.cache_id for f in others]
-                args = (ref.cache_id, cache_ids, self.cached_blocks.dbname, self.threshold)
-                async_results.append(pool.apply_async(async_compare, args))
-                if len(async_results) > RESULTS_QUEUE_LIMIT:
-                    result = async_results.pop(0)
-                    matches.extend(result.get())
-        
-        result = []
-        for ref_id, other_id, percentage in j.iter_with_progress(matches, 'Verified %d/%d matches', every=10):
-            ref = id2picture[ref_id]
-            other = id2picture[other_id]
-            if percentage == 100 and ref.md5 != other.md5:
-                percentage = 99
-            if percentage >= self.threshold:
-                result.append(get_match(ref, other, percentage))
-        return result
+def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job.nulljob):
+    def empty_out_queue(queue, into):
+        try:
+            while True:
+                into.append(queue.get(block=False))
+        except Empty:
+            pass
+    
+    j = j.start_subjob([3, 7])
+    pictures = prepare_pictures(pictures, cached_blocks, j)
+    j = j.start_subjob([9, 1], 'Preparing for matching')
+    cache = cached_blocks
+    id2picture = {}
+    dimensions2pictures = defaultdict(set)
+    for picture in pictures:
+        try:
+            picture.cache_id = cache.get_id(picture.unicode_path)
+            id2picture[picture.cache_id] = picture
+            if not match_scaled:
+                dimensions2pictures[picture.dimensions].add(picture)
+        except ValueError:
+            pass
+    pictures = [p for p in pictures if hasattr(p, 'cache_id')]
+    pool = multiprocessing.Pool()
+    async_results = []
+    matches = []
+    pictures_copy = set(pictures)
+    for ref in j.iter_with_progress(pictures, 'Matched %d/%d pictures'):
+        others = pictures_copy if match_scaled else dimensions2pictures[ref.dimensions]
+        others.remove(ref)
+        if others:
+            cache_ids = [f.cache_id for f in others]
+            args = (ref.cache_id, cache_ids, cached_blocks.dbname, threshold)
+            async_results.append(pool.apply_async(async_compare, args))
+            if len(async_results) > RESULTS_QUEUE_LIMIT:
+                result = async_results.pop(0)
+                matches.extend(result.get())
+    for result in async_results: # process the rest of the results
+        matches.extend(result.get())
+    
+    result = []
+    for ref_id, other_id, percentage in j.iter_with_progress(matches, 'Verified %d/%d matches', every=10):
+        ref = id2picture[ref_id]
+        other = id2picture[other_id]
+        if percentage == 100 and ref.md5 != other.md5:
+            percentage = 99
+        if percentage >= threshold:
+            result.append(get_match(ref, other, percentage))
+    return result
 
 multiprocessing.freeze_support()
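
The `RESULTS_QUEUE_LIMIT` logic above caps how many `apply_async` results can pile up before the main process blocks on the oldest one; note also that, unlike the removed `_do_getmatches`, the new module-level `getmatches` drains the results still in flight after the loop, so the last batch of comparisons is no longer lost. Below is a minimal, standalone sketch of the same back-pressure idea; `slow_square` and `bounded_map` are illustrative names, not part of dupeGuru:

    import multiprocessing
    
    def slow_square(n):
        return n * n
    
    def bounded_map(items, limit):
        # Keep at most `limit` results in flight; block on the oldest before
        # submitting more, exactly like the matches loop above does.
        pool = multiprocessing.Pool()
        pending = []
        collected = []
        for item in items:
            pending.append(pool.apply_async(slow_square, (item,)))
            if len(pending) > limit:
                collected.append(pending.pop(0).get())
        for res in pending: # drain whatever is still in flight
            collected.append(res.get())
        pool.close()
        pool.join()
        return collected
    
    if __name__ == '__main__':
        print(bounded_map(range(10), multiprocessing.cpu_count() * 2))
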
pe/py/scanner.py | 22 (new file)
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+# Created By: Virgil Dupras
+# Created On: 2009-10-18
+# $Id$
+# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
+#
+# This software is licensed under the "HS" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
+# http://www.hardcoded.net/licenses/hs_license
+
+from dupeguru.scanner import Scanner
+
+from . import matchbase
+
+class ScannerPE(Scanner):
+    cached_blocks = None
+    match_scaled = False
+    threshold = 75
+    
+    def _getmatches(self, files, j):
+        return matchbase.getmatches(files, self.cached_blocks, self.threshold, self.match_scaled, j)
+
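
For context, this is roughly how the new scanner gets wired up; the Qt app diff below does the same thing in `_setup`, and the cache path here is an illustrative placeholder, not a path the app actually uses:

    from dupeguru_pe.cache import Cache
    from dupeguru_pe.scanner import ScannerPE
    
    scanner = ScannerPE()
    scanner.cached_blocks = Cache('/tmp/cached_pictures.db') # placeholder path
    scanner.threshold = 75        # minimum match percentage to keep
    scanner.match_scaled = False  # only compare pictures of identical dimensions
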
pe/qt/app.py | 34
@@ -12,12 +12,12 @@ import os.path as op
 from PyQt4.QtGui import QImage
 import PIL.Image
 
-from hsfs import phys
 from hsutil.str import get_file_ext
 
+from dupeguru import fs
 from dupeguru_pe import data as data_pe
 from dupeguru_pe.cache import Cache
-from dupeguru_pe.matchbase import AsyncMatchFactory
+from dupeguru_pe.scanner import ScannerPE
 
 from block import getblocks
 from base.app import DupeGuru as DupeGuruBase
@@ -26,14 +26,19 @@ from main_window import MainWindow
 from preferences import Preferences
 from preferences_dialog import PreferencesDialog
 
-class File(phys.File):
-    INITIAL_INFO = phys.File.INITIAL_INFO.copy()
+class File(fs.File):
+    INITIAL_INFO = fs.File.INITIAL_INFO.copy()
     INITIAL_INFO.update({
         'dimensions': (0,0),
     })
+    HANDLED_EXTS = set(['png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'tif'])
+    
+    @classmethod
+    def can_handle(cls, path):
+        return fs.File.can_handle(path) and get_file_ext(path[-1]) in cls.HANDLED_EXTS
+    
     def _read_info(self, field):
-        super(File, self)._read_info(field)
+        fs.File._read_info(self, field)
         if field == 'dimensions':
             im = PIL.Image.open(unicode(self.path))
             self.dimensions = im.size
@@ -44,15 +49,6 @@ class File(phys.File):
         return getblocks(image, block_count_per_side)
 
-
-class Directory(phys.Directory):
-    cls_file_class = File
-    cls_supported_exts = ('png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff')
-    
-    def _fetch_subitems(self):
-        subdirs, subfiles = super(Directory, self)._fetch_subitems()
-        return subdirs, [name for name in subfiles if get_file_ext(name) in self.cls_supported_exts]
-
 
 class DupeGuru(DupeGuruBase):
     LOGO_NAME = 'logo_pe'
     NAME = 'dupeGuru Picture Edition'
@@ -63,15 +59,15 @@ class DupeGuru(DupeGuruBase):
         DupeGuruBase.__init__(self, data_pe, appid=5)
     
     def _setup(self):
-        self.scanner.match_factory = AsyncMatchFactory()
-        self.directories.dirclass = Directory
-        self.scanner.match_factory.cached_blocks = Cache(op.join(self.appdata, 'cached_pictures.db'))
+        self.scanner = ScannerPE()
+        self.directories.fileclasses = [File]
+        self.scanner.cached_blocks = Cache(op.join(self.appdata, 'cached_pictures.db'))
        DupeGuruBase._setup(self)
     
     def _update_options(self):
         DupeGuruBase._update_options(self)
-        self.scanner.match_factory.match_scaled = self.prefs.match_scaled
-        self.scanner.match_factory.threshold = self.prefs.filter_hardness
+        self.scanner.match_scaled = self.prefs.match_scaled
+        self.scanner.threshold = self.prefs.filter_hardness
     
     def _create_details_dialog(self, parent):
         return DetailsDialog(parent, self)
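
Both editions now hand the directories a `fileclasses` list instead of a custom `Directory` subclass, with each class claiming paths through a `can_handle` classmethod. The diff implies a first-match dispatch over that list; a minimal sketch of the idea follows, under that assumption. `get_file`, `PlainFile`, and `PngFile` here are illustrative stand-ins, not dupeGuru's real implementations:

    import os.path
    
    class PlainFile(object):
        def __init__(self, path):
            self.path = path
        
        @classmethod
        def can_handle(cls, path):
            return os.path.isfile(path)
    
    class PngFile(PlainFile):
        @classmethod
        def can_handle(cls, path):
            return PlainFile.can_handle(path) and path.lower().endswith('.png')
    
    def get_file(path, fileclasses):
        # First class to claim the path wins, so order most-specific first,
        # mirroring fileclasses=[Bundle, fs.File] in the SE diff below.
        for fileclass in fileclasses:
            if fileclass.can_handle(path):
                return fileclass(path)
        return None # no class claimed the path
    
    f = get_file('/tmp/photo.png', [PngFile, PlainFile])
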
@@ -1,6 +1,6 @@
 # -*- mode: python -*-
 a = Analysis([os.path.join(HOMEPATH,'support\\_mountzlib.py'), os.path.join(HOMEPATH,'support\\useUnicode.py'), 'start.py'],
-             pathex=['C:\\src\\dupeguru\\pe\\qt'])
+             pathex=[])
 pyz = PYZ(a.pure)
 exe = EXE(pyz,
           a.scripts,
@@ -16,6 +16,7 @@ from hsutil.build import print_and_do, build_all_qt_ui
 build_all_qt_ui(op.join('qtlib', 'ui'))
 build_all_qt_ui('base')
 build_all_qt_ui('.')
+print_and_do("pyrcc4 base\\dg.qrc > base\\dg_rc.py")
 
 def move(src, dst):
     if not op.exists(src):
@@ -23,6 +23,6 @@ class MainWindow(MainWindowBase):
         title = "Clear Picture Cache"
         msg = "Do you really want to remove all your cached picture analysis?"
         if self._confirm(title, msg, QMessageBox.No):
-            self.app.scanner.match_factory.cached_blocks.clear()
+            self.app.scanner.cached_blocks.clear()
             QMessageBox.information(self, title, "Picture cache cleared.")
 
@@ -14,6 +14,9 @@ import base.dg_rc
 
 from app import DupeGuru
 
+# This is a workaround for a pyinstaller problem where compiled dupeguru can't read tiff files
+from PIL import TiffImagePlugin, TiffTags
+
 if __name__ == "__main__":
     app = QApplication(sys.argv)
     app.setWindowIcon(QIcon(QPixmap(":/logo_pe")))
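
The forced `TiffImagePlugin` import exists because PyInstaller's static analysis misses PIL's dynamically loaded format plugins, so they never end up in the frozen build. An alternative, assuming the PyInstaller version in use supports it, would be to declare them in the spec file's `hiddenimports` instead of importing them in `start.py`:

    a = Analysis([os.path.join(HOMEPATH, 'support\\_mountzlib.py'),
                  os.path.join(HOMEPATH, 'support\\useUnicode.py'), 'start.py'],
                 pathex=[],
                 hiddenimports=['PIL.TiffImagePlugin', 'PIL.TiffTags'])
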
@@ -8,12 +8,12 @@
 import objc
 from AppKit import *
 
-from dupeguru import app_se_cocoa, scanner
+from dupeguru_se.app_cocoa import DupeGuru
+from dupeguru import scanner
 
 # Fix py2app imports with chokes on relative imports
-from dupeguru import app, app_cocoa, data, directories, engine, export, ignore, results, scanner
-from hsfs import auto, stats, tree
-from hsfs.phys import bundle
+from dupeguru_se import fs, data
+from dupeguru import app, app_cocoa, data, directories, engine, export, ignore, results, fs
 from hsutil import conflict
 
 class PyApp(NSObject):
@@ -22,7 +22,7 @@ class PyApp(NSObject):
 class PyDupeGuru(PyApp):
     def init(self):
         self = super(PyDupeGuru,self).init()
-        self.app = app_se_cocoa.DupeGuru()
+        self.app = DupeGuru()
         return self
     
     #---Directories
se/py/LICENSE | 11 (new file)
@@ -0,0 +1,11 @@
+Copyright 2009 Hardcoded Software Inc. (http://www.hardcoded.net)
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* Neither the name of Hardcoded Software Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+* If the source code has been published less than two years ago, any redistribution, in whole or in part, must retain full licensing functionality, without any attempt to change, obscure or in other ways circumvent its intent.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
se/py/__init__.py | 1 (new file)
@@ -0,0 +1 @@
+
@@ -11,14 +11,15 @@ import logging
 
 from AppKit import *
 
-from hsfs.phys import Directory as DirectoryBase
-from hsfs.phys.bundle import Bundle
+from hsutil import io
 from hsutil.path import Path
-from hsutil.misc import extract
 from hsutil.str import get_file_ext
 
-from . import app_cocoa, data
-from .directories import Directories as DirectoriesBase, STATE_EXCLUDED
+from dupeguru import fs
+from dupeguru.app_cocoa import DupeGuru as DupeGuruBase
+from dupeguru.directories import Directories as DirectoriesBase, STATE_EXCLUDED
+from . import data
+from .fs import Bundle as BundleBase
 
 if NSWorkspace.sharedWorkspace().respondsToSelector_('typeOfFile:error:'): # Only from 10.5
     def is_bundle(str_path):
@@ -31,27 +32,17 @@ else: # Tiger
     def is_bundle(str_path): # just return a list of a few known bundle extensions.
         return get_file_ext(str_path) in ('app', 'pages', 'numbers')
 
-class DGDirectory(DirectoryBase):
-    def _create_sub_file(self, name, with_parent=True):
-        if is_bundle(unicode(self.path + name)):
-            parent = self if with_parent else None
-            return Bundle(parent, name)
-        else:
-            return super(DGDirectory, self)._create_sub_file(name, with_parent)
-    
-    def _fetch_subitems(self):
-        subdirs, subfiles = super(DGDirectory, self)._fetch_subitems()
-        apps, normal_dirs = extract(lambda name: is_bundle(unicode(self.path + name)), subdirs)
-        subfiles += apps
-        return normal_dirs, subfiles
-
+class Bundle(BundleBase):
+    @classmethod
+    def can_handle(cls, path):
+        return not io.islink(path) and io.isdir(path) and is_bundle(unicode(path))
 
 class Directories(DirectoriesBase):
     ROOT_PATH_TO_EXCLUDE = map(Path, ['/Library', '/Volumes', '/System', '/bin', '/sbin', '/opt', '/private', '/dev'])
     HOME_PATH_TO_EXCLUDE = [Path('Library')]
     def __init__(self):
-        DirectoriesBase.__init__(self)
-        self.dirclass = DGDirectory
+        DirectoriesBase.__init__(self, fileclasses=[Bundle, fs.File])
     
     def _default_state_for_path(self, path):
         result = DirectoriesBase._default_state_for_path(self, path)
@@ -63,8 +54,8 @@ class Directories(DirectoriesBase):
         return STATE_EXCLUDED
 
 
-class DupeGuru(app_cocoa.DupeGuru):
+class DupeGuru(DupeGuruBase):
     def __init__(self):
-        app_cocoa.DupeGuru.__init__(self, data, 'dupeGuru', appid=4)
+        DupeGuruBase.__init__(self, data, 'dupeGuru', appid=4)
         self.directories = Directories()
 
se/py/data.py | 72 (new file)
@@ -0,0 +1,72 @@
+# Created By: Virgil Dupras
+# Created On: 2006/03/15
+# $Id$
+# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
+#
+# This software is licensed under the "HS" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
+# http://www.hardcoded.net/licenses/hs_license
+
+from hsutil.str import format_size
+from dupeguru.data import (format_path, format_timestamp, format_words, format_perc,
+    format_dupe_count, cmp_value)
+
+COLUMNS = [
+    {'attr':'name','display':'Filename'},
+    {'attr':'path','display':'Directory'},
+    {'attr':'size','display':'Size (KB)'},
+    {'attr':'extension','display':'Kind'},
+    {'attr':'ctime','display':'Creation'},
+    {'attr':'mtime','display':'Modification'},
+    {'attr':'percentage','display':'Match %'},
+    {'attr':'words','display':'Words Used'},
+    {'attr':'dupe_count','display':'Dupe Count'},
+]
+
+METADATA_TO_READ = ['size', 'ctime', 'mtime']
+
+def GetDisplayInfo(dupe, group, delta):
+    size = dupe.size
+    ctime = dupe.ctime
+    mtime = dupe.mtime
+    m = group.get_match_of(dupe)
+    if m:
+        percentage = m.percentage
+        dupe_count = 0
+        if delta:
+            r = group.ref
+            size -= r.size
+            ctime -= r.ctime
+            mtime -= r.mtime
+    else:
+        percentage = group.percentage
+        dupe_count = len(group.dupes)
+    return [
+        dupe.name,
+        format_path(dupe.path),
+        format_size(size, 0, 1, False),
+        dupe.extension,
+        format_timestamp(ctime, delta and m),
+        format_timestamp(mtime, delta and m),
+        format_perc(percentage),
+        format_words(dupe.words) if hasattr(dupe, 'words') else '',
+        format_dupe_count(dupe_count)
+    ]
+
+def GetDupeSortKey(dupe, get_group, key, delta):
+    if key == 6:
+        m = get_group().get_match_of(dupe)
+        return m.percentage
+    if key == 8:
+        return 0
+    r = cmp_value(getattr(dupe, COLUMNS[key]['attr']))
+    if delta and (key in (2, 4, 5)):
+        r -= cmp_value(getattr(get_group().ref, COLUMNS[key]['attr']))
+    return r
+
+def GetGroupSortKey(group, key):
+    if key == 6:
+        return group.percentage
+    if key == 8:
+        return len(group)
+    return cmp_value(getattr(group.ref, COLUMNS[key]['attr']))
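
The sort-key helpers index into `COLUMNS` by position: key 6 (match percentage) and key 8 (dupe count) have no per-file attribute and are special-cased, while for size, ctime, and mtime (keys 2, 4, 5) delta mode sorts by the difference from the group's reference file. A worked example with made-up values:

    # Hypothetical values, for illustration only:
    # dupe.size == 15, group.ref.size == 12, key == 2 (the 'size' column)
    # GetDupeSortKey(dupe, get_group, 2, delta=True)
    #     -> cmp_value(15) - cmp_value(12) == 3
    # i.e. in delta mode, dupes sort by how much they differ from the reference.
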
se/py/fs.py | 43 (new file)
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+# Created By: Virgil Dupras
+# Created On: 2009-10-23
+# $Id$
+# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
+#
+# This software is licensed under the "HS" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
+# http://www.hardcoded.net/licenses/hs_license
+
+import hashlib
+
+from hsutil import io
+from hsutil.misc import nonone
+
+from dupeguru import fs
+
+class Bundle(fs.File):
+    """This class is for Mac OSX bundles (.app). Bundles are seen by the OS as
+    normal directories, but I don't want that in dupeGuru. I want dupeGuru
+    to see them as files.
+    """
+    def _read_info(self, field):
+        if field in ('size', 'ctime', 'mtime'):
+            files = fs.get_all_files(self.path)
+            size = sum((file.size for file in files), 0)
+            self.size = size
+            stats = io.stat(self.path)
+            self.ctime = nonone(stats.st_ctime, 0)
+            self.mtime = nonone(stats.st_mtime, 0)
+        elif field in ('md5', 'md5partial'):
+            # What's sensitive here is that we must make sure that subfiles'
+            # md5 are always added up in the same order, but we also want a
+            # different md5 if a file gets moved in a different subdirectory.
+            def get_dir_md5_concat():
+                files = fs.get_all_files(self.path)
+                files.sort(key=lambda f:f.path)
+                md5s = [getattr(f, field) for f in files]
+                return ''.join(md5s)
+            
+            md5 = hashlib.md5(get_dir_md5_concat())
+            digest = md5.digest()
+            setattr(self, field, digest)
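
The sort in `get_dir_md5_concat` is what makes a bundle's digest deterministic: concatenating the same sub-digests in a different order hashes to a different value, so sorting by path pins the order down while still letting the digest change when a file moves to another subdirectory. A toy illustration, with made-up stand-in digests:

    import hashlib
    
    sub_md5s = {'/b/file2': 'bbbb', '/a/file1': 'aaaa'} # made-up sub-file digests
    ordered = ''.join(sub_md5s[p] for p in sorted(sub_md5s))
    # Stable no matter what order the filesystem returned the files in:
    print(hashlib.md5(ordered.encode('ascii')).hexdigest())
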
se/py/tests/__init__.py | 0 (new file)

se/py/tests/fs_test.py | 48 (new file)
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+# Created By: Virgil Dupras
+# Created On: 2009-10-23
+# $Id$
+# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
+#
+# This software is licensed under the "HS" License as described in the "LICENSE" file,
+# which should be included with this package. The terms are also available at
+# http://www.hardcoded.net/licenses/hs_license
+
+import hashlib
+
+from nose.tools import eq_
+
+from hsutil.testcase import TestCase
+from dupeguru.fs import File
+from dupeguru.tests.directories_test import create_fake_fs
+
+from .. import fs
+
+class TCBundle(TestCase):
+    def test_size_aggregates_subfiles(self):
+        p = create_fake_fs(self.tmppath())
+        b = fs.Bundle(p)
+        eq_(b.size, 12)
+    
+    def test_md5_aggregate_subfiles_sorted(self):
+        #dir.allfiles can return child in any order. Thus, bundle.md5 must aggregate
+        #all files' md5 it contains, but it must make sure that it does so in the
+        #same order everytime.
+        p = create_fake_fs(self.tmppath())
+        b = fs.Bundle(p)
+        md5s = File(p + ('dir1', 'file1.test')).md5
+        md5s += File(p + ('dir2', 'file2.test')).md5
+        md5s += File(p + ('dir3', 'file3.test')).md5
+        md5s += File(p + 'file1.test').md5
+        md5s += File(p + 'file2.test').md5
+        md5s += File(p + 'file3.test').md5
+        md5 = hashlib.md5(md5s)
+        eq_(b.md5, md5.digest())
+    
+    def test_has_file_attrs(self):
+        #a Bundle must behave like a file, so it must have ctime and mtime attributes
+        b = fs.Bundle(self.tmppath())
+        assert b.mtime > 0
+        assert b.ctime > 0
+        eq_(b.extension, '')
@@ -7,7 +7,7 @@
 # which should be included with this package. The terms are also available at
 # http://www.hardcoded.net/licenses/hs_license
 
-from dupeguru import data
+from dupeguru_se import data
 from dupeguru.directories import Directories as DirectoriesBase, STATE_EXCLUDED
 
 from base.app import DupeGuru as DupeGuruBase