mirror of https://github.com/arsenetar/dupeguru.git
synced 2026-01-25 08:01:39 +00:00

Compare commits: se2.8.2 ... before-tig (22 commits)

SHA1:
911521d8e0
b25c1c3a3b
37a40040b3
25dadc83eb
b8c11b5aae
a3ab314378
794192835d
385768a69b
a281931b16
085311d559
4d7f032889
cf44c93013
787cbcd01f
b2b316b642
49165125e4
54ac0fd19e
0aff7f16e5
f9abc3b35d
b167a51243
371cdda911
11977c6533
7228adf433
@@ -14,13 +14,13 @@ import os
import os.path as op
import logging

from hsutil import job, io, files
from hsutil import io, files
from hsutil.path import Path
from hsutil.reg import RegistrableApplication, RegistrationRequired
from hsutil.misc import flatten, first
from hsutil.str import escape

from . import directories, results, scanner, export
from . import directories, results, scanner, export, fs

JOB_SCAN = 'job_scan'
JOB_LOAD = 'job_load'
@@ -98,13 +98,8 @@ class DupeGuru(RegistrableApplication):
        return ['---'] * len(self.data.COLUMNS)

    def _get_file(self, str_path):
        p = Path(str_path)
        for d in self.directories:
            if p not in d.path:
                continue
            result = d.find_path(p[d.path:])
            if result is not None:
                return result
        path = Path(str_path)
        return fs.get_file(path, self.directories.fileclasses)

    @staticmethod
    def _recycle_dupe(dupe):
@@ -150,7 +145,7 @@ class DupeGuru(RegistrableApplication):
        2 = absolute re-creation.
        """
        source_path = dupe.path
        location_path = dupe.root.path
        location_path = first(p for p in self.directories if dupe.path in p)
        dest_path = Path(destination)
        if dest_type == 2:
            dest_path = dest_path + source_path[1:-1] #Remove drive letter and filename

@@ -12,13 +12,12 @@ from AppKit import *
|
||||
import logging
|
||||
import os.path as op
|
||||
|
||||
import hsfs as fs
|
||||
from hsutil import io, cocoa, job
|
||||
from hsutil.cocoa import install_exception_hook
|
||||
from hsutil.misc import stripnone
|
||||
from hsutil.reg import RegistrationRequired
|
||||
|
||||
import app, data
|
||||
from . import app, fs
|
||||
|
||||
JOBID2TITLE = {
|
||||
app.JOB_SCAN: "Scanning for duplicates",
|
||||
@@ -43,8 +42,6 @@ class DupeGuru(app.DupeGuru):
|
||||
logging.basicConfig(level=LOGGING_LEVEL, format='%(levelname)s %(message)s')
|
||||
logging.debug('started in debug mode')
|
||||
install_exception_hook()
|
||||
if data_module is None:
|
||||
data_module = data
|
||||
appsupport = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDomainMask, True)[0]
|
||||
appdata = op.join(appsupport, appdata_subdir)
|
||||
app.DupeGuru.__init__(self, data_module, appdata, appid)
|
||||
@@ -91,15 +88,15 @@ class DupeGuru(app.DupeGuru):
|
||||
except IndexError:
|
||||
return (None,None)
|
||||
|
||||
def GetDirectory(self,node_path,curr_dir=None):
|
||||
def get_folder_path(self, node_path, curr_path=None):
|
||||
if not node_path:
|
||||
return curr_dir
|
||||
if curr_dir is not None:
|
||||
l = curr_dir.dirs
|
||||
return curr_path
|
||||
current_index = node_path[0]
|
||||
if curr_path is None:
|
||||
curr_path = self.directories[current_index]
|
||||
else:
|
||||
l = self.directories
|
||||
d = l[node_path[0]]
|
||||
return self.GetDirectory(node_path[1:],d)
|
||||
curr_path = self.directories.get_subfolders(curr_path)[current_index]
|
||||
return self.get_folder_path(node_path[1:], curr_path)
|
||||
|
||||
def RefreshDetailsTable(self,dupe,group):
|
||||
l1 = self._get_display_info(dupe, group, False)
|
||||
@@ -146,13 +143,13 @@ class DupeGuru(app.DupeGuru):
|
||||
def RemoveSelected(self):
|
||||
self.results.remove_duplicates(self.selected_dupes)
|
||||
|
||||
def RenameSelected(self,newname):
|
||||
def RenameSelected(self, newname):
|
||||
try:
|
||||
d = self.selected_dupes[0]
|
||||
d = d.move(d.parent,newname)
|
||||
d.rename(newname)
|
||||
return True
|
||||
except (IndexError,fs.FSError),e:
|
||||
logging.warning("dupeGuru Warning: %s" % str(e))
|
||||
except (IndexError, fs.FSError) as e:
|
||||
logging.warning("dupeGuru Warning: %s" % unicode(e))
|
||||
return False
|
||||
|
||||
def RevealSelected(self):
|
||||
@@ -214,9 +211,9 @@ class DupeGuru(app.DupeGuru):
|
||||
self.results.dupes[row] for row in rows if row in xrange(len(self.results.dupes))
|
||||
]
|
||||
|
||||
def SetDirectoryState(self,node_path,state):
|
||||
d = self.GetDirectory(node_path)
|
||||
self.directories.set_state(d.path,state)
|
||||
def SetDirectoryState(self, node_path, state):
|
||||
p = self.get_folder_path(node_path)
|
||||
self.directories.set_state(p, state)
|
||||
|
||||
def sort_dupes(self,key,asc):
|
||||
self.results.sort_dupes(key,asc,self.display_delta_values)
|
||||
@@ -245,8 +242,12 @@ class DupeGuru(app.DupeGuru):
|
||||
return [len(g.dupes) for g in self.results.groups]
|
||||
elif tag == 1: #Directories
|
||||
try:
|
||||
dirs = self.GetDirectory(node_path).dirs if node_path else self.directories
|
||||
return [d.dircount for d in dirs]
|
||||
if node_path:
|
||||
path = self.get_folder_path(node_path)
|
||||
subfolders = self.directories.get_subfolders(path)
|
||||
else:
|
||||
subfolders = self.directories
|
||||
return [len(self.directories.get_subfolders(path)) for path in subfolders]
|
||||
except IndexError: # node_path out of range
|
||||
return []
|
||||
else: #Power Marker
|
||||
@@ -270,8 +271,9 @@ class DupeGuru(app.DupeGuru):
|
||||
return result
|
||||
elif tag == 1: #Directories
|
||||
try:
|
||||
d = self.GetDirectory(node_path)
|
||||
return [d.name, self.directories.get_state(d.path)]
|
||||
path = self.get_folder_path(node_path)
|
||||
name = unicode(path) if len(node_path) == 1 else path[-1]
|
||||
return [name, self.directories.get_state(path)]
|
||||
except IndexError: # node_path out of range
|
||||
return []
|
||||
|
||||
|
||||
@@ -40,63 +40,3 @@ def format_dupe_count(c):
|
||||
|
||||
def cmp_value(value):
|
||||
return value.lower() if isinstance(value, basestring) else value
|
||||
|
||||
COLUMNS = [
|
||||
{'attr':'name','display':'Filename'},
|
||||
{'attr':'path','display':'Directory'},
|
||||
{'attr':'size','display':'Size (KB)'},
|
||||
{'attr':'extension','display':'Kind'},
|
||||
{'attr':'ctime','display':'Creation'},
|
||||
{'attr':'mtime','display':'Modification'},
|
||||
{'attr':'percentage','display':'Match %'},
|
||||
{'attr':'words','display':'Words Used'},
|
||||
{'attr':'dupe_count','display':'Dupe Count'},
|
||||
]
|
||||
|
||||
METADATA_TO_READ = ['size', 'ctime', 'mtime']
|
||||
|
||||
def GetDisplayInfo(dupe, group, delta):
|
||||
size = dupe.size
|
||||
ctime = dupe.ctime
|
||||
mtime = dupe.mtime
|
||||
m = group.get_match_of(dupe)
|
||||
if m:
|
||||
percentage = m.percentage
|
||||
dupe_count = 0
|
||||
if delta:
|
||||
r = group.ref
|
||||
size -= r.size
|
||||
ctime -= r.ctime
|
||||
mtime -= r.mtime
|
||||
else:
|
||||
percentage = group.percentage
|
||||
dupe_count = len(group.dupes)
|
||||
return [
|
||||
dupe.name,
|
||||
format_path(dupe.path),
|
||||
format_size(size, 0, 1, False),
|
||||
dupe.extension,
|
||||
format_timestamp(ctime, delta and m),
|
||||
format_timestamp(mtime, delta and m),
|
||||
format_perc(percentage),
|
||||
format_words(dupe.words),
|
||||
format_dupe_count(dupe_count)
|
||||
]
|
||||
|
||||
def GetDupeSortKey(dupe, get_group, key, delta):
|
||||
if key == 6:
|
||||
m = get_group().get_match_of(dupe)
|
||||
return m.percentage
|
||||
if key == 8:
|
||||
return 0
|
||||
r = cmp_value(getattr(dupe, COLUMNS[key]['attr']))
|
||||
if delta and (key in (2, 4, 5)):
|
||||
r -= cmp_value(getattr(get_group().ref, COLUMNS[key]['attr']))
|
||||
return r
|
||||
|
||||
def GetGroupSortKey(group, key):
|
||||
if key == 6:
|
||||
return group.percentage
|
||||
if key == 8:
|
||||
return len(group)
|
||||
return cmp_value(getattr(group.ref, COLUMNS[key]['attr']))
|
||||
|
||||
@@ -9,11 +9,12 @@

import xml.dom.minidom

from hsfs import phys
import hsfs as fs
from hsutil import io
from hsutil.files import FileOrPath
from hsutil.path import Path

from . import fs

(STATE_NORMAL,
STATE_REFERENCE,
STATE_EXCLUDED) = range(3)
@@ -26,15 +27,14 @@ class InvalidPathError(Exception):

class Directories(object):
    #---Override
    def __init__(self):
    def __init__(self, fileclasses=[fs.File]):
        self._dirs = []
        self.states = {}
        self.dirclass = phys.Directory
        self.special_dirclasses = {}
        self.fileclasses = fileclasses

    def __contains__(self,path):
        for d in self._dirs:
            if path in d.path:
    def __contains__(self, path):
        for p in self._dirs:
            if path in p:
                return True
        return False

@@ -53,8 +53,7 @@ class Directories(object):
|
||||
if path[-1].startswith('.'): # hidden
|
||||
return STATE_EXCLUDED
|
||||
|
||||
def _get_files(self, from_dir):
|
||||
from_path = from_dir.path
|
||||
def _get_files(self, from_path):
|
||||
state = self.get_state(from_path)
|
||||
if state == STATE_EXCLUDED:
|
||||
# Recursively get files from folders with lots of subfolder is expensive. However, there
|
||||
@@ -62,14 +61,21 @@ class Directories(object):
|
||||
# through self.states and see if we must continue, or we can stop right here to save time
|
||||
if not any(p[:len(from_path)] == from_path for p in self.states):
|
||||
return
|
||||
result = []
|
||||
for subdir in from_dir.dirs:
|
||||
for file in self._get_files(subdir):
|
||||
yield file
|
||||
if state != STATE_EXCLUDED:
|
||||
for file in from_dir.files:
|
||||
file.is_ref = state == STATE_REFERENCE
|
||||
yield file
|
||||
try:
|
||||
filepaths = set()
|
||||
if state != STATE_EXCLUDED:
|
||||
for file in fs.get_files(from_path, fileclasses=self.fileclasses):
|
||||
file.is_ref = state == STATE_REFERENCE
|
||||
filepaths.add(file.path)
|
||||
yield file
|
||||
subpaths = [from_path + name for name in io.listdir(from_path)]
|
||||
# it's possible that a folder (bundle) gets into the file list. in that case, we don't want to recurse into it
|
||||
subfolders = [p for p in subpaths if not io.islink(p) and io.isdir(p) and p not in filepaths]
|
||||
for subfolder in subfolders:
|
||||
for file in self._get_files(subfolder):
|
||||
yield file
|
||||
except (EnvironmentError, fs.InvalidPath):
|
||||
pass
|
||||
|
||||
#---Public
|
||||
def add_path(self, path):
|
||||
@@ -80,29 +86,30 @@ class Directories(object):
|
||||
under it will be removed. Can also raise InvalidPathError if 'path' does not exist.
|
||||
"""
|
||||
if path in self:
|
||||
raise AlreadyThereError
|
||||
self._dirs = [d for d in self._dirs if d.path not in path]
|
||||
try:
|
||||
dirclass = self.special_dirclasses.get(path, self.dirclass)
|
||||
d = dirclass(None, unicode(path))
|
||||
d[:] #If an InvalidPath exception has to be raised, it will be raised here
|
||||
self._dirs.append(d)
|
||||
return d
|
||||
except fs.InvalidPath:
|
||||
raise AlreadyThereError()
|
||||
if not io.exists(path):
|
||||
raise InvalidPathError()
|
||||
self._dirs = [p for p in self._dirs if p not in path]
|
||||
self._dirs.append(path)
|
||||
|
||||
@staticmethod
|
||||
def get_subfolders(path):
|
||||
"""returns a sorted list of paths corresponding to subfolders in `path`"""
|
||||
try:
|
||||
names = [name for name in io.listdir(path) if io.isdir(path + name)]
|
||||
names.sort(key=lambda x:x.lower())
|
||||
return [path + name for name in names]
|
||||
except EnvironmentError:
|
||||
return []
|
||||
|
||||
def get_files(self):
|
||||
"""Returns a list of all files that are not excluded.
|
||||
|
||||
Returned files also have their 'is_ref' attr set.
|
||||
"""
|
||||
for d in self._dirs:
|
||||
d.force_update()
|
||||
try:
|
||||
for file in self._get_files(d):
|
||||
yield file
|
||||
except fs.InvalidPath:
|
||||
pass
|
||||
for path in self._dirs:
|
||||
for file in self._get_files(path):
|
||||
yield file
|
||||
|
||||
def get_state(self, path):
|
||||
"""Returns the state of 'path' (One of the STATE_* const.)
|
||||
@@ -123,8 +130,8 @@ class Directories(object):
|
||||
doc = xml.dom.minidom.parse(infile)
|
||||
except:
|
||||
return
|
||||
root_dir_nodes = doc.getElementsByTagName('root_directory')
|
||||
for rdn in root_dir_nodes:
|
||||
root_path_nodes = doc.getElementsByTagName('root_directory')
|
||||
for rdn in root_path_nodes:
|
||||
if not rdn.getAttributeNode('path'):
|
||||
continue
|
||||
path = rdn.getAttributeNode('path').nodeValue
|
||||
@@ -144,9 +151,9 @@ class Directories(object):
|
||||
with FileOrPath(outfile, 'wb') as fp:
|
||||
doc = xml.dom.minidom.Document()
|
||||
root = doc.appendChild(doc.createElement('directories'))
|
||||
for root_dir in self:
|
||||
root_dir_node = root.appendChild(doc.createElement('root_directory'))
|
||||
root_dir_node.setAttribute('path', unicode(root_dir.path).encode('utf-8'))
|
||||
for root_path in self:
|
||||
root_path_node = root.appendChild(doc.createElement('root_directory'))
|
||||
root_path_node.setAttribute('path', unicode(root_path).encode('utf-8'))
|
||||
for path, state in self.states.iteritems():
|
||||
state_node = root.appendChild(doc.createElement('state'))
|
||||
state_node.setAttribute('path', unicode(path).encode('utf-8'))
|
||||
|
||||
@@ -9,6 +9,7 @@

from __future__ import division
import difflib
import itertools
import logging
import string
from collections import defaultdict, namedtuple

@@ -156,58 +157,69 @@ def get_match(first, second, flags=()):
|
||||
percentage = compare(first.words, second.words, flags)
|
||||
return Match(first, second, percentage)
|
||||
|
||||
class MatchFactory(object):
|
||||
common_word_threshold = 50
|
||||
match_similar_words = False
|
||||
min_match_percentage = 0
|
||||
weight_words = False
|
||||
no_field_order = False
|
||||
limit = 5000000
|
||||
|
||||
def getmatches(self, objects, j=job.nulljob):
|
||||
j = j.start_subjob(2)
|
||||
sj = j.start_subjob(2)
|
||||
for o in objects:
|
||||
if not hasattr(o, 'words'):
|
||||
o.words = getwords(o.name)
|
||||
word_dict = build_word_dict(objects, sj)
|
||||
reduce_common_words(word_dict, self.common_word_threshold)
|
||||
if self.match_similar_words:
|
||||
merge_similar_words(word_dict)
|
||||
match_flags = []
|
||||
if self.weight_words:
|
||||
match_flags.append(WEIGHT_WORDS)
|
||||
if self.match_similar_words:
|
||||
match_flags.append(MATCH_SIMILAR_WORDS)
|
||||
if self.no_field_order:
|
||||
match_flags.append(NO_FIELD_ORDER)
|
||||
j.start_job(len(word_dict), '0 matches found')
|
||||
compared = defaultdict(set)
|
||||
result = []
|
||||
try:
|
||||
# This whole 'popping' thing is there to avoid taking too much memory at the same time.
|
||||
while word_dict:
|
||||
items = word_dict.popitem()[1]
|
||||
while items:
|
||||
ref = items.pop()
|
||||
compared_already = compared[ref]
|
||||
to_compare = items - compared_already
|
||||
compared_already |= to_compare
|
||||
for other in to_compare:
|
||||
m = get_match(ref, other, match_flags)
|
||||
if m.percentage >= self.min_match_percentage:
|
||||
result.append(m)
|
||||
if len(result) >= self.limit:
|
||||
return result
|
||||
j.add_progress(desc='%d matches found' % len(result))
|
||||
except MemoryError:
|
||||
# This is the place where the memory usage is at its peak during the scan.
|
||||
# Just continue the process with an incomplete list of matches.
|
||||
del compared # This should give us enough room to call logging.
|
||||
logging.warning('Memory Overflow. Matches: %d. Word dict: %d' % (len(result), len(word_dict)))
|
||||
return result
|
||||
def getmatches(objects, min_match_percentage=0, match_similar_words=False, weight_words=False,
|
||||
no_field_order=False, j=job.nulljob):
|
||||
COMMON_WORD_THRESHOLD = 50
|
||||
LIMIT = 5000000
|
||||
j = j.start_subjob(2)
|
||||
sj = j.start_subjob(2)
|
||||
for o in objects:
|
||||
if not hasattr(o, 'words'):
|
||||
o.words = getwords(o.name)
|
||||
word_dict = build_word_dict(objects, sj)
|
||||
reduce_common_words(word_dict, COMMON_WORD_THRESHOLD)
|
||||
if match_similar_words:
|
||||
merge_similar_words(word_dict)
|
||||
match_flags = []
|
||||
if weight_words:
|
||||
match_flags.append(WEIGHT_WORDS)
|
||||
if match_similar_words:
|
||||
match_flags.append(MATCH_SIMILAR_WORDS)
|
||||
if no_field_order:
|
||||
match_flags.append(NO_FIELD_ORDER)
|
||||
j.start_job(len(word_dict), '0 matches found')
|
||||
compared = defaultdict(set)
|
||||
result = []
|
||||
try:
|
||||
# This whole 'popping' thing is there to avoid taking too much memory at the same time.
|
||||
while word_dict:
|
||||
items = word_dict.popitem()[1]
|
||||
while items:
|
||||
ref = items.pop()
|
||||
compared_already = compared[ref]
|
||||
to_compare = items - compared_already
|
||||
compared_already |= to_compare
|
||||
for other in to_compare:
|
||||
m = get_match(ref, other, match_flags)
|
||||
if m.percentage >= min_match_percentage:
|
||||
result.append(m)
|
||||
if len(result) >= LIMIT:
|
||||
return result
|
||||
j.add_progress(desc='%d matches found' % len(result))
|
||||
except MemoryError:
|
||||
# This is the place where the memory usage is at its peak during the scan.
|
||||
# Just continue the process with an incomplete list of matches.
|
||||
del compared # This should give us enough room to call logging.
|
||||
logging.warning('Memory Overflow. Matches: %d. Word dict: %d' % (len(result), len(word_dict)))
|
||||
return result
|
||||
|
||||
return result
|
||||
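The new module-level getmatches() above replaces the MatchFactory class: the old instance attributes (min_match_percentage, weight_words, match_similar_words, no_field_order) become keyword arguments. A hedged sketch of calling it follows; NamedObject is a stand-in defined here for illustration (the test suite uses a similar helper), not something the hunk introduces.

# Illustrative only. Any object with a `name` attribute works; `words` is
# computed on the fly when absent, exactly as in the function above.
from dupeguru import engine

class NamedObject(object):
    def __init__(self, name):
        self.name = name

files = [NamedObject('foo bar'), NamedObject('bar bleh'), NamedObject('a b c foo')]
matches = engine.getmatches(files, min_match_percentage=50, weight_words=True)
for m in matches:
    print m.first.name, m.second.name, m.percentage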
|
||||
def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob):
|
||||
j = j.start_subjob([2, 8])
|
||||
size2files = defaultdict(set)
|
||||
for file in j.iter_with_progress(files, 'Read size of %d/%d files'):
|
||||
size2files[getattr(file, sizeattr)].add(file)
|
||||
possible_matches = [files for files in size2files.values() if len(files) > 1]
|
||||
del size2files
|
||||
result = []
|
||||
j.start_job(len(possible_matches), '0 matches found')
|
||||
for group in possible_matches:
|
||||
for first, second in itertools.combinations(group, 2):
|
||||
if first.md5partial == second.md5partial:
|
||||
if partial or first.md5 == second.md5:
|
||||
result.append(Match(first, second, 100))
|
||||
j.add_progress(desc='%d matches found' % len(result))
|
||||
return result
|
||||
|
||||
class Group(object):
|
||||
#---Override
|
||||
|
||||
base/py/fs.py (new file, 178 lines)
@@ -0,0 +1,178 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2009-10-22
|
||||
# $Id$
|
||||
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "HS" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
# This is a fork from hsfs. The reason for this fork is that hsfs has been designed for musicGuru
|
||||
# and was re-used for dupeGuru. The problem is that hsfs is way over-engineered for dupeGuru,
|
||||
# resulting in needless complexity and memory usage. It's been a while since I wanted to do that fork,
|
||||
# and I'm doing it now.
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
|
||||
from hsutil import io
|
||||
from hsutil.misc import nonone, flatten
|
||||
from hsutil.str import get_file_ext
|
||||
|
||||
class FSError(Exception):
|
||||
cls_message = "An error has occured on '{name}' in '{parent}'"
|
||||
def __init__(self, fsobject, parent=None):
|
||||
message = self.cls_message
|
||||
if isinstance(fsobject, basestring):
|
||||
name = fsobject
|
||||
elif isinstance(fsobject, File):
|
||||
name = fsobject.name
|
||||
else:
|
||||
name = ''
|
||||
parentname = unicode(parent) if parent is not None else ''
|
||||
Exception.__init__(self, message.format(name=name, parent=parentname))
|
||||
|
||||
|
||||
class AlreadyExistsError(FSError):
|
||||
"The directory or file name we're trying to add already exists"
|
||||
cls_message = "'{name}' already exists in '{parent}'"
|
||||
|
||||
class InvalidPath(FSError):
|
||||
"The path of self is invalid, and cannot be worked with."
|
||||
cls_message = "'{name}' is invalid."
|
||||
|
||||
class InvalidDestinationError(FSError):
|
||||
"""A copy/move operation has been called, but the destination is invalid."""
|
||||
cls_message = "'{name}' is an invalid destination for this operation."
|
||||
|
||||
class OperationError(FSError):
|
||||
"""A copy/move/delete operation has been called, but the checkup after the
|
||||
operation shows that it didn't work."""
|
||||
cls_message = "Operation on '{name}' failed."
|
||||
|
||||
class File(object):
|
||||
INITIAL_INFO = {
|
||||
'size': 0,
|
||||
'ctime': 0,
|
||||
'mtime': 0,
|
||||
'md5': '',
|
||||
'md5partial': '',
|
||||
}
|
||||
|
||||
def __init__(self, path):
|
||||
self.path = path
|
||||
#This offset is where we should start reading the file to get a partial md5
|
||||
#For audio file, it should be where audio data starts
|
||||
self._md5partial_offset = 0x4000 #16Kb
|
||||
self._md5partial_size = 0x4000 #16Kb
|
||||
|
||||
def __getattr__(self, attrname):
|
||||
# Only called when attr is not there
|
||||
if attrname in self.INITIAL_INFO:
|
||||
try:
|
||||
self._read_info(attrname)
|
||||
except Exception as e:
|
||||
logging.warning("An error '%s' was raised while decoding '%s'", e, repr(self.path))
|
||||
try:
|
||||
return self.__dict__[attrname]
|
||||
except KeyError:
|
||||
return self.INITIAL_INFO[attrname]
|
||||
raise AttributeError()
|
||||
|
||||
def _read_info(self, field):
|
||||
if field in ('size', 'ctime', 'mtime'):
|
||||
stats = io.stat(self.path)
|
||||
self.size = nonone(stats.st_size, 0)
|
||||
self.ctime = nonone(stats.st_ctime, 0)
|
||||
self.mtime = nonone(stats.st_mtime, 0)
|
||||
elif field == 'md5partial':
|
||||
try:
|
||||
fp = io.open(self.path, 'rb')
|
||||
offset = self._md5partial_offset
|
||||
size = self._md5partial_size
|
||||
fp.seek(offset)
|
||||
partialdata = fp.read(size)
|
||||
md5 = hashlib.md5(partialdata)
|
||||
self.md5partial = md5.digest()
|
||||
fp.close()
|
||||
except Exception:
|
||||
pass
|
||||
elif field == 'md5':
|
||||
try:
|
||||
fp = io.open(self.path, 'rb')
|
||||
filedata = fp.read()
|
||||
md5 = hashlib.md5(filedata)
|
||||
self.md5 = md5.digest()
|
||||
fp.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _read_all_info(self, attrnames=None):
|
||||
"""Cache all possible info.
|
||||
|
||||
If `attrnames` is not None, caches only attrnames.
|
||||
"""
|
||||
if attrnames is None:
|
||||
attrnames = self.INITIAL_INFO.keys()
|
||||
for attrname in attrnames:
|
||||
if attrname not in self.__dict__:
|
||||
self._read_info(attrname)
|
||||
|
||||
#--- Public
|
||||
@classmethod
|
||||
def can_handle(cls, path):
|
||||
return not io.islink(path) and io.isfile(path)
|
||||
|
||||
def rename(self, newname):
|
||||
if newname == self.name:
|
||||
return
|
||||
destpath = self.path[:-1] + newname
|
||||
if io.exists(destpath):
|
||||
raise AlreadyExistsError(newname, self.path[:-1])
|
||||
try:
|
||||
io.rename(self.path, destpath)
|
||||
except EnvironmentError:
|
||||
raise OperationError(self)
|
||||
if not io.exists(destpath):
|
||||
raise OperationError(self)
|
||||
self.path = destpath
|
||||
|
||||
#--- Properties
|
||||
@property
|
||||
def extension(self):
|
||||
return get_file_ext(self.name)
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self.path[-1]
|
||||
|
||||
|
||||
def get_file(path, fileclasses=[File]):
|
||||
for fileclass in fileclasses:
|
||||
if fileclass.can_handle(path):
|
||||
return fileclass(path)
|
||||
|
||||
def get_files(path, fileclasses=[File]):
|
||||
assert all(issubclass(fileclass, File) for fileclass in fileclasses)
|
||||
try:
|
||||
paths = [path + name for name in io.listdir(path)]
|
||||
result = []
|
||||
for path in paths:
|
||||
file = get_file(path, fileclasses=fileclasses)
|
||||
if file is not None:
|
||||
result.append(file)
|
||||
return result
|
||||
except EnvironmentError:
|
||||
raise InvalidPath(path)
|
||||
|
||||
def get_all_files(path, fileclasses=[File]):
|
||||
files = get_files(path, fileclasses=fileclasses)
|
||||
filepaths = set(f.path for f in files)
|
||||
subpaths = [path + name for name in io.listdir(path)]
|
||||
# it's possible that a folder (bundle) gets into the file list. in that case, we don't want to recurse into it
|
||||
subfolders = [p for p in subpaths if not io.islink(p) and io.isdir(p) and p not in filepaths]
|
||||
subfiles = flatten(get_all_files(subpath, fileclasses=fileclasses) for subpath in subfolders)
|
||||
return subfiles + files
|
||||
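To make the new module's intent concrete, here is a short usage sketch of the fs API added in the file above. It assumes hsutil.path.Path for path construction and uses a made-up folder path; it is not part of the new file.

# Illustrative only. get_files() returns File instances for the direct
# children of `path`; size/md5 attributes are read lazily via __getattr__.
from hsutil.path import Path
from dupeguru import fs

path = Path(u'/some/folder')    # hypothetical folder
for f in fs.get_files(path):
    print f.name, f.size, f.extension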
@@ -32,40 +32,32 @@ class Scanner(object):
|
||||
self.ignore_list = IgnoreList()
|
||||
self.discarded_file_count = 0
|
||||
|
||||
@staticmethod
|
||||
def _filter_matches_by_content(matches, partial, j):
|
||||
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
|
||||
md5attrname = 'md5partial' if partial else 'md5'
|
||||
md5 = lambda f: getattr(f, md5attrname)
|
||||
for matched_file in j.iter_with_progress(matched_files, 'Analyzed %d/%d matching files'):
|
||||
md5(matched_file)
|
||||
j.set_progress(100, 'Removing false matches')
|
||||
return [m for m in matches if md5(m.first) == md5(m.second)]
|
||||
|
||||
def _getmatches(self, files, j):
|
||||
j = j.start_subjob(2)
|
||||
mf = engine.MatchFactory()
|
||||
if self.scan_type != SCAN_TYPE_CONTENT:
|
||||
mf.match_similar_words = self.match_similar_words
|
||||
mf.weight_words = self.word_weighting
|
||||
mf.min_match_percentage = self.min_match_percentage
|
||||
if self.scan_type == SCAN_TYPE_FIELDS_NO_ORDER:
|
||||
self.scan_type = SCAN_TYPE_FIELDS
|
||||
mf.no_field_order = True
|
||||
func = {
|
||||
SCAN_TYPE_FILENAME: lambda f: engine.getwords(rem_file_ext(f.name)),
|
||||
SCAN_TYPE_FIELDS: lambda f: engine.getfields(rem_file_ext(f.name)),
|
||||
SCAN_TYPE_TAG: lambda f: [engine.getwords(unicode(getattr(f, attrname))) for attrname in SCANNABLE_TAGS if attrname in self.scanned_tags],
|
||||
SCAN_TYPE_CONTENT: lambda f: [str(f.size)],
|
||||
SCAN_TYPE_CONTENT_AUDIO: lambda f: [str(f.audiosize)]
|
||||
}[self.scan_type]
|
||||
for f in j.iter_with_progress(files, 'Read metadata of %d/%d files'):
|
||||
if self.size_threshold:
|
||||
f.size # pre-read, makes a smoother progress if read here (especially for bundles)
|
||||
f.words = func(f)
|
||||
if self.size_threshold:
|
||||
j = j.start_subjob([2, 8])
|
||||
for f in j.iter_with_progress(files, 'Read size of %d/%d files'):
|
||||
f.size # pre-read, makes a smoother progress if read here (especially for bundles)
|
||||
files = [f for f in files if f.size >= self.size_threshold]
|
||||
return mf.getmatches(files, j)
|
||||
if self.scan_type in (SCAN_TYPE_CONTENT, SCAN_TYPE_CONTENT_AUDIO):
|
||||
sizeattr = 'size' if self.scan_type == SCAN_TYPE_CONTENT else 'audiosize'
|
||||
return engine.getmatches_by_contents(files, sizeattr, partial=self.scan_type==SCAN_TYPE_CONTENT_AUDIO, j=j)
|
||||
else:
|
||||
j = j.start_subjob([2, 8])
|
||||
kw = {}
|
||||
kw['match_similar_words'] = self.match_similar_words
|
||||
kw['weight_words'] = self.word_weighting
|
||||
kw['min_match_percentage'] = self.min_match_percentage
|
||||
if self.scan_type == SCAN_TYPE_FIELDS_NO_ORDER:
|
||||
self.scan_type = SCAN_TYPE_FIELDS
|
||||
kw['no_field_order'] = True
|
||||
func = {
|
||||
SCAN_TYPE_FILENAME: lambda f: engine.getwords(rem_file_ext(f.name)),
|
||||
SCAN_TYPE_FIELDS: lambda f: engine.getfields(rem_file_ext(f.name)),
|
||||
SCAN_TYPE_TAG: lambda f: [engine.getwords(unicode(getattr(f, attrname))) for attrname in SCANNABLE_TAGS if attrname in self.scanned_tags],
|
||||
}[self.scan_type]
|
||||
for f in j.iter_with_progress(files, 'Read metadata of %d/%d files'):
|
||||
f.words = func(f)
|
||||
return engine.getmatches(files, j=j, **kw)
|
||||
|
||||
@staticmethod
|
||||
def _key_func(dupe):
|
||||
@@ -86,10 +78,7 @@ class Scanner(object):
|
||||
for f in [f for f in files if not hasattr(f, 'is_ref')]:
|
||||
f.is_ref = False
|
||||
logging.info('Getting matches')
|
||||
if self.match_factory is None:
|
||||
matches = self._getmatches(files, j)
|
||||
else:
|
||||
matches = self.match_factory.getmatches(files, j)
|
||||
matches = self._getmatches(files, j)
|
||||
logging.info('Found %d matches' % len(matches))
|
||||
if not self.mix_file_kind:
|
||||
j.set_progress(100, 'Removing false matches')
|
||||
@@ -99,14 +88,6 @@ class Scanner(object):
|
||||
iter_matches = j.iter_with_progress(matches, 'Processed %d/%d matches against the ignore list')
|
||||
matches = [m for m in iter_matches
|
||||
if not self.ignore_list.AreIgnored(unicode(m.first.path), unicode(m.second.path))]
|
||||
if self.scan_type in (SCAN_TYPE_CONTENT, SCAN_TYPE_CONTENT_AUDIO):
|
||||
j = j.start_subjob(3 if self.scan_type == SCAN_TYPE_CONTENT else 2)
|
||||
matches = self._filter_matches_by_content(matches, partial=True, j=j)
|
||||
if self.scan_type == SCAN_TYPE_CONTENT:
|
||||
matches = self._filter_matches_by_content(matches, partial=False, j=j)
|
||||
# We compared md5. No words were involved.
|
||||
for m in matches:
|
||||
m.first.words = m.second.words = ['--']
|
||||
logging.info('Grouping matches')
|
||||
groups = engine.get_groups(matches, j)
|
||||
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
|
||||
@@ -118,7 +99,6 @@ class Scanner(object):
|
||||
g.prioritize(self._key_func, self._tie_breaker)
|
||||
return groups
|
||||
|
||||
match_factory = None
|
||||
match_similar_words = False
|
||||
min_match_percentage = 80
|
||||
mix_file_kind = True
|
||||
@@ -126,9 +106,3 @@ class Scanner(object):
|
||||
scanned_tags = set(['artist', 'title'])
|
||||
size_threshold = 0
|
||||
word_weighting = False
|
||||
|
||||
class ScannerME(Scanner): # Scanner for Music Edition
|
||||
@staticmethod
|
||||
def _key_func(dupe):
|
||||
return (not dupe.is_ref, -dupe.bitrate, -dupe.size)
|
||||
|
||||
|
||||
@@ -18,10 +18,10 @@ from hsutil.path import Path
|
||||
from hsutil.testcase import TestCase
|
||||
from hsutil.decorators import log_calls
|
||||
from hsutil import io
|
||||
import hsfs.phys
|
||||
|
||||
from . import data
|
||||
from .results_test import GetTestGroups
|
||||
from .. import engine, data
|
||||
from .. import engine, fs
|
||||
try:
|
||||
from ..app_cocoa import DupeGuru as DupeGuruBase
|
||||
except ImportError:
|
||||
@@ -35,7 +35,6 @@ class DupeGuru(DupeGuruBase):
|
||||
def _start_job(self, jobid, func):
|
||||
func(nulljob)
|
||||
|
||||
|
||||
def r2np(rows):
|
||||
#Transforms a list of rows [1,2,3] into a list of node paths [[1],[2],[3]]
|
||||
return [[i] for i in rows]
|
||||
@@ -310,15 +309,15 @@ class TCDupeGuru(TestCase):
|
||||
|
||||
class TCDupeGuru_renameSelected(TestCase):
|
||||
def setUp(self):
|
||||
p = Path(tempfile.mkdtemp())
|
||||
fp = open(str(p + 'foo bar 1'),mode='w')
|
||||
p = self.tmppath()
|
||||
fp = open(unicode(p + 'foo bar 1'),mode='w')
|
||||
fp.close()
|
||||
fp = open(str(p + 'foo bar 2'),mode='w')
|
||||
fp = open(unicode(p + 'foo bar 2'),mode='w')
|
||||
fp.close()
|
||||
fp = open(str(p + 'foo bar 3'),mode='w')
|
||||
fp = open(unicode(p + 'foo bar 3'),mode='w')
|
||||
fp.close()
|
||||
refdir = hsfs.phys.Directory(None,str(p))
|
||||
matches = engine.MatchFactory().getmatches(refdir.files)
|
||||
files = fs.get_files(p)
|
||||
matches = engine.getmatches(files)
|
||||
groups = engine.get_groups(matches)
|
||||
g = groups[0]
|
||||
g.prioritize(lambda x:x.name)
|
||||
@@ -327,45 +326,41 @@ class TCDupeGuru_renameSelected(TestCase):
|
||||
self.app = app
|
||||
self.groups = groups
|
||||
self.p = p
|
||||
self.refdir = refdir
|
||||
|
||||
def tearDown(self):
|
||||
shutil.rmtree(str(self.p))
|
||||
self.files = files
|
||||
|
||||
def test_simple(self):
|
||||
app = self.app
|
||||
refdir = self.refdir
|
||||
g = self.groups[0]
|
||||
app.SelectPowerMarkerNodePaths(r2np([0]))
|
||||
self.assert_(app.RenameSelected('renamed'))
|
||||
self.assert_('renamed' in refdir)
|
||||
self.assert_('foo bar 2' not in refdir)
|
||||
self.assert_(g.dupes[0] is refdir['renamed'])
|
||||
self.assert_(g.dupes[0] in refdir)
|
||||
assert app.RenameSelected('renamed')
|
||||
names = io.listdir(self.p)
|
||||
assert 'renamed' in names
|
||||
assert 'foo bar 2' not in names
|
||||
eq_(g.dupes[0].name, 'renamed')
|
||||
|
||||
def test_none_selected(self):
|
||||
app = self.app
|
||||
refdir = self.refdir
|
||||
g = self.groups[0]
|
||||
app.SelectPowerMarkerNodePaths([])
|
||||
self.mock(logging, 'warning', log_calls(lambda msg: None))
|
||||
self.assert_(not app.RenameSelected('renamed'))
|
||||
assert not app.RenameSelected('renamed')
|
||||
msg = logging.warning.calls[0]['msg']
|
||||
self.assertEqual('dupeGuru Warning: list index out of range', msg)
|
||||
self.assert_('renamed' not in refdir)
|
||||
self.assert_('foo bar 2' in refdir)
|
||||
self.assert_(g.dupes[0] is refdir['foo bar 2'])
|
||||
eq_('dupeGuru Warning: list index out of range', msg)
|
||||
names = io.listdir(self.p)
|
||||
assert 'renamed' not in names
|
||||
assert 'foo bar 2' in names
|
||||
eq_(g.dupes[0].name, 'foo bar 2')
|
||||
|
||||
def test_name_already_exists(self):
|
||||
app = self.app
|
||||
refdir = self.refdir
|
||||
g = self.groups[0]
|
||||
app.SelectPowerMarkerNodePaths(r2np([0]))
|
||||
self.mock(logging, 'warning', log_calls(lambda msg: None))
|
||||
self.assert_(not app.RenameSelected('foo bar 1'))
|
||||
assert not app.RenameSelected('foo bar 1')
|
||||
msg = logging.warning.calls[0]['msg']
|
||||
self.assert_(msg.startswith('dupeGuru Warning: \'foo bar 2\' already exists in'))
|
||||
self.assert_('foo bar 1' in refdir)
|
||||
self.assert_('foo bar 2' in refdir)
|
||||
self.assert_(g.dupes[0] is refdir['foo bar 2'])
|
||||
assert msg.startswith('dupeGuru Warning: \'foo bar 1\' already exists in')
|
||||
names = io.listdir(self.p)
|
||||
assert 'foo bar 1' in names
|
||||
assert 'foo bar 2' in names
|
||||
eq_(g.dupes[0].name, 'foo bar 2')
|
||||
|
||||
|
||||
@@ -13,12 +13,11 @@ from hsutil.testcase import TestCase
|
||||
from hsutil import io
|
||||
from hsutil.path import Path
|
||||
from hsutil.decorators import log_calls
|
||||
import hsfs as fs
|
||||
import hsfs.phys
|
||||
import hsutil.files
|
||||
from hsutil.job import nulljob
|
||||
|
||||
from .. import data, app
|
||||
from . import data
|
||||
from .. import app, fs
|
||||
from ..app import DupeGuru as DupeGuruBase
|
||||
|
||||
class DupeGuru(DupeGuruBase):
|
||||
@@ -59,27 +58,27 @@ class TCDupeGuru(TestCase):
|
||||
# The goal here is just to have a test for a previous blowup I had. I know my test coverage
|
||||
# for this unit is pathetic. What's done is done. My approach now is to add tests for
|
||||
# every change I want to make. The blowup was caused by a missing import.
|
||||
dupe_parent = fs.Directory(None, 'foo')
|
||||
dupe = fs.File(dupe_parent, 'bar')
|
||||
dupe.copy = log_calls(lambda dest, newname: None)
|
||||
p = self.tmppath()
|
||||
io.open(p + 'foo', 'w').close()
|
||||
self.mock(hsutil.files, 'copy', log_calls(lambda source_path, dest_path: None))
|
||||
self.mock(os, 'makedirs', lambda path: None) # We don't want the test to create that fake directory
|
||||
self.mock(fs.phys, 'Directory', fs.Directory) # We don't want an error because makedirs didn't work
|
||||
app = DupeGuru()
|
||||
app.copy_or_move(dupe, True, 'some_destination', 0)
|
||||
app.directories.add_path(p)
|
||||
[f] = app.directories.get_files()
|
||||
app.copy_or_move(f, True, 'some_destination', 0)
|
||||
self.assertEqual(1, len(hsutil.files.copy.calls))
|
||||
call = hsutil.files.copy.calls[0]
|
||||
self.assertEqual('some_destination', call['dest_path'])
|
||||
self.assertEqual(dupe.path, call['source_path'])
|
||||
self.assertEqual(f.path, call['source_path'])
|
||||
|
||||
def test_copy_or_move_clean_empty_dirs(self):
|
||||
tmppath = Path(self.tmpdir())
|
||||
sourcepath = tmppath + 'source'
|
||||
io.mkdir(sourcepath)
|
||||
io.open(sourcepath + 'myfile', 'w')
|
||||
tmpdir = hsfs.phys.Directory(None, unicode(tmppath))
|
||||
myfile = tmpdir['source']['myfile']
|
||||
app = DupeGuru()
|
||||
app.directories.add_path(tmppath)
|
||||
[myfile] = app.directories.get_files()
|
||||
self.mock(app, 'clean_empty_dirs', log_calls(lambda path: None))
|
||||
app.copy_or_move(myfile, False, tmppath + 'dest', 0)
|
||||
calls = app.clean_empty_dirs.calls
|
||||
@@ -87,9 +86,14 @@ class TCDupeGuru(TestCase):
|
||||
self.assertEqual(sourcepath, calls[0]['path'])
|
||||
|
||||
def test_Scan_with_objects_evaluating_to_false(self):
|
||||
class FakeFile(fs.File):
|
||||
def __nonzero__(self):
|
||||
return False
|
||||
|
||||
|
||||
# At some point, any() was used in a wrong way that made Scan() wrongly return 1
|
||||
app = DupeGuru()
|
||||
f1, f2 = [fs.File(None, 'foo') for i in range(2)]
|
||||
f1, f2 = [FakeFile('foo') for i in range(2)]
|
||||
f1.is_ref, f2.is_ref = (False, False)
|
||||
assert not (bool(f1) and bool(f2))
|
||||
app.directories.get_files = lambda: [f1, f2]
|
||||
|
||||
base/py/tests/data.py (new file, 45 lines)
@@ -0,0 +1,45 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2009-10-23
|
||||
# $Id$
|
||||
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "HS" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
# data module for tests
|
||||
|
||||
from hsutil.str import format_size
|
||||
from dupeguru.data import format_path, cmp_value
|
||||
|
||||
COLUMNS = [
|
||||
{'attr':'name','display':'Filename'},
|
||||
{'attr':'path','display':'Directory'},
|
||||
{'attr':'size','display':'Size (KB)'},
|
||||
{'attr':'extension','display':'Kind'},
|
||||
]
|
||||
|
||||
METADATA_TO_READ = ['size']
|
||||
|
||||
def GetDisplayInfo(dupe, group, delta):
|
||||
size = dupe.size
|
||||
m = group.get_match_of(dupe)
|
||||
if m and delta:
|
||||
r = group.ref
|
||||
size -= r.size
|
||||
return [
|
||||
dupe.name,
|
||||
format_path(dupe.path),
|
||||
format_size(size, 0, 1, False),
|
||||
dupe.extension,
|
||||
]
|
||||
|
||||
def GetDupeSortKey(dupe, get_group, key, delta):
|
||||
r = cmp_value(getattr(dupe, COLUMNS[key]['attr']))
|
||||
if delta and (key == 2):
|
||||
r -= cmp_value(getattr(get_group().ref, COLUMNS[key]['attr']))
|
||||
return r
|
||||
|
||||
def GetGroupSortKey(group, key):
|
||||
return cmp_value(getattr(group.ref, COLUMNS[key]['attr']))
|
||||
@@ -10,20 +10,43 @@
|
||||
import os.path as op
|
||||
import os
|
||||
import time
|
||||
import shutil
|
||||
|
||||
from nose.tools import eq_
|
||||
|
||||
from hsutil import job, io
|
||||
from hsutil import io
|
||||
from hsutil.path import Path
|
||||
from hsutil.testcase import TestCase
|
||||
import hsfs.phys
|
||||
from hsfs.tests import phys_test
|
||||
|
||||
from ..directories import *
|
||||
|
||||
testpath = Path(TestCase.datadirpath())
|
||||
|
||||
def create_fake_fs(rootpath):
|
||||
rootpath = rootpath + 'fs'
|
||||
io.mkdir(rootpath)
|
||||
io.mkdir(rootpath + 'dir1')
|
||||
io.mkdir(rootpath + 'dir2')
|
||||
io.mkdir(rootpath + 'dir3')
|
||||
fp = io.open(rootpath + 'file1.test', 'w')
|
||||
fp.write('1')
|
||||
fp.close()
|
||||
fp = io.open(rootpath + 'file2.test', 'w')
|
||||
fp.write('12')
|
||||
fp.close()
|
||||
fp = io.open(rootpath + 'file3.test', 'w')
|
||||
fp.write('123')
|
||||
fp.close()
|
||||
fp = io.open(rootpath + ('dir1', 'file1.test'), 'w')
|
||||
fp.write('1')
|
||||
fp.close()
|
||||
fp = io.open(rootpath + ('dir2', 'file2.test'), 'w')
|
||||
fp.write('12')
|
||||
fp.close()
|
||||
fp = io.open(rootpath + ('dir3', 'file3.test'), 'w')
|
||||
fp.write('123')
|
||||
fp.close()
|
||||
return rootpath
|
||||
|
||||
class TCDirectories(TestCase):
|
||||
def test_empty(self):
|
||||
d = Directories()
|
||||
@@ -33,13 +56,11 @@ class TCDirectories(TestCase):
|
||||
def test_add_path(self):
|
||||
d = Directories()
|
||||
p = testpath + 'utils'
|
||||
added = d.add_path(p)
|
||||
d.add_path(p)
|
||||
self.assertEqual(1,len(d))
|
||||
self.assert_(p in d)
|
||||
self.assert_((p + 'foobar') in d)
|
||||
self.assert_(p[:-1] not in d)
|
||||
self.assertEqual(p,added.path)
|
||||
self.assert_(d[0] is added)
|
||||
p = self.tmppath()
|
||||
d.add_path(p)
|
||||
self.assertEqual(2,len(d))
|
||||
@@ -53,13 +74,13 @@ class TCDirectories(TestCase):
|
||||
self.assertRaises(AlreadyThereError, d.add_path, p + 'foobar')
|
||||
self.assertEqual(1, len(d))
|
||||
|
||||
def test_AddPath_containing_paths_already_there(self):
|
||||
def test_add_path_containing_paths_already_there(self):
|
||||
d = Directories()
|
||||
d.add_path(testpath + 'utils')
|
||||
self.assertEqual(1, len(d))
|
||||
added = d.add_path(testpath)
|
||||
self.assertEqual(1, len(d))
|
||||
self.assert_(added is d[0])
|
||||
d.add_path(testpath)
|
||||
eq_(len(d), 1)
|
||||
eq_(d[0], testpath)
|
||||
|
||||
def test_AddPath_non_latin(self):
|
||||
p = Path(self.tmpdir())
|
||||
@@ -114,7 +135,7 @@ class TCDirectories(TestCase):
|
||||
|
||||
def test_set_state_keep_state_dict_size_to_minimum(self):
|
||||
d = Directories()
|
||||
p = Path(phys_test.create_fake_fs(self.tmpdir()))
|
||||
p = create_fake_fs(self.tmppath())
|
||||
d.add_path(p)
|
||||
d.set_state(p,STATE_REFERENCE)
|
||||
d.set_state(p + 'dir1',STATE_REFERENCE)
|
||||
@@ -129,7 +150,7 @@ class TCDirectories(TestCase):
|
||||
|
||||
def test_get_files(self):
|
||||
d = Directories()
|
||||
p = Path(phys_test.create_fake_fs(self.tmpdir()))
|
||||
p = create_fake_fs(self.tmppath())
|
||||
d.add_path(p)
|
||||
d.set_state(p + 'dir1',STATE_REFERENCE)
|
||||
d.set_state(p + 'dir2',STATE_EXCLUDED)
|
||||
@@ -177,52 +198,28 @@ class TCDirectories(TestCase):
|
||||
except LookupError:
|
||||
self.fail()
|
||||
|
||||
def test_default_dirclass(self):
|
||||
self.assert_(Directories().dirclass is hsfs.phys.Directory)
|
||||
|
||||
def test_dirclass(self):
|
||||
class MySpecialDirclass(hsfs.phys.Directory): pass
|
||||
d = Directories()
|
||||
d.dirclass = MySpecialDirclass
|
||||
d.add_path(testpath)
|
||||
self.assert_(isinstance(d[0], MySpecialDirclass))
|
||||
|
||||
def test_load_from_file_with_invalid_path(self):
|
||||
#This test simulates a load from file resulting in a
|
||||
#InvalidPath raise. Other directories must be loaded.
|
||||
d1 = Directories()
|
||||
d1.add_path(testpath + 'utils')
|
||||
#Will raise InvalidPath upon loading
|
||||
d1.add_path(self.tmppath()).name = 'does_not_exist'
|
||||
p = self.tmppath()
|
||||
d1.add_path(p)
|
||||
io.rmdir(p)
|
||||
tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
|
||||
d1.save_to_file(tmpxml)
|
||||
d2 = Directories()
|
||||
d2.load_from_file(tmpxml)
|
||||
self.assertEqual(1, len(d2))
|
||||
|
||||
def test_load_from_file_with_same_paths(self):
|
||||
#This test simulates a load from file resulting in a
|
||||
#AlreadyExists raise. Other directories must be loaded.
|
||||
d1 = Directories()
|
||||
p1 = self.tmppath()
|
||||
p2 = self.tmppath()
|
||||
d1.add_path(p1)
|
||||
d1.add_path(p2)
|
||||
#Will raise AlreadyExists upon loading
|
||||
d1.add_path(self.tmppath()).name = unicode(p1)
|
||||
tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
|
||||
d1.save_to_file(tmpxml)
|
||||
d2 = Directories()
|
||||
d2.load_from_file(tmpxml)
|
||||
self.assertEqual(2, len(d2))
|
||||
|
||||
def test_unicode_save(self):
|
||||
d = Directories()
|
||||
p1 = self.tmppath() + u'hello\xe9'
|
||||
io.mkdir(p1)
|
||||
io.mkdir(p1 + u'foo\xe9')
|
||||
d.add_path(p1)
|
||||
d.set_state(d[0][0].path, STATE_EXCLUDED)
|
||||
d.set_state(p1 + u'foo\xe9', STATE_EXCLUDED)
|
||||
tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
|
||||
try:
|
||||
d.save_to_file(tmpxml)
|
||||
@@ -231,7 +228,7 @@ class TCDirectories(TestCase):
|
||||
|
||||
def test_get_files_refreshes_its_directories(self):
|
||||
d = Directories()
|
||||
p = Path(phys_test.create_fake_fs(self.tmpdir()))
|
||||
p = create_fake_fs(self.tmppath())
|
||||
d.add_path(p)
|
||||
files = d.get_files()
|
||||
self.assertEqual(6, len(list(files)))
|
||||
@@ -258,16 +255,6 @@ class TCDirectories(TestCase):
|
||||
d.set_state(hidden_dir_path, STATE_NORMAL)
|
||||
self.assertEqual(d.get_state(hidden_dir_path), STATE_NORMAL)
|
||||
|
||||
def test_special_dirclasses(self):
|
||||
# if a path is in special_dirclasses, use this class instead
|
||||
class MySpecialDirclass(hsfs.phys.Directory): pass
|
||||
d = Directories()
|
||||
p1 = self.tmppath()
|
||||
p2 = self.tmppath()
|
||||
d.special_dirclasses[p1] = MySpecialDirclass
|
||||
self.assert_(isinstance(d.add_path(p2), hsfs.phys.Directory))
|
||||
self.assert_(isinstance(d.add_path(p1), MySpecialDirclass))
|
||||
|
||||
def test_default_path_state_override(self):
|
||||
# It's possible for a subclass to override the default state of a path
|
||||
class MyDirectories(Directories):
|
||||
|
||||
@@ -340,21 +340,13 @@ class TCget_match(TestCase):
|
||||
self.assertEqual(int((6.0 / 13.0) * 100),get_match(NamedObject("foo bar",True),NamedObject("bar bleh",True),(WEIGHT_WORDS,)).percentage)
|
||||
|
||||
|
||||
class TCMatchFactory(TestCase):
|
||||
class GetMatches(TestCase):
|
||||
def test_empty(self):
|
||||
self.assertEqual([],MatchFactory().getmatches([]))
|
||||
|
||||
def test_defaults(self):
|
||||
mf = MatchFactory()
|
||||
self.assertEqual(50,mf.common_word_threshold)
|
||||
self.assertEqual(False,mf.weight_words)
|
||||
self.assertEqual(False,mf.match_similar_words)
|
||||
self.assertEqual(False,mf.no_field_order)
|
||||
self.assertEqual(0,mf.min_match_percentage)
|
||||
eq_(getmatches([]), [])
|
||||
|
||||
def test_simple(self):
|
||||
l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("a b c foo")]
|
||||
r = MatchFactory().getmatches(l)
|
||||
r = getmatches(l)
|
||||
self.assertEqual(2,len(r))
|
||||
seek = [m for m in r if m.percentage == 50] #"foo bar" and "bar bleh"
|
||||
m = seek[0]
|
||||
@@ -367,7 +359,7 @@ class TCMatchFactory(TestCase):
|
||||
|
||||
def test_null_and_unrelated_objects(self):
|
||||
l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject(""),NamedObject("unrelated object")]
|
||||
r = MatchFactory().getmatches(l)
|
||||
r = getmatches(l)
|
||||
self.assertEqual(1,len(r))
|
||||
m = r[0]
|
||||
self.assertEqual(50,m.percentage)
|
||||
@@ -376,34 +368,33 @@ class TCMatchFactory(TestCase):
|
||||
|
||||
def test_twice_the_same_word(self):
|
||||
l = [NamedObject("foo foo bar"),NamedObject("bar bleh")]
|
||||
r = MatchFactory().getmatches(l)
|
||||
r = getmatches(l)
|
||||
self.assertEqual(1,len(r))
|
||||
|
||||
def test_twice_the_same_word_when_preworded(self):
|
||||
l = [NamedObject("foo foo bar",True),NamedObject("bar bleh",True)]
|
||||
r = MatchFactory().getmatches(l)
|
||||
r = getmatches(l)
|
||||
self.assertEqual(1,len(r))
|
||||
|
||||
def test_two_words_match(self):
|
||||
l = [NamedObject("foo bar"),NamedObject("foo bar bleh")]
|
||||
r = MatchFactory().getmatches(l)
|
||||
r = getmatches(l)
|
||||
self.assertEqual(1,len(r))
|
||||
|
||||
def test_match_files_with_only_common_words(self):
|
||||
#If a word occurs more than 50 times, it is excluded from the matching process
|
||||
#The problem with the common_word_threshold is that the files containing only common
|
||||
#words will never be matched together. We *should* match them.
|
||||
mf = MatchFactory()
|
||||
mf.common_word_threshold = 50
|
||||
# This test assumes that the common word threashold const is 50
|
||||
l = [NamedObject("foo") for i in range(50)]
|
||||
r = mf.getmatches(l)
|
||||
r = getmatches(l)
|
||||
self.assertEqual(1225,len(r))
|
||||
|
||||
def test_use_words_already_there_if_there(self):
|
||||
o1 = NamedObject('foo')
|
||||
o2 = NamedObject('bar')
|
||||
o2.words = ['foo']
|
||||
self.assertEqual(1,len(MatchFactory().getmatches([o1,o2])))
|
||||
eq_(1, len(getmatches([o1,o2])))
|
||||
|
||||
def test_job(self):
|
||||
def do_progress(p,d=''):
|
||||
@@ -413,75 +404,62 @@ class TCMatchFactory(TestCase):
|
||||
j = job.Job(1,do_progress)
|
||||
self.log = []
|
||||
s = "foo bar"
|
||||
MatchFactory().getmatches([NamedObject(s),NamedObject(s),NamedObject(s)],j)
|
||||
getmatches([NamedObject(s), NamedObject(s), NamedObject(s)], j=j)
|
||||
self.assert_(len(self.log) > 2)
|
||||
self.assertEqual(0,self.log[0])
|
||||
self.assertEqual(100,self.log[-1])
|
||||
|
||||
def test_weight_words(self):
|
||||
mf = MatchFactory()
|
||||
mf.weight_words = True
|
||||
l = [NamedObject("foo bar"),NamedObject("bar bleh")]
|
||||
m = mf.getmatches(l)[0]
|
||||
m = getmatches(l, weight_words=True)[0]
|
||||
self.assertEqual(int((6.0 / 13.0) * 100),m.percentage)
|
||||
|
||||
def test_similar_word(self):
|
||||
mf = MatchFactory()
|
||||
mf.match_similar_words = True
|
||||
l = [NamedObject("foobar"),NamedObject("foobars")]
|
||||
self.assertEqual(1,len(mf.getmatches(l)))
|
||||
self.assertEqual(100,mf.getmatches(l)[0].percentage)
|
||||
eq_(len(getmatches(l, match_similar_words=True)), 1)
|
||||
eq_(getmatches(l, match_similar_words=True)[0].percentage, 100)
|
||||
l = [NamedObject("foobar"),NamedObject("foo")]
|
||||
self.assertEqual(0,len(mf.getmatches(l))) #too far
|
||||
eq_(len(getmatches(l, match_similar_words=True)), 0) #too far
|
||||
l = [NamedObject("bizkit"),NamedObject("bizket")]
|
||||
self.assertEqual(1,len(mf.getmatches(l)))
|
||||
eq_(len(getmatches(l, match_similar_words=True)), 1)
|
||||
l = [NamedObject("foobar"),NamedObject("foosbar")]
|
||||
self.assertEqual(1,len(mf.getmatches(l)))
|
||||
eq_(len(getmatches(l, match_similar_words=True)), 1)
|
||||
|
||||
def test_single_object_with_similar_words(self):
|
||||
mf = MatchFactory()
|
||||
mf.match_similar_words = True
|
||||
l = [NamedObject("foo foos")]
|
||||
self.assertEqual(0,len(mf.getmatches(l)))
|
||||
eq_(len(getmatches(l, match_similar_words=True)), 0)
|
||||
|
||||
def test_double_words_get_counted_only_once(self):
|
||||
mf = MatchFactory()
|
||||
l = [NamedObject("foo bar foo bleh"),NamedObject("foo bar bleh bar")]
|
||||
m = mf.getmatches(l)[0]
|
||||
m = getmatches(l)[0]
|
||||
self.assertEqual(75,m.percentage)
|
||||
|
||||
def test_with_fields(self):
|
||||
mf = MatchFactory()
|
||||
o1 = NamedObject("foo bar - foo bleh")
|
||||
o2 = NamedObject("foo bar - bleh bar")
|
||||
o1.words = getfields(o1.name)
|
||||
o2.words = getfields(o2.name)
|
||||
m = mf.getmatches([o1, o2])[0]
|
||||
m = getmatches([o1, o2])[0]
|
||||
self.assertEqual(50, m.percentage)
|
||||
|
||||
def test_with_fields_no_order(self):
|
||||
mf = MatchFactory()
|
||||
mf.no_field_order = True
|
||||
o1 = NamedObject("foo bar - foo bleh")
|
||||
o2 = NamedObject("bleh bang - foo bar")
|
||||
o1.words = getfields(o1.name)
|
||||
o2.words = getfields(o2.name)
|
||||
m = mf.getmatches([o1, o2])[0]
|
||||
self.assertEqual(50 ,m.percentage)
|
||||
m = getmatches([o1, o2], no_field_order=True)[0]
|
||||
eq_(m.percentage, 50)
|
||||
|
||||
def test_only_match_similar_when_the_option_is_set(self):
|
||||
mf = MatchFactory()
|
||||
mf.match_similar_words = False
|
||||
l = [NamedObject("foobar"),NamedObject("foobars")]
|
||||
self.assertEqual(0,len(mf.getmatches(l)))
|
||||
eq_(len(getmatches(l, match_similar_words=False)), 0)
|
||||
|
||||
def test_dont_recurse_do_match(self):
|
||||
# with nosetests, the stack is increased. The number has to be high enough not to be failing falsely
|
||||
sys.setrecursionlimit(100)
|
||||
mf = MatchFactory()
|
||||
files = [NamedObject('foo bar') for i in range(101)]
|
||||
try:
|
||||
mf.getmatches(files)
|
||||
getmatches(files)
|
||||
except RuntimeError:
|
||||
self.fail()
|
||||
finally:
|
||||
@@ -489,18 +467,9 @@ class TCMatchFactory(TestCase):
|
||||
|
||||
def test_min_match_percentage(self):
|
||||
l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("a b c foo")]
|
||||
mf = MatchFactory()
|
||||
mf.min_match_percentage = 50
|
||||
r = mf.getmatches(l)
|
||||
r = getmatches(l, min_match_percentage=50)
|
||||
self.assertEqual(1,len(r)) #Only "foo bar" / "bar bleh" should match
|
||||
|
||||
def test_limit(self):
|
||||
l = [NamedObject(),NamedObject(),NamedObject()]
|
||||
mf = MatchFactory()
|
||||
mf.limit = 2
|
||||
r = mf.getmatches(l)
|
||||
self.assertEqual(2,len(r))
|
||||
|
||||
def test_MemoryError(self):
|
||||
@log_calls
|
||||
def mocked_match(first, second, flags):
|
||||
@@ -510,9 +479,8 @@ class TCMatchFactory(TestCase):
|
||||
|
||||
objects = [NamedObject() for i in range(10)] # results in 45 matches
|
||||
self.mock(engine, 'get_match', mocked_match)
|
||||
mf = MatchFactory()
|
||||
try:
|
||||
r = mf.getmatches(objects)
|
||||
r = getmatches(objects)
|
||||
except MemoryError:
|
||||
self.fail('MemorryError must be handled')
|
||||
self.assertEqual(42, len(r))
|
||||
@@ -738,7 +706,7 @@ class TCget_groups(TestCase):
|
||||
|
||||
def test_simple(self):
|
||||
l = [NamedObject("foo bar"),NamedObject("bar bleh")]
|
||||
matches = MatchFactory().getmatches(l)
|
||||
matches = getmatches(l)
|
||||
m = matches[0]
|
||||
r = get_groups(matches)
|
||||
self.assertEqual(1,len(r))
|
||||
@@ -749,7 +717,7 @@ class TCget_groups(TestCase):
|
||||
def test_group_with_multiple_matches(self):
|
||||
#This results in 3 matches
|
||||
l = [NamedObject("foo"),NamedObject("foo"),NamedObject("foo")]
|
||||
matches = MatchFactory().getmatches(l)
|
||||
matches = getmatches(l)
|
||||
r = get_groups(matches)
|
||||
self.assertEqual(1,len(r))
|
||||
g = r[0]
|
||||
@@ -759,7 +727,7 @@ class TCget_groups(TestCase):
|
||||
l = [NamedObject("a b"),NamedObject("a b"),NamedObject("b c"),NamedObject("c d"),NamedObject("c d")]
|
||||
#There will be 2 groups here: group "a b" and group "c d"
|
||||
#"b c" can go either of them, but not both.
|
||||
matches = MatchFactory().getmatches(l)
|
||||
matches = getmatches(l)
|
||||
r = get_groups(matches)
|
||||
self.assertEqual(2,len(r))
|
||||
self.assertEqual(5,len(r[0])+len(r[1]))
|
||||
@@ -768,7 +736,7 @@ class TCget_groups(TestCase):
|
||||
l = [NamedObject("a b"),NamedObject("a b"),NamedObject("a b"),NamedObject("a b")]
|
||||
#There will be 2 groups here: group "a b" and group "c d"
|
||||
#"b c" can fit in both, but it must be in only one of them
|
||||
matches = MatchFactory().getmatches(l)
|
||||
matches = getmatches(l)
|
||||
r = get_groups(matches)
|
||||
self.assertEqual(1,len(r))
|
||||
|
||||
@@ -788,7 +756,7 @@ class TCget_groups(TestCase):
|
||||
|
||||
def test_four_sized_group(self):
|
||||
l = [NamedObject("foobar") for i in xrange(4)]
|
||||
m = MatchFactory().getmatches(l)
|
||||
m = getmatches(l)
|
||||
r = get_groups(m)
|
||||
self.assertEqual(1,len(r))
|
||||
self.assertEqual(4,len(r[0]))
|
||||
|
||||
@@ -16,8 +16,8 @@ from hsutil.path import Path
|
||||
from hsutil.testcase import TestCase
|
||||
from hsutil.misc import first
|
||||
|
||||
from . import engine_test
|
||||
from .. import data, engine
|
||||
from . import engine_test, data
|
||||
from .. import engine
|
||||
from ..results import *
|
||||
|
||||
class NamedObject(engine_test.NamedObject):
|
||||
@@ -37,7 +37,7 @@ class NamedObject(engine_test.NamedObject):
|
||||
def GetTestGroups():
|
||||
objects = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("foo bleh"),NamedObject("ibabtu"),NamedObject("ibabtu")]
|
||||
objects[1].size = 1024
|
||||
matches = engine.MatchFactory().getmatches(objects) #we should have 5 matches
|
||||
matches = engine.getmatches(objects) #we should have 5 matches
|
||||
groups = engine.get_groups(matches) #We should have 2 groups
|
||||
for g in groups:
|
||||
g.prioritize(lambda x:objects.index(x)) #We want the dupes to be in the same order as the list is
|
||||
@@ -505,7 +505,7 @@ class TCResultsXML(TestCase):
|
||||
return objects[1]
|
||||
|
||||
objects = [NamedObject(u"\xe9foo bar",True),NamedObject("bar bleh",True)]
|
||||
matches = engine.MatchFactory().getmatches(objects) #we should have 5 matches
|
||||
matches = engine.getmatches(objects) #we should have 5 matches
|
||||
groups = engine.get_groups(matches) #We should have 2 groups
|
||||
for g in groups:
|
||||
g.prioritize(lambda x:objects.index(x)) #We want the dupes to be in the same order as the list is
|
||||
|
||||
@@ -132,8 +132,6 @@ def test_content_scan_doesnt_put_md5_in_words_at_the_end():
|
||||
f[1].md5 = f[1].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
|
||||
r = s.GetDupeGroups(f)
|
||||
g = r[0]
|
||||
eq_(g.ref.words, ['--'])
|
||||
eq_(g.dupes[0].words, ['--'])
|
||||
|
||||
def test_extension_is_not_counted_in_filename_scan():
|
||||
s = Scanner()
|
||||
@@ -369,23 +367,6 @@ def test_ignore_list_checks_for_unicode():
|
||||
assert f2 in g
|
||||
assert f3 in g
|
||||
|
||||
def test_custom_match_factory():
|
||||
class MatchFactory(object):
|
||||
def getmatches(self, objects, j=None):
|
||||
return [Match(objects[0], objects[1], 420)]
|
||||
|
||||
|
||||
s = Scanner()
|
||||
s.match_factory = MatchFactory()
|
||||
o1, o2 = no('foo'), no('bar')
|
||||
groups = s.GetDupeGroups([o1, o2])
|
||||
eq_(len(groups), 1)
|
||||
g = groups[0]
|
||||
eq_(len(g), 2)
|
||||
g.switch_ref(o1)
|
||||
m = g.get_match_of(o2)
|
||||
eq_(m, (o1, o2, 420))
|
||||
|
||||
def test_file_evaluates_to_false():
|
||||
# A very wrong way to use any() was added at some point, causing resulting group list
|
||||
# to be empty.
|
||||
@@ -455,15 +436,3 @@ def test_partial_group_match():
|
||||
assert o2 in group
|
||||
assert o3 not in group
|
||||
eq_(s.discarded_file_count, 1)
|
||||
|
||||
|
||||
#--- Scanner ME
|
||||
def test_priorize_me():
|
||||
# in ScannerME, bitrate goes first (right after is_ref) in prioritization
|
||||
s = ScannerME()
|
||||
o1, o2 = no('foo'), no('foo')
|
||||
o1.bitrate = 1
|
||||
o2.bitrate = 2
|
||||
[group] = s.GetDupeGroups([o1, o2])
|
||||
assert group.ref is o2
|
||||
|
||||
|
||||
@@ -16,10 +16,10 @@ import os.path as op
|
||||
from PyQt4.QtCore import Qt, QTimer, QObject, QCoreApplication, QUrl, SIGNAL
|
||||
from PyQt4.QtGui import QProgressDialog, QDesktopServices, QFileDialog, QDialog, QMessageBox
|
||||
|
||||
import hsfs as fs
|
||||
from hsutil import job
|
||||
from hsutil.reg import RegistrationRequired
|
||||
|
||||
from dupeguru import fs
|
||||
from dupeguru.app import (DupeGuru as DupeGuruBase, JOB_SCAN, JOB_LOAD, JOB_MOVE, JOB_COPY,
|
||||
JOB_DELETE)
|
||||
|
||||
@@ -145,6 +145,7 @@ class DupeGuru(DupeGuruBase, QObject):
|
||||
|
||||
def ask_for_reg_code(self):
|
||||
if self.reg.ask_for_code():
|
||||
#XXX bug???
|
||||
self._setup_ui_as_registered()
|
||||
|
||||
@demo_method
|
||||
|
||||
@@ -47,7 +47,14 @@ class DirectoryNode(TreeNode):
|
||||
return DirectoryNode(self.model, self, ref, row)
|
||||
|
||||
def _getChildren(self):
|
||||
return self.ref.dirs
|
||||
return self.model._dirs.get_subfolders(self.ref)
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
if self.parent is not None:
|
||||
return self.ref[-1]
|
||||
else:
|
||||
return unicode(self.ref)
|
||||
|
||||
|
||||
class DirectoriesModel(TreeModel):
|
||||
@@ -70,13 +77,13 @@ class DirectoriesModel(TreeModel):
|
||||
node = index.internalPointer()
|
||||
if role == Qt.DisplayRole:
|
||||
if index.column() == 0:
|
||||
return node.ref.name
|
||||
return node.name
|
||||
else:
|
||||
return STATES[self._dirs.get_state(node.ref.path)]
|
||||
return STATES[self._dirs.get_state(node.ref)]
|
||||
elif role == Qt.EditRole and index.column() == 1:
|
||||
return self._dirs.get_state(node.ref.path)
|
||||
return self._dirs.get_state(node.ref)
|
||||
elif role == Qt.ForegroundRole:
|
||||
state = self._dirs.get_state(node.ref.path)
|
||||
state = self._dirs.get_state(node.ref)
|
||||
if state == 1:
|
||||
return QBrush(Qt.blue)
|
||||
elif state == 2:
|
||||
@@ -101,6 +108,6 @@ class DirectoriesModel(TreeModel):
|
||||
if not index.isValid() or role != Qt.EditRole or index.column() != 1:
|
||||
return False
|
||||
node = index.internalPointer()
|
||||
self._dirs.set_state(node.ref.path, value)
|
||||
self._dirs.set_state(node.ref, value)
|
||||
return True
|
||||
|
||||
|
||||
@@ -8,12 +8,13 @@
|
||||
import objc
|
||||
from AppKit import *
|
||||
|
||||
from dupeguru import app_me_cocoa, scanner
|
||||
from dupeguru_me.app_cocoa import DupeGuruME
|
||||
from dupeguru.scanner import (SCAN_TYPE_FILENAME, SCAN_TYPE_FIELDS, SCAN_TYPE_FIELDS_NO_ORDER,
|
||||
SCAN_TYPE_TAG, SCAN_TYPE_CONTENT, SCAN_TYPE_CONTENT_AUDIO)
|
||||
|
||||
# Fix py2app imports which chokes on relative imports
|
||||
from dupeguru import app, app_cocoa, data, directories, engine, export, ignore, results, scanner
|
||||
from hsfs import auto, stats, tree, music
|
||||
from hsfs.phys import music
|
||||
from dupeguru_me import app_cocoa, data, fs, scanner
|
||||
from dupeguru import app, app_cocoa, data, directories, engine, export, ignore, results, scanner, fs
|
||||
from hsmedia import aiff, flac, genres, id3v1, id3v2, mp4, mpeg, ogg, wma
|
||||
from hsutil import conflict
|
||||
|
||||
@@ -23,7 +24,7 @@ class PyApp(NSObject):
|
||||
class PyDupeGuru(PyApp):
|
||||
def init(self):
|
||||
self = super(PyDupeGuru,self).init()
|
||||
self.app = app_me_cocoa.DupeGuruME()
|
||||
self.app = DupeGuruME()
|
||||
return self
|
||||
|
||||
#---Directories
|
||||
@@ -180,12 +181,12 @@ class PyDupeGuru(PyApp):
|
||||
def setScanType_(self, scan_type):
|
||||
try:
|
||||
self.app.scanner.scan_type = [
|
||||
scanner.SCAN_TYPE_FILENAME,
|
||||
scanner.SCAN_TYPE_FIELDS,
|
||||
scanner.SCAN_TYPE_FIELDS_NO_ORDER,
|
||||
scanner.SCAN_TYPE_TAG,
|
||||
scanner.SCAN_TYPE_CONTENT,
|
||||
scanner.SCAN_TYPE_CONTENT_AUDIO
|
||||
SCAN_TYPE_FILENAME,
|
||||
SCAN_TYPE_FIELDS,
|
||||
SCAN_TYPE_FIELDS_NO_ORDER,
|
||||
SCAN_TYPE_TAG,
|
||||
SCAN_TYPE_CONTENT,
|
||||
SCAN_TYPE_CONTENT_AUDIO
|
||||
][scan_type]
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
me/py/__init__.py (new file, 0 lines)
@@ -7,29 +7,29 @@
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
import os.path as op
|
||||
import logging
|
||||
from appscript import app, k, CommandError
|
||||
import time
|
||||
|
||||
from hsutil.cocoa import as_fetch
|
||||
import hsfs.phys.music
|
||||
|
||||
import app_cocoa, data_me, scanner
|
||||
from dupeguru.app_cocoa import JOBID2TITLE, DupeGuru as DupeGuruBase
|
||||
|
||||
from . import data, scanner, fs
|
||||
|
||||
JOB_REMOVE_DEAD_TRACKS = 'jobRemoveDeadTracks'
|
||||
JOB_SCAN_DEAD_TRACKS = 'jobScanDeadTracks'
|
||||
|
||||
app_cocoa.JOBID2TITLE.update({
|
||||
JOBID2TITLE.update({
|
||||
JOB_REMOVE_DEAD_TRACKS: "Removing dead tracks from your iTunes Library",
|
||||
JOB_SCAN_DEAD_TRACKS: "Scanning the iTunes Library",
|
||||
})
|
||||
|
||||
class DupeGuruME(app_cocoa.DupeGuru):
|
||||
class DupeGuruME(DupeGuruBase):
|
||||
def __init__(self):
|
||||
app_cocoa.DupeGuru.__init__(self, data_me, 'dupeGuru Music Edition', appid=1)
|
||||
DupeGuruBase.__init__(self, data, 'dupeGuru Music Edition', appid=1)
|
||||
self.scanner = scanner.ScannerME()
|
||||
self.directories.dirclass = hsfs.phys.music.Directory
|
||||
self.directories.fileclasses = [fs.Mp3File, fs.Mp4File, fs.WmaFile, fs.OggFile, fs.FlacFile, fs.AiffFile]
|
||||
self.dead_tracks = []
|
||||
|
||||
def remove_dead_tracks(self):
|
||||
@@ -8,7 +8,7 @@
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
from hsutil.str import format_time, FT_MINUTES, format_size
|
||||
from .data import (format_path, format_timestamp, format_words, format_perc,
|
||||
from dupeguru.data import (format_path, format_timestamp, format_words, format_perc,
|
||||
format_dupe_count, cmp_value)
|
||||
|
||||
COLUMNS = [
|
||||
@@ -76,7 +76,7 @@ def GetDisplayInfo(dupe, group, delta):
|
||||
str(dupe.track),
|
||||
dupe.comment,
|
||||
format_perc(percentage),
|
||||
format_words(dupe.words),
|
||||
format_words(dupe.words) if hasattr(dupe, 'words') else '',
|
||||
format_dupe_count(dupe_count)
|
||||
]
|
||||
|
||||
me/py/fs.py (new file, 183 lines)
@@ -0,0 +1,183 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2009-10-23
|
||||
# $Id$
|
||||
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "HS" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
from hsmedia import mpeg, wma, mp4, ogg, flac, aiff
|
||||
from hsutil.str import get_file_ext
|
||||
from dupeguru import fs
|
||||
|
||||
TAG_FIELDS = ['audiosize', 'duration', 'bitrate', 'samplerate', 'title', 'artist',
|
||||
'album', 'genre', 'year', 'track', 'comment']
|
||||
|
||||
class MusicFile(fs.File):
|
||||
INITIAL_INFO = fs.File.INITIAL_INFO.copy()
|
||||
INITIAL_INFO.update({
|
||||
'audiosize': 0,
|
||||
'bitrate' : 0,
|
||||
'duration' : 0,
|
||||
'samplerate':0,
|
||||
'artist' : '',
|
||||
'album' : '',
|
||||
'title' : '',
|
||||
'genre' : '',
|
||||
'comment' : '',
|
||||
'year' : '',
|
||||
'track' : 0,
|
||||
})
|
||||
HANDLED_EXTS = set()
|
||||
|
||||
@classmethod
|
||||
def can_handle(cls, path):
|
||||
if not fs.File.can_handle(path):
|
||||
return False
|
||||
return get_file_ext(path[-1]) in cls.HANDLED_EXTS
|
||||
|
||||
|
||||
class Mp3File(MusicFile):
|
||||
HANDLED_EXTS = set(['mp3'])
|
||||
def _read_info(self, field):
|
||||
if field == 'md5partial':
|
||||
fileinfo = mpeg.Mpeg(unicode(self.path))
|
||||
self._md5partial_offset = fileinfo.audio_offset
|
||||
self._md5partial_size = fileinfo.audio_size
|
||||
MusicFile._read_info(self, field)
|
||||
if field in TAG_FIELDS:
|
||||
fileinfo = mpeg.Mpeg(unicode(self.path))
|
||||
self.audiosize = fileinfo.audio_size
|
||||
self.bitrate = fileinfo.bitrate
|
||||
self.duration = fileinfo.duration
|
||||
self.samplerate = fileinfo.sample_rate
|
||||
i1 = fileinfo.id3v1
|
||||
# id3v1, even when non-existent, gives empty values; id3v2 doesn't. If id3v2 doesn't exist,
# just fall back to id3v1.
|
||||
i2 = fileinfo.id3v2
|
||||
if not i2.exists:
|
||||
i2 = i1
|
||||
self.artist = i2.artist or i1.artist
|
||||
self.album = i2.album or i1.album
|
||||
self.title = i2.title or i1.title
|
||||
self.genre = i2.genre or i1.genre
|
||||
self.comment = i2.comment or i1.comment
|
||||
self.year = i2.year or i1.year
|
||||
self.track = i2.track or i1.track
|
||||
|
||||
class WmaFile(MusicFile):
|
||||
HANDLED_EXTS = set(['wma'])
|
||||
def _read_info(self, field):
|
||||
if field == 'md5partial':
|
||||
dec = wma.WMADecoder(unicode(self.path))
|
||||
self._md5partial_offset = dec.audio_offset
|
||||
self._md5partial_size = dec.audio_size
|
||||
MusicFile._read_info(self, field)
|
||||
if field in TAG_FIELDS:
|
||||
dec = wma.WMADecoder(unicode(self.path))
|
||||
self.audiosize = dec.audio_size
|
||||
self.bitrate = dec.bitrate
|
||||
self.duration = dec.duration
|
||||
self.samplerate = dec.sample_rate
|
||||
self.artist = dec.artist
|
||||
self.album = dec.album
|
||||
self.title = dec.title
|
||||
self.genre = dec.genre
|
||||
self.comment = dec.comment
|
||||
self.year = dec.year
|
||||
self.track = dec.track
|
||||
|
||||
class Mp4File(MusicFile):
|
||||
HANDLED_EXTS = set(['m4a', 'm4p'])
|
||||
def _read_info(self, field):
|
||||
if field == 'md5partial':
|
||||
dec = mp4.File(unicode(self.path))
|
||||
self._md5partial_offset = dec.audio_offset
|
||||
self._md5partial_size = dec.audio_size
|
||||
dec.close()
|
||||
MusicFile._read_info(self, field)
|
||||
if field in TAG_FIELDS:
|
||||
dec = mp4.File(unicode(self.path))
|
||||
self.audiosize = dec.audio_size
|
||||
self.bitrate = dec.bitrate
|
||||
self.duration = dec.duration
|
||||
self.samplerate = dec.sample_rate
|
||||
self.artist = dec.artist
|
||||
self.album = dec.album
|
||||
self.title = dec.title
|
||||
self.genre = dec.genre
|
||||
self.comment = dec.comment
|
||||
self.year = dec.year
|
||||
self.track = dec.track
|
||||
dec.close()
|
||||
|
||||
class OggFile(MusicFile):
|
||||
HANDLED_EXTS = set(['ogg'])
|
||||
def _read_info(self, field):
|
||||
if field == 'md5partial':
|
||||
dec = ogg.Vorbis(unicode(self.path))
|
||||
self._md5partial_offset = dec.audio_offset
|
||||
self._md5partial_size = dec.audio_size
|
||||
MusicFile._read_info(self, field)
|
||||
if field in TAG_FIELDS:
|
||||
dec = ogg.Vorbis(unicode(self.path))
|
||||
self.audiosize = dec.audio_size
|
||||
self.bitrate = dec.bitrate
|
||||
self.duration = dec.duration
|
||||
self.samplerate = dec.sample_rate
|
||||
self.artist = dec.artist
|
||||
self.album = dec.album
|
||||
self.title = dec.title
|
||||
self.genre = dec.genre
|
||||
self.comment = dec.comment
|
||||
self.year = dec.year
|
||||
self.track = dec.track
|
||||
|
||||
class FlacFile(MusicFile):
|
||||
HANDLED_EXTS = set(['flac'])
|
||||
def _read_info(self, field):
|
||||
if field == 'md5partial':
|
||||
dec = flac.FLAC(unicode(self.path))
|
||||
self._md5partial_offset = dec.audio_offset
|
||||
self._md5partial_size = dec.audio_size
|
||||
MusicFile._read_info(self, field)
|
||||
if field in TAG_FIELDS:
|
||||
dec = flac.FLAC(unicode(self.path))
|
||||
self.audiosize = dec.audio_size
|
||||
self.bitrate = dec.bitrate
|
||||
self.duration = dec.duration
|
||||
self.samplerate = dec.sample_rate
|
||||
self.artist = dec.artist
|
||||
self.album = dec.album
|
||||
self.title = dec.title
|
||||
self.genre = dec.genre
|
||||
self.comment = dec.comment
|
||||
self.year = dec.year
|
||||
self.track = dec.track
|
||||
|
||||
class AiffFile(MusicFile):
|
||||
HANDLED_EXTS = set(['aif', 'aiff', 'aifc'])
|
||||
def _read_info(self, field):
|
||||
if field == 'md5partial':
|
||||
dec = aiff.File(unicode(self.path))
|
||||
self._md5partial_offset = dec.audio_offset
|
||||
self._md5partial_size = dec.audio_size
|
||||
MusicFile._read_info(self, field)
|
||||
if field in TAG_FIELDS:
|
||||
dec = aiff.File(unicode(self.path))
|
||||
self.audiosize = dec.audio_size
|
||||
self.bitrate = dec.bitrate
|
||||
self.duration = dec.duration
|
||||
self.samplerate = dec.sample_rate
|
||||
tag = dec.tag
|
||||
if tag is not None:
|
||||
self.artist = tag.artist
|
||||
self.album = tag.album
|
||||
self.title = tag.title
|
||||
self.genre = tag.genre
|
||||
self.comment = tag.comment
|
||||
self.year = tag.year
|
||||
self.track = tag.track
|
||||
|
||||
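Each of these classes advertises the extensions it handles through HANDLED_EXTS and a can_handle() classmethod; the directories.fileclasses lists set up elsewhere in this changeset decide which class claims a given path. A minimal sketch of that dispatch (the helper name and loop are illustrative; only can_handle() comes from the code above):

    def pick_fileclass(path, fileclasses):
        # e.g. fileclasses = [fs.Mp3File, fs.Mp4File, fs.WmaFile, fs.OggFile, fs.FlacFile, fs.AiffFile]
        for fileclass in fileclasses:
            if fileclass.can_handle(path):   # extension check against HANDLED_EXTS
                return fileclass(path)
        return None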
me/py/scanner.py (new file, 16 lines)
@@ -0,0 +1,16 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2006/03/03
|
||||
# $Id$
|
||||
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "HS" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
from dupeguru.scanner import Scanner as ScannerBase
|
||||
|
||||
class ScannerME(ScannerBase):
|
||||
@staticmethod
|
||||
def _key_func(dupe):
|
||||
return (not dupe.is_ref, -dupe.bitrate, -dupe.size)
|
||||
|
||||
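The tuple returned by _key_func orders dupes for prioritization: reference files first, then highest bitrate, then largest size. A small illustrative check of that ordering, assuming the key is applied as a plain sort key:

    from dupeguru_me.scanner import ScannerME   # package name as used by the Qt code

    class Track(object):
        def __init__(self, is_ref, bitrate, size):
            self.is_ref, self.bitrate, self.size = is_ref, bitrate, size

    candidates = [Track(False, 128, 5), Track(False, 320, 4)]
    candidates.sort(key=ScannerME._key_func)
    assert candidates[0].bitrate == 320   # higher bitrate wins, as test_priorize_me expects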
me/py/tests/__init__.py (new file, 0 lines)
me/py/tests/scanner_test.py (new file, 33 lines)
@@ -0,0 +1,33 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2009-10-23
|
||||
# $Id$
|
||||
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "HS" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
from hsutil.path import Path
|
||||
|
||||
from dupeguru.engine import getwords
|
||||
from ..scanner import *
|
||||
|
||||
class NamedObject(object):
|
||||
def __init__(self, name="foobar", size=1):
|
||||
self.name = name
|
||||
self.size = size
|
||||
self.path = Path('')
|
||||
self.words = getwords(name)
|
||||
|
||||
|
||||
no = NamedObject
|
||||
|
||||
def test_priorize_me():
|
||||
# in ScannerME, bitrate goes first (right after is_ref) in prioritization
|
||||
s = ScannerME()
|
||||
o1, o2 = no('foo'), no('foo')
|
||||
o1.bitrate = 1
|
||||
o2.bitrate = 2
|
||||
[group] = s.GetDupeGroups([o1, o2])
|
||||
assert group.ref is o2
|
||||
@@ -7,9 +7,7 @@
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
import hsfs.phys.music
|
||||
|
||||
from dupeguru import data_me, scanner
|
||||
from dupeguru_me import data, scanner, fs
|
||||
|
||||
from base.app import DupeGuru as DupeGuruBase
|
||||
from details_dialog import DetailsDialog
|
||||
@@ -23,11 +21,11 @@ class DupeGuru(DupeGuruBase):
|
||||
DELTA_COLUMNS = frozenset([2, 3, 4, 5, 7, 8])
|
||||
|
||||
def __init__(self):
|
||||
DupeGuruBase.__init__(self, data_me, appid=1)
|
||||
DupeGuruBase.__init__(self, data, appid=1)
|
||||
|
||||
def _setup(self):
|
||||
self.scanner = scanner.ScannerME()
|
||||
self.directories.dirclass = hsfs.phys.music.Directory
|
||||
self.directories.fileclasses = [fs.Mp3File, fs.Mp4File, fs.WmaFile, fs.OggFile, fs.FlacFile, fs.AiffFile]
|
||||
DupeGuruBase._setup(self)
|
||||
|
||||
def _update_options(self):
|
||||
|
||||
@@ -12,7 +12,6 @@ from dupeguru_pe import app_cocoa as app_pe_cocoa
|
||||
# Fix py2app imports which chokes on relative imports
|
||||
from dupeguru import app, app_cocoa, data, directories, engine, export, ignore, results, scanner
|
||||
from dupeguru_pe import block, cache, matchbase, data
|
||||
from hsfs import auto, stats, tree
|
||||
from hsutil import conflict
|
||||
|
||||
class PyApp(NSObject):
|
||||
@@ -39,7 +38,7 @@ class PyDupeGuru(PyApp):
|
||||
self.app.scanner.ignore_list.Clear()
|
||||
|
||||
def clearPictureCache(self):
|
||||
self.app.scanner.match_factory.cached_blocks.clear()
|
||||
self.app.scanner.cached_blocks.clear()
|
||||
|
||||
def doScan(self):
|
||||
return self.app.start_scanning()
|
||||
@@ -172,10 +171,10 @@ class PyDupeGuru(PyApp):
|
||||
|
||||
#---Properties
|
||||
def setMatchScaled_(self,match_scaled):
|
||||
self.app.scanner.match_factory.match_scaled = match_scaled
|
||||
self.app.scanner.match_scaled = match_scaled
|
||||
|
||||
def setMinMatchPercentage_(self,percentage):
|
||||
self.app.scanner.match_factory.threshold = int(percentage)
|
||||
self.app.scanner.threshold = int(percentage)
|
||||
|
||||
def setMixFileKind_(self,mix_file_kind):
|
||||
self.app.scanner.mix_file_kind = mix_file_kind
|
||||
|
||||
@@ -1,3 +1,7 @@
- date: 2009-10-24
  version: 1.7.8
  description: |
    * Fixed a bug sometimes causing some duplicates to be ignored during the scans. (#73)
- date: 2009-10-14
  version: 1.7.7
  description: |
|
||||
|
||||
@@ -7,41 +7,43 @@
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
import os
|
||||
import os.path as op
|
||||
import logging
|
||||
import plistlib
|
||||
import re
|
||||
|
||||
import objc
|
||||
from Foundation import *
|
||||
from AppKit import *
|
||||
from appscript import app, k
|
||||
|
||||
from hsutil import job, io
|
||||
import hsfs as fs
|
||||
from hsfs import phys, InvalidPath
|
||||
from hsutil import files
|
||||
from hsutil import io
|
||||
from hsutil.str import get_file_ext
|
||||
from hsutil.path import Path
|
||||
from hsutil.cocoa import as_fetch
|
||||
|
||||
from dupeguru import fs
|
||||
from dupeguru import app_cocoa, directories
|
||||
from . import data, matchbase
|
||||
from . import data
|
||||
from .cache import string_to_colors, Cache
|
||||
from .scanner import ScannerPE
|
||||
|
||||
mainBundle = NSBundle.mainBundle()
|
||||
PictureBlocks = mainBundle.classNamed_('PictureBlocks')
|
||||
assert PictureBlocks is not None
|
||||
|
||||
class Photo(phys.File):
|
||||
INITIAL_INFO = phys.File.INITIAL_INFO.copy()
|
||||
class Photo(fs.File):
|
||||
INITIAL_INFO = fs.File.INITIAL_INFO.copy()
|
||||
INITIAL_INFO.update({
|
||||
'dimensions': (0,0),
|
||||
})
|
||||
HANDLED_EXTS = set(['png', 'jpg', 'jpeg', 'gif', 'psd', 'bmp', 'tiff', 'tif', 'nef', 'cr2'])
|
||||
|
||||
@classmethod
|
||||
def can_handle(cls, path):
|
||||
return fs.File.can_handle(path) and get_file_ext(path[-1]) in cls.HANDLED_EXTS
|
||||
|
||||
def _read_info(self, field):
|
||||
super(Photo, self)._read_info(field)
|
||||
fs.File._read_info(self, field)
|
||||
if field == 'dimensions':
|
||||
size = PictureBlocks.getImageSize_(unicode(self.path))
|
||||
self.dimensions = (size.width, size.height)
|
||||
@@ -49,7 +51,7 @@ class Photo(phys.File):
|
||||
def get_blocks(self, block_count_per_side):
|
||||
try:
|
||||
blocks = PictureBlocks.getBlocksFromImagePath_blockCount_(unicode(self.path), block_count_per_side)
|
||||
except Exception, e:
|
||||
except Exception as e:
|
||||
raise IOError('The reading of "%s" failed with "%s"' % (unicode(self.path), unicode(e)))
|
||||
if not blocks:
|
||||
raise IOError('The picture %s could not be read' % unicode(self.path))
|
||||
@@ -57,89 +59,79 @@ class Photo(phys.File):
|
||||
|
||||
|
||||
class IPhoto(Photo):
|
||||
def __init__(self, parent, whole_path):
|
||||
super(IPhoto, self).__init__(parent, whole_path[-1])
|
||||
self.whole_path = whole_path
|
||||
|
||||
def _build_path(self):
|
||||
return self.whole_path
|
||||
|
||||
@property
|
||||
def display_path(self):
|
||||
return super(IPhoto, self)._build_path()
|
||||
return Path(('iPhoto Library', self.name))
|
||||
|
||||
def get_iphoto_database_path():
|
||||
ud = NSUserDefaults.standardUserDefaults()
|
||||
prefs = ud.persistentDomainForName_('com.apple.iApps')
|
||||
if 'iPhotoRecentDatabases' not in prefs:
|
||||
raise directories.InvalidPathError()
|
||||
plisturl = NSURL.URLWithString_(prefs['iPhotoRecentDatabases'][0])
|
||||
return Path(plisturl.path())
|
||||
|
||||
class Directory(phys.Directory):
|
||||
cls_file_class = Photo
|
||||
cls_supported_exts = ('png', 'jpg', 'jpeg', 'gif', 'psd', 'bmp', 'tiff', 'nef', 'cr2')
|
||||
|
||||
def _fetch_subitems(self):
|
||||
subdirs, subfiles = super(Directory,self)._fetch_subitems()
|
||||
return subdirs, [name for name in subfiles if get_file_ext(name) in self.cls_supported_exts]
|
||||
|
||||
|
||||
class IPhotoLibrary(fs.Directory):
|
||||
def __init__(self, plistpath):
|
||||
self.plistpath = plistpath
|
||||
self.refpath = plistpath[:-1]
|
||||
# the AlbumData.xml file lives right in the library path
|
||||
super(IPhotoLibrary, self).__init__(None, 'iPhoto Library')
|
||||
if not io.exists(plistpath):
|
||||
raise InvalidPath(self)
|
||||
|
||||
def _update_photo(self, photo_data):
|
||||
def get_iphoto_pictures(plistpath):
|
||||
if not io.exists(plistpath):
|
||||
raise InvalidPath(self)
|
||||
s = io.open(plistpath).read()
|
||||
# There was a case where a guy had 0x10 chars in his plist, causing expat errors on loading
|
||||
s = s.replace('\x10', '')
|
||||
# It seems that iPhoto sometimes doesn't properly escape & chars. The regexp below is to find
|
||||
# any & char that is not a &-based entity (&, ", etc.). based on TextMate's XML
|
||||
# bundle's regexp
|
||||
s, count = re.subn(r'&(?![a-zA-Z0-9_-]+|#[0-9]+|#x[0-9a-fA-F]+;)', '', s)
|
||||
if count:
|
||||
logging.warning("%d invalid XML entities replacement made", count)
|
||||
plist = plistlib.readPlistFromString(s)
|
||||
result = []
|
||||
for photo_data in plist['Master Image List'].values():
|
||||
if photo_data['MediaType'] != 'Image':
|
||||
return
|
||||
continue
|
||||
photo_path = Path(photo_data['ImagePath'])
|
||||
subpath = photo_path[len(self.refpath):-1]
|
||||
subdir = self
|
||||
for element in subpath:
|
||||
try:
|
||||
subdir = subdir[element]
|
||||
except KeyError:
|
||||
subdir = fs.Directory(subdir, element)
|
||||
try:
|
||||
IPhoto(subdir, photo_path)
|
||||
except fs.AlreadyExistsError:
|
||||
# it's possible for 2 entries in the plist to point to the same path. Ignore one of them.
|
||||
pass
|
||||
photo = IPhoto(photo_path)
|
||||
result.append(photo)
|
||||
return result
|
||||
|
||||
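The re.subn() call in get_iphoto_pictures() above drops any ampersand that is not already part of an XML entity, so that plistlib can parse the file. An illustrative example of what it does to a malformed value:

    import re
    s = '<string>Tom & Jerry &amp; Friends</string>'
    s, count = re.subn(r'&(?![a-zA-Z0-9_-]+|#[0-9]+|#x[0-9a-fA-F]+;)', '', s)
    # s is now '<string>Tom  Jerry &amp; Friends</string>' and count == 1:
    # the bare '&' was removed, the '&amp;' entity was left alone.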
class Directories(directories.Directories):
|
||||
def __init__(self):
|
||||
directories.Directories.__init__(self, fileclasses=[Photo])
|
||||
self.iphoto_libpath = get_iphoto_database_path()
|
||||
self.set_state(self.iphoto_libpath[:-1], directories.STATE_EXCLUDED)
|
||||
|
||||
def update(self):
|
||||
self.clear()
|
||||
s = open(unicode(self.plistpath)).read()
|
||||
# There was a case where a guy had 0x10 chars in his plist, causing expat errors on loading
|
||||
s = s.replace('\x10', '')
|
||||
# It seems that iPhoto sometimes doesn't properly escape & chars. The regexp below is to find
|
||||
# any & char that is not a &-based entity (&, ", etc.). based on TextMate's XML
|
||||
# bundle's regexp
|
||||
s, count = re.subn(r'&(?![a-zA-Z0-9_-]+|#[0-9]+|#x[0-9a-fA-F]+;)', '', s)
|
||||
if count:
|
||||
logging.warning("%d invalid XML entities replacement made", count)
|
||||
plist = plistlib.readPlistFromString(s)
|
||||
for photo_data in plist['Master Image List'].values():
|
||||
self._update_photo(photo_data)
|
||||
def _get_files(self, from_path):
|
||||
if from_path == Path('iPhoto Library'):
|
||||
is_ref = self.get_state(from_path) == directories.STATE_REFERENCE
|
||||
photos = get_iphoto_pictures(self.iphoto_libpath)
|
||||
for photo in photos:
|
||||
photo.is_ref = is_ref
|
||||
return photos
|
||||
else:
|
||||
return directories.Directories._get_files(self, from_path)
|
||||
|
||||
def force_update(self): # Don't update
|
||||
pass
|
||||
@staticmethod
|
||||
def get_subfolders(path):
|
||||
if path == Path('iPhoto Library'):
|
||||
return []
|
||||
else:
|
||||
return directories.Directories.get_subfolders(path)
|
||||
|
||||
def add_path(self, path):
|
||||
if path == Path('iPhoto Library'):
|
||||
if path in self:
|
||||
raise AlreadyThereError()
|
||||
self._dirs.append(path)
|
||||
else:
|
||||
directories.Directories.add_path(self, path)
|
||||
|
||||
|
||||
class DupeGuruPE(app_cocoa.DupeGuru):
|
||||
def __init__(self):
|
||||
app_cocoa.DupeGuru.__init__(self, data, 'dupeGuru Picture Edition', appid=5)
|
||||
self.scanner.match_factory = matchbase.AsyncMatchFactory()
|
||||
self.directories.dirclass = Directory
|
||||
self.directories.special_dirclasses[Path('iPhoto Library')] = lambda _, __: self._create_iphoto_library()
|
||||
self.scanner = ScannerPE()
|
||||
self.directories = Directories()
|
||||
p = op.join(self.appdata, 'cached_pictures.db')
|
||||
self.scanner.match_factory.cached_blocks = Cache(p)
|
||||
|
||||
def _create_iphoto_library(self):
|
||||
ud = NSUserDefaults.standardUserDefaults()
|
||||
prefs = ud.persistentDomainForName_('com.apple.iApps')
|
||||
if 'iPhotoRecentDatabases' not in prefs:
|
||||
raise directories.InvalidPathError
|
||||
plisturl = NSURL.URLWithString_(prefs['iPhotoRecentDatabases'][0])
|
||||
plistpath = Path(plisturl.path())
|
||||
return IPhotoLibrary(plistpath)
|
||||
self.scanner.cached_blocks = Cache(p)
|
||||
|
||||
def _do_delete(self, j):
|
||||
def op(dupe):
|
||||
@@ -174,40 +166,19 @@ class DupeGuruPE(app_cocoa.DupeGuru):
|
||||
|
||||
def _do_load(self, j):
|
||||
self.directories.load_from_file(op.join(self.appdata, 'last_directories.xml'))
|
||||
for d in self.directories:
|
||||
if isinstance(d, IPhotoLibrary):
|
||||
d.update()
|
||||
self.results.load_from_xml(op.join(self.appdata, 'last_results.xml'), self._get_file, j)
|
||||
|
||||
def _get_file(self, str_path):
|
||||
p = Path(str_path)
|
||||
for d in self.directories:
|
||||
result = None
|
||||
if p in d.path:
|
||||
result = d.find_path(p[d.path:])
|
||||
if isinstance(d, IPhotoLibrary) and p in d.refpath:
|
||||
result = d.find_path(p[d.refpath:])
|
||||
if result is not None:
|
||||
return result
|
||||
|
||||
def add_directory(self, d):
|
||||
result = app_cocoa.DupeGuru.add_directory(self, d)
|
||||
if (result == 0) and (d == 'iPhoto Library'):
|
||||
[iphotolib] = [dir for dir in self.directories if dir.path == d]
|
||||
iphotolib.update()
|
||||
return result
|
||||
if p in self.directories.iphoto_libpath[:-1]:
|
||||
return IPhoto(p)
|
||||
return app_cocoa.DupeGuru._get_file(self, str_path)
|
||||
|
||||
def copy_or_move(self, dupe, copy, destination, dest_type):
|
||||
if isinstance(dupe, IPhoto):
|
||||
copy = True
|
||||
return app_cocoa.DupeGuru.copy_or_move(self, dupe, copy, destination, dest_type)
|
||||
|
||||
def start_scanning(self):
|
||||
for directory in self.directories:
|
||||
if isinstance(directory, IPhotoLibrary):
|
||||
self.directories.set_state(directory.refpath, directories.STATE_EXCLUDED)
|
||||
return app_cocoa.DupeGuru.start_scanning(self)
|
||||
|
||||
def selected_dupe_path(self):
|
||||
if not self.selected_dupes:
|
||||
return None
|
||||
|
||||
@@ -20,58 +20,42 @@ from .block import avgdiff, DifferentBlockCountError, NoBlocksError
|
||||
from .cache import Cache
|
||||
|
||||
MIN_ITERATIONS = 3
|
||||
BLOCK_COUNT_PER_SIDE = 15
|
||||
|
||||
# Enough so that we're sure that the main thread will not wait after a result.get() call
|
||||
# cpucount*2 should be enough to be sure that the spawned process will not wait after the results
|
||||
# collection made by the main process.
|
||||
RESULTS_QUEUE_LIMIT = multiprocessing.cpu_count() * 2
|
||||
|
||||
def get_match(first,second,percentage):
|
||||
def prepare_pictures(pictures, cached_blocks, j=job.nulljob):
|
||||
# The MemoryError handlers in there use logging without first caring about whether or not
|
||||
# there is enough memory left to carry on the operation because it is assumed that the
|
||||
# MemoryError happens when trying to read an image file, which is freed from memory by the
|
||||
# time that MemoryError is raised.
|
||||
prepared = [] # only pictures for which there was no error getting blocks
|
||||
try:
|
||||
for picture in j.iter_with_progress(pictures, 'Analyzed %d/%d pictures'):
|
||||
picture.dimensions
|
||||
picture.unicode_path = unicode(picture.path)
|
||||
try:
|
||||
if picture.unicode_path not in cached_blocks:
|
||||
blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
|
||||
cached_blocks[picture.unicode_path] = blocks
|
||||
prepared.append(picture)
|
||||
except IOError as e:
|
||||
logging.warning(unicode(e))
|
||||
except MemoryError:
|
||||
logging.warning(u'Ran out of memory while reading %s of size %d' % (picture.unicode_path, picture.size))
|
||||
if picture.size < 10 * 1024 * 1024: # We're really running out of memory
|
||||
raise
|
||||
except MemoryError:
|
||||
logging.warning('Ran out of memory while preparing pictures')
|
||||
return prepared
|
||||
|
||||
def get_match(first, second, percentage):
|
||||
if percentage < 0:
|
||||
percentage = 0
|
||||
return Match(first,second,percentage)
|
||||
|
||||
class MatchFactory(object):
|
||||
cached_blocks = None
|
||||
block_count_per_side = 15
|
||||
threshold = 75
|
||||
match_scaled = False
|
||||
|
||||
def _do_getmatches(self, files, j):
|
||||
raise NotImplementedError()
|
||||
|
||||
def getmatches(self, files, j=job.nulljob):
|
||||
# The MemoryError handlers in there use logging without first caring about whether or not
|
||||
# there is enough memory left to carry on the operation because it is assumed that the
|
||||
# MemoryError happens when trying to read an image file, which is freed from memory by the
|
||||
# time that MemoryError is raised.
|
||||
j = j.start_subjob([3, 7])
|
||||
logging.info('Preparing %d files' % len(files))
|
||||
prepared = self.prepare_files(files, j)
|
||||
logging.info('Finished preparing %d files' % len(prepared))
|
||||
return self._do_getmatches(prepared, j)
|
||||
|
||||
def prepare_files(self, files, j=job.nulljob):
|
||||
prepared = [] # only files for which there was no error getting blocks
|
||||
try:
|
||||
for picture in j.iter_with_progress(files, 'Analyzed %d/%d pictures'):
|
||||
picture.dimensions
|
||||
picture.unicode_path = unicode(picture.path)
|
||||
try:
|
||||
if picture.unicode_path not in self.cached_blocks:
|
||||
blocks = picture.get_blocks(self.block_count_per_side)
|
||||
self.cached_blocks[picture.unicode_path] = blocks
|
||||
prepared.append(picture)
|
||||
except IOError as e:
|
||||
logging.warning(unicode(e))
|
||||
except MemoryError:
|
||||
logging.warning(u'Ran out of memory while reading %s of size %d' % (picture.unicode_path, picture.size))
|
||||
if picture.size < 10 * 1024 * 1024: # We're really running out of memory
|
||||
raise
|
||||
except MemoryError:
|
||||
logging.warning('Ran out of memory while preparing files')
|
||||
return prepared
|
||||
|
||||
return Match(first, second, percentage)
|
||||
|
||||
def async_compare(ref_id, other_ids, dbname, threshold):
|
||||
cache = Cache(dbname, threaded=False)
|
||||
@@ -89,53 +73,55 @@ def async_compare(ref_id, other_ids, dbname, threshold):
|
||||
results.append((ref_id, other_id, percentage))
|
||||
cache.con.close()
|
||||
return results
|
||||
|
||||
class AsyncMatchFactory(MatchFactory):
|
||||
def _do_getmatches(self, pictures, j):
|
||||
def empty_out_queue(queue, into):
|
||||
try:
|
||||
while True:
|
||||
into.append(queue.get(block=False))
|
||||
except Empty:
|
||||
pass
|
||||
|
||||
j = j.start_subjob([9, 1], 'Preparing for matching')
|
||||
cache = self.cached_blocks
|
||||
id2picture = {}
|
||||
dimensions2pictures = defaultdict(set)
|
||||
for picture in pictures:
|
||||
try:
|
||||
picture.cache_id = cache.get_id(picture.unicode_path)
|
||||
id2picture[picture.cache_id] = picture
|
||||
if not self.match_scaled:
|
||||
dimensions2pictures[picture.dimensions].add(picture)
|
||||
except ValueError:
|
||||
pass
|
||||
pictures = [p for p in pictures if hasattr(p, 'cache_id')]
|
||||
pool = multiprocessing.Pool()
|
||||
async_results = []
|
||||
matches = []
|
||||
pictures_copy = set(pictures)
|
||||
for ref in j.iter_with_progress(pictures, 'Matched %d/%d pictures'):
|
||||
others = pictures_copy if self.match_scaled else dimensions2pictures[ref.dimensions]
|
||||
others.remove(ref)
|
||||
if others:
|
||||
cache_ids = [f.cache_id for f in others]
|
||||
args = (ref.cache_id, cache_ids, self.cached_blocks.dbname, self.threshold)
|
||||
async_results.append(pool.apply_async(async_compare, args))
|
||||
if len(async_results) > RESULTS_QUEUE_LIMIT:
|
||||
result = async_results.pop(0)
|
||||
matches.extend(result.get())
|
||||
|
||||
result = []
|
||||
for ref_id, other_id, percentage in j.iter_with_progress(matches, 'Verified %d/%d matches', every=10):
|
||||
ref = id2picture[ref_id]
|
||||
other = id2picture[other_id]
|
||||
if percentage == 100 and ref.md5 != other.md5:
|
||||
percentage = 99
|
||||
if percentage >= self.threshold:
|
||||
result.append(get_match(ref, other, percentage))
|
||||
return result
|
||||
|
||||
def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job.nulljob):
|
||||
def empty_out_queue(queue, into):
|
||||
try:
|
||||
while True:
|
||||
into.append(queue.get(block=False))
|
||||
except Empty:
|
||||
pass
|
||||
|
||||
j = j.start_subjob([3, 7])
|
||||
pictures = prepare_pictures(pictures, cached_blocks, j)
|
||||
j = j.start_subjob([9, 1], 'Preparing for matching')
|
||||
cache = cached_blocks
|
||||
id2picture = {}
|
||||
dimensions2pictures = defaultdict(set)
|
||||
for picture in pictures:
|
||||
try:
|
||||
picture.cache_id = cache.get_id(picture.unicode_path)
|
||||
id2picture[picture.cache_id] = picture
|
||||
if not match_scaled:
|
||||
dimensions2pictures[picture.dimensions].add(picture)
|
||||
except ValueError:
|
||||
pass
|
||||
pictures = [p for p in pictures if hasattr(p, 'cache_id')]
|
||||
pool = multiprocessing.Pool()
|
||||
async_results = []
|
||||
matches = []
|
||||
pictures_copy = set(pictures)
|
||||
for ref in j.iter_with_progress(pictures, 'Matched %d/%d pictures'):
|
||||
others = pictures_copy if match_scaled else dimensions2pictures[ref.dimensions]
|
||||
others.remove(ref)
|
||||
if others:
|
||||
cache_ids = [f.cache_id for f in others]
|
||||
args = (ref.cache_id, cache_ids, cached_blocks.dbname, threshold)
|
||||
async_results.append(pool.apply_async(async_compare, args))
|
||||
if len(async_results) > RESULTS_QUEUE_LIMIT:
|
||||
result = async_results.pop(0)
|
||||
matches.extend(result.get())
|
||||
for result in async_results: # process the rest of the results
|
||||
matches.extend(result.get())
|
||||
|
||||
result = []
|
||||
for ref_id, other_id, percentage in j.iter_with_progress(matches, 'Verified %d/%d matches', every=10):
|
||||
ref = id2picture[ref_id]
|
||||
other = id2picture[other_id]
|
||||
if percentage == 100 and ref.md5 != other.md5:
|
||||
percentage = 99
|
||||
if percentage >= threshold:
|
||||
result.append(get_match(ref, other, percentage))
|
||||
return result
|
||||
|
||||
multiprocessing.freeze_support()
|
||||
pe/py/scanner.py (new file, 22 lines)
@@ -0,0 +1,22 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2009-10-18
|
||||
# $Id$
|
||||
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "HS" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
from dupeguru.scanner import Scanner
|
||||
|
||||
from . import matchbase
|
||||
|
||||
class ScannerPE(Scanner):
|
||||
cached_blocks = None
|
||||
match_scaled = False
|
||||
threshold = 75
|
||||
|
||||
def _getmatches(self, files, j):
|
||||
return matchbase.getmatches(files, self.cached_blocks, self.threshold, self.match_scaled, j)
|
||||
|
||||
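ScannerPE replaces the old match_factory attributes with plain scanner attributes. A minimal sketch of how it gets wired up, mirroring DupeGuruPE._setup above (the cache path and the empty picture list are placeholders):

    from dupeguru_pe.cache import Cache
    from dupeguru_pe.scanner import ScannerPE

    scanner = ScannerPE()
    scanner.cached_blocks = Cache('/tmp/cached_pictures.db')  # placeholder path
    scanner.threshold = 75
    scanner.match_scaled = False
    pictures = []  # would be Photo/File instances exposing get_blocks()
    groups = scanner.GetDupeGroups(pictures)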
pe/qt/app.py (34 lines changed)
@@ -12,12 +12,12 @@ import os.path as op
|
||||
from PyQt4.QtGui import QImage
|
||||
import PIL.Image
|
||||
|
||||
from hsfs import phys
|
||||
from hsutil.str import get_file_ext
|
||||
|
||||
from dupeguru import fs
|
||||
from dupeguru_pe import data as data_pe
|
||||
from dupeguru_pe.cache import Cache
|
||||
from dupeguru_pe.matchbase import AsyncMatchFactory
|
||||
from dupeguru_pe.scanner import ScannerPE
|
||||
|
||||
from block import getblocks
|
||||
from base.app import DupeGuru as DupeGuruBase
|
||||
@@ -26,14 +26,19 @@ from main_window import MainWindow
|
||||
from preferences import Preferences
|
||||
from preferences_dialog import PreferencesDialog
|
||||
|
||||
class File(phys.File):
|
||||
INITIAL_INFO = phys.File.INITIAL_INFO.copy()
|
||||
class File(fs.File):
|
||||
INITIAL_INFO = fs.File.INITIAL_INFO.copy()
|
||||
INITIAL_INFO.update({
|
||||
'dimensions': (0,0),
|
||||
})
|
||||
HANDLED_EXTS = set(['png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'tif'])
|
||||
|
||||
@classmethod
|
||||
def can_handle(cls, path):
|
||||
return fs.File.can_handle(path) and get_file_ext(path[-1]) in cls.HANDLED_EXTS
|
||||
|
||||
def _read_info(self, field):
|
||||
super(File, self)._read_info(field)
|
||||
fs.File._read_info(self, field)
|
||||
if field == 'dimensions':
|
||||
im = PIL.Image.open(unicode(self.path))
|
||||
self.dimensions = im.size
|
||||
@@ -44,15 +49,6 @@ class File(phys.File):
|
||||
return getblocks(image, block_count_per_side)
|
||||
|
||||
|
||||
class Directory(phys.Directory):
|
||||
cls_file_class = File
|
||||
cls_supported_exts = ('png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff')
|
||||
|
||||
def _fetch_subitems(self):
|
||||
subdirs, subfiles = super(Directory, self)._fetch_subitems()
|
||||
return subdirs, [name for name in subfiles if get_file_ext(name) in self.cls_supported_exts]
|
||||
|
||||
|
||||
class DupeGuru(DupeGuruBase):
|
||||
LOGO_NAME = 'logo_pe'
|
||||
NAME = 'dupeGuru Picture Edition'
|
||||
@@ -63,15 +59,15 @@ class DupeGuru(DupeGuruBase):
|
||||
DupeGuruBase.__init__(self, data_pe, appid=5)
|
||||
|
||||
def _setup(self):
|
||||
self.scanner.match_factory = AsyncMatchFactory()
|
||||
self.directories.dirclass = Directory
|
||||
self.scanner.match_factory.cached_blocks = Cache(op.join(self.appdata, 'cached_pictures.db'))
|
||||
self.scanner = ScannerPE()
|
||||
self.directories.fileclasses = [File]
|
||||
self.scanner.cached_blocks = Cache(op.join(self.appdata, 'cached_pictures.db'))
|
||||
DupeGuruBase._setup(self)
|
||||
|
||||
def _update_options(self):
|
||||
DupeGuruBase._update_options(self)
|
||||
self.scanner.match_factory.match_scaled = self.prefs.match_scaled
|
||||
self.scanner.match_factory.threshold = self.prefs.filter_hardness
|
||||
self.scanner.match_scaled = self.prefs.match_scaled
|
||||
self.scanner.threshold = self.prefs.filter_hardness
|
||||
|
||||
def _create_details_dialog(self, parent):
|
||||
return DetailsDialog(parent, self)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- mode: python -*-
|
||||
a = Analysis([os.path.join(HOMEPATH,'support\\_mountzlib.py'), os.path.join(HOMEPATH,'support\\useUnicode.py'), 'start.py'],
|
||||
pathex=['C:\\src\\dupeguru\\pe\\qt'])
|
||||
pathex=[])
|
||||
pyz = PYZ(a.pure)
|
||||
exe = EXE(pyz,
|
||||
a.scripts,
|
||||
|
||||
@@ -16,6 +16,7 @@ from hsutil.build import print_and_do, build_all_qt_ui
|
||||
build_all_qt_ui(op.join('qtlib', 'ui'))
|
||||
build_all_qt_ui('base')
|
||||
build_all_qt_ui('.')
|
||||
print_and_do("pyrcc4 base\\dg.qrc > base\\dg_rc.py")
|
||||
|
||||
def move(src, dst):
|
||||
if not op.exists(src):
|
||||
|
||||
@@ -23,6 +23,6 @@ class MainWindow(MainWindowBase):
|
||||
title = "Clear Picture Cache"
|
||||
msg = "Do you really want to remove all your cached picture analysis?"
|
||||
if self._confirm(title, msg, QMessageBox.No):
|
||||
self.app.scanner.match_factory.cached_blocks.clear()
|
||||
self.app.scanner.cached_blocks.clear()
|
||||
QMessageBox.information(self, title, "Picture cache cleared.")
|
||||
|
||||
@@ -14,6 +14,9 @@ import base.dg_rc
|
||||
|
||||
from app import DupeGuru
|
||||
|
||||
# This is a workaround for a pyinstaller problem where compiled dupeguru can't read tiff files
|
||||
from PIL import TiffImagePlugin, TiffTags
|
||||
|
||||
if __name__ == "__main__":
|
||||
app = QApplication(sys.argv)
|
||||
app.setWindowIcon(QIcon(QPixmap(":/logo_pe")))
|
||||
|
||||
@@ -8,12 +8,12 @@
|
||||
import objc
|
||||
from AppKit import *
|
||||
|
||||
from dupeguru import app_se_cocoa, scanner
|
||||
from dupeguru_se.app_cocoa import DupeGuru
|
||||
from dupeguru import scanner
|
||||
|
||||
# Fix py2app imports, which choke on relative imports
|
||||
from dupeguru import app, app_cocoa, data, directories, engine, export, ignore, results, scanner
|
||||
from hsfs import auto, stats, tree
|
||||
from hsfs.phys import bundle
|
||||
from dupeguru_se import fs, data
|
||||
from dupeguru import app, app_cocoa, data, directories, engine, export, ignore, results, fs
|
||||
from hsutil import conflict
|
||||
|
||||
class PyApp(NSObject):
|
||||
@@ -22,7 +22,7 @@ class PyApp(NSObject):
|
||||
class PyDupeGuru(PyApp):
|
||||
def init(self):
|
||||
self = super(PyDupeGuru,self).init()
|
||||
self.app = app_se_cocoa.DupeGuru()
|
||||
self.app = DupeGuru()
|
||||
return self
|
||||
|
||||
#---Directories
|
||||
|
||||
se/py/LICENSE (new file, 11 lines)
@@ -0,0 +1,11 @@
|
||||
Copyright 2009 Hardcoded Software Inc. (http://www.hardcoded.net)
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of Hardcoded Software Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
||||
* If the source code has been published less than two years ago, any redistribution, in whole or in part, must retain full licensing functionality, without any attempt to change, obscure or in other ways circumvent its intent.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
se/py/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
|
||||
|
||||
@@ -11,14 +11,15 @@ import logging
|
||||
|
||||
from AppKit import *
|
||||
|
||||
from hsfs.phys import Directory as DirectoryBase
|
||||
from hsfs.phys.bundle import Bundle
|
||||
from hsutil import io
|
||||
from hsutil.path import Path
|
||||
from hsutil.misc import extract
|
||||
from hsutil.str import get_file_ext
|
||||
|
||||
from . import app_cocoa, data
|
||||
from .directories import Directories as DirectoriesBase, STATE_EXCLUDED
|
||||
from dupeguru import fs
|
||||
from dupeguru.app_cocoa import DupeGuru as DupeGuruBase
|
||||
from dupeguru.directories import Directories as DirectoriesBase, STATE_EXCLUDED
|
||||
from . import data
|
||||
from .fs import Bundle as BundleBase
|
||||
|
||||
if NSWorkspace.sharedWorkspace().respondsToSelector_('typeOfFile:error:'): # Only from 10.5
|
||||
def is_bundle(str_path):
|
||||
@@ -31,27 +32,17 @@ else: # Tiger
|
||||
def is_bundle(str_path): # just return a list of a few known bundle extensions.
|
||||
return get_file_ext(str_path) in ('app', 'pages', 'numbers')
|
||||
|
||||
class DGDirectory(DirectoryBase):
|
||||
def _create_sub_file(self, name, with_parent=True):
|
||||
if is_bundle(unicode(self.path + name)):
|
||||
parent = self if with_parent else None
|
||||
return Bundle(parent, name)
|
||||
else:
|
||||
return super(DGDirectory, self)._create_sub_file(name, with_parent)
|
||||
|
||||
def _fetch_subitems(self):
|
||||
subdirs, subfiles = super(DGDirectory, self)._fetch_subitems()
|
||||
apps, normal_dirs = extract(lambda name: is_bundle(unicode(self.path + name)), subdirs)
|
||||
subfiles += apps
|
||||
return normal_dirs, subfiles
|
||||
class Bundle(BundleBase):
|
||||
@classmethod
|
||||
def can_handle(cls, path):
|
||||
return not io.islink(path) and io.isdir(path) and is_bundle(unicode(path))
|
||||
|
||||
|
||||
class Directories(DirectoriesBase):
|
||||
ROOT_PATH_TO_EXCLUDE = map(Path, ['/Library', '/Volumes', '/System', '/bin', '/sbin', '/opt', '/private', '/dev'])
|
||||
HOME_PATH_TO_EXCLUDE = [Path('Library')]
|
||||
def __init__(self):
|
||||
DirectoriesBase.__init__(self)
|
||||
self.dirclass = DGDirectory
|
||||
DirectoriesBase.__init__(self, fileclasses=[Bundle, fs.File])
|
||||
|
||||
def _default_state_for_path(self, path):
|
||||
result = DirectoriesBase._default_state_for_path(self, path)
|
||||
@@ -63,8 +54,8 @@ class Directories(DirectoriesBase):
|
||||
return STATE_EXCLUDED
|
||||
|
||||
|
||||
class DupeGuru(app_cocoa.DupeGuru):
|
||||
class DupeGuru(DupeGuruBase):
|
||||
def __init__(self):
|
||||
app_cocoa.DupeGuru.__init__(self, data, 'dupeGuru', appid=4)
|
||||
DupeGuruBase.__init__(self, data, 'dupeGuru', appid=4)
|
||||
self.directories = Directories()
|
||||
|
||||
se/py/data.py (new file, 72 lines)
@@ -0,0 +1,72 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2006/03/15
|
||||
# $Id$
|
||||
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "HS" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
from hsutil.str import format_size
|
||||
from dupeguru.data import (format_path, format_timestamp, format_words, format_perc,
|
||||
format_dupe_count, cmp_value)
|
||||
|
||||
COLUMNS = [
|
||||
{'attr':'name','display':'Filename'},
|
||||
{'attr':'path','display':'Directory'},
|
||||
{'attr':'size','display':'Size (KB)'},
|
||||
{'attr':'extension','display':'Kind'},
|
||||
{'attr':'ctime','display':'Creation'},
|
||||
{'attr':'mtime','display':'Modification'},
|
||||
{'attr':'percentage','display':'Match %'},
|
||||
{'attr':'words','display':'Words Used'},
|
||||
{'attr':'dupe_count','display':'Dupe Count'},
|
||||
]
|
||||
|
||||
METADATA_TO_READ = ['size', 'ctime', 'mtime']
|
||||
|
||||
def GetDisplayInfo(dupe, group, delta):
|
||||
size = dupe.size
|
||||
ctime = dupe.ctime
|
||||
mtime = dupe.mtime
|
||||
m = group.get_match_of(dupe)
|
||||
if m:
|
||||
percentage = m.percentage
|
||||
dupe_count = 0
|
||||
if delta:
|
||||
r = group.ref
|
||||
size -= r.size
|
||||
ctime -= r.ctime
|
||||
mtime -= r.mtime
|
||||
else:
|
||||
percentage = group.percentage
|
||||
dupe_count = len(group.dupes)
|
||||
return [
|
||||
dupe.name,
|
||||
format_path(dupe.path),
|
||||
format_size(size, 0, 1, False),
|
||||
dupe.extension,
|
||||
format_timestamp(ctime, delta and m),
|
||||
format_timestamp(mtime, delta and m),
|
||||
format_perc(percentage),
|
||||
format_words(dupe.words) if hasattr(dupe, 'words') else '',
|
||||
format_dupe_count(dupe_count)
|
||||
]
|
||||
|
||||
def GetDupeSortKey(dupe, get_group, key, delta):
|
||||
if key == 6:
|
||||
m = get_group().get_match_of(dupe)
|
||||
return m.percentage
|
||||
if key == 8:
|
||||
return 0
|
||||
r = cmp_value(getattr(dupe, COLUMNS[key]['attr']))
|
||||
if delta and (key in (2, 4, 5)):
|
||||
r -= cmp_value(getattr(get_group().ref, COLUMNS[key]['attr']))
|
||||
return r
|
||||
|
||||
def GetGroupSortKey(group, key):
|
||||
if key == 6:
|
||||
return group.percentage
|
||||
if key == 8:
|
||||
return len(group)
|
||||
return cmp_value(getattr(group.ref, COLUMNS[key]['attr']))
|
||||
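In GetDupeSortKey, delta mode only affects columns 2, 4 and 5 (size, ctime, mtime): the reference file's value is subtracted so dupes sort by how far they deviate from their group's ref. A tiny worked example for the size column:

    ref_size, dupe_a, dupe_b = 1000, 900, 1100
    key_a = dupe_a - ref_size   # -100: smaller than the ref, sorts first ascending
    key_b = dupe_b - ref_size   # +100: larger than the ref, sorts last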
se/py/fs.py (new file, 43 lines)
@@ -0,0 +1,43 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2009-10-23
|
||||
# $Id$
|
||||
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "HS" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
import hashlib
|
||||
|
||||
from hsutil import io
|
||||
from hsutil.misc import nonone
|
||||
|
||||
from dupeguru import fs
|
||||
|
||||
class Bundle(fs.File):
|
||||
"""This class is for Mac OSX bundles (.app). Bundles are seen by the OS as
|
||||
normal directories, but I don't want that in dupeGuru. I want dupeGuru
|
||||
to see them as files.
|
||||
"""
|
||||
def _read_info(self, field):
|
||||
if field in ('size', 'ctime', 'mtime'):
|
||||
files = fs.get_all_files(self.path)
|
||||
size = sum((file.size for file in files), 0)
|
||||
self.size = size
|
||||
stats = io.stat(self.path)
|
||||
self.ctime = nonone(stats.st_ctime, 0)
|
||||
self.mtime = nonone(stats.st_mtime, 0)
|
||||
elif field in ('md5', 'md5partial'):
|
||||
# What's sensitive here is that we must make sure that subfiles'
|
||||
# md5 are always added up in the same order, but we also want a
|
||||
# different md5 if a file gets moved in a different subdirectory.
|
||||
def get_dir_md5_concat():
|
||||
files = fs.get_all_files(self.path)
|
||||
files.sort(key=lambda f:f.path)
|
||||
md5s = [getattr(f, field) for f in files]
|
||||
return ''.join(md5s)
|
||||
|
||||
md5 = hashlib.md5(get_dir_md5_concat())
|
||||
digest = md5.digest()
|
||||
setattr(self, field, digest)
|
||||
se/py/tests/__init__.py (new file, 0 lines)
se/py/tests/fs_test.py (new file, 48 lines)
@@ -0,0 +1,48 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2009-10-23
|
||||
# $Id$
|
||||
# Copyright 2009 Hardcoded Software (http://www.hardcoded.net)
|
||||
#
|
||||
# This software is licensed under the "HS" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
import hashlib
|
||||
|
||||
from nose.tools import eq_
|
||||
|
||||
from hsutil.testcase import TestCase
|
||||
from dupeguru.fs import File
|
||||
from dupeguru.tests.directories_test import create_fake_fs
|
||||
|
||||
from .. import fs
|
||||
|
||||
class TCBundle(TestCase):
|
||||
def test_size_aggregates_subfiles(self):
|
||||
p = create_fake_fs(self.tmppath())
|
||||
b = fs.Bundle(p)
|
||||
eq_(b.size, 12)
|
||||
|
||||
def test_md5_aggregate_subfiles_sorted(self):
|
||||
#dir.allfiles can return child in any order. Thus, bundle.md5 must aggregate
|
||||
#all files' md5 it contains, but it must make sure that it does so in the
|
||||
#same order everytime.
|
||||
p = create_fake_fs(self.tmppath())
|
||||
b = fs.Bundle(p)
|
||||
md5s = File(p + ('dir1', 'file1.test')).md5
|
||||
md5s += File(p + ('dir2', 'file2.test')).md5
|
||||
md5s += File(p + ('dir3', 'file3.test')).md5
|
||||
md5s += File(p + 'file1.test').md5
|
||||
md5s += File(p + 'file2.test').md5
|
||||
md5s += File(p + 'file3.test').md5
|
||||
md5 = hashlib.md5(md5s)
|
||||
eq_(b.md5, md5.digest())
|
||||
|
||||
def test_has_file_attrs(self):
|
||||
#a Bundle must behave like a file, so it must have ctime and mtime attributes
|
||||
b = fs.Bundle(self.tmppath())
|
||||
assert b.mtime > 0
|
||||
assert b.ctime > 0
|
||||
eq_(b.extension, '')
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.hardcoded.net/licenses/hs_license
|
||||
|
||||
from dupeguru import data
|
||||
from dupeguru_se import data
|
||||
from dupeguru.directories import Directories as DirectoriesBase, STATE_EXCLUDED
|
||||
|
||||
from base.app import DupeGuru as DupeGuruBase
|
||||
|
||||