Converted to py3k. There are probably still some bugs. So far, I've managed to run dupeGuru SE under PyObjC and Qt.

Virgil Dupras 2010-08-11 16:39:06 +02:00
parent fb79daad6a
commit 854d194f88
57 changed files with 394 additions and 338 deletions
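
Note: most of the mechanical edits below match what CPython's 2to3 fixers emit — print statements become print() calls, filter(None, xs) becomes a [_f for _f in xs if _f] comprehension, iteritems() becomes items(), and iterator-returning map()/keys() get wrapped in list(). Whether this pass was actually produced by 2to3 is an assumption; a typical invocation (package paths illustrative) would be:

2to3 -w -n core core_se qt cocoa   # -w: rewrite in place, -n: skip .bak backups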

View File

@ -20,10 +20,10 @@ from hscommon.build import add_to_pythonpath, print_and_do, build_all_qt_ui, cop
def build_cocoa(edition, dev, help_destpath):
if not dev:
print "Building help index"
print("Building help index")
os.system('open -a /Developer/Applications/Utilities/Help\\ Indexer.app {0}'.format(help_destpath))
print "Building dg_cocoa.plugin"
print("Building dg_cocoa.plugin")
if op.exists('build'):
shutil.rmtree('build')
os.mkdir('build')
@ -54,7 +54,7 @@ def build_cocoa(edition, dev, help_destpath):
pthpath = op.join(pluginpath, 'Contents/Resources/dev.pth')
open(pthpath, 'w').write(op.abspath('.'))
os.chdir(cocoa_project_path)
print "Building the XCode project"
print("Building the XCode project")
args = []
if dev:
args.append('-configuration dev')
@ -68,10 +68,10 @@ def build_qt(edition, dev):
build_all_qt_ui(op.join('qtlib', 'ui'))
build_all_qt_ui(op.join('qt', 'base'))
build_all_qt_ui(op.join('qt', edition))
print_and_do("pyrcc4 {0} > {1}".format(op.join('qt', 'base', 'dg.qrc'), op.join('qt', 'base', 'dg_rc.py')))
print_and_do("pyrcc4 -py3 {0} > {1}".format(op.join('qt', 'base', 'dg.qrc'), op.join('qt', 'base', 'dg_rc.py')))
if edition == 'pe':
os.chdir(op.join('qt', edition))
os.system('python gen.py')
os.system('python3 gen.py')
os.chdir(op.join('..', '..'))
def main():
@ -79,11 +79,11 @@ def main():
edition = conf['edition']
ui = conf['ui']
dev = conf['dev']
print "Building dupeGuru {0} with UI {1}".format(edition.upper(), ui)
print("Building dupeGuru {0} with UI {1}".format(edition.upper(), ui))
if dev:
print "Building in Dev mode"
print("Building in Dev mode")
add_to_pythonpath('.')
print "Generating Help"
print("Generating Help")
windows = sys.platform == 'win32'
profile = 'win_en' if windows else 'osx_en'
help_dir = 'help_{0}'.format(edition)
@ -91,10 +91,10 @@ def main():
help_basepath = op.abspath(help_dir)
help_destpath = op.abspath(op.join(help_dir, dest_dir))
helpgen.gen(help_basepath, help_destpath, profile=profile)
print "Building dupeGuru"
print("Building dupeGuru")
if edition == 'pe':
os.chdir('core_pe')
os.system('python gen.py')
os.system('python3 gen.py')
os.chdir('..')
if ui == 'cocoa':
build_cocoa(edition, dev, help_destpath)
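
The build script changes are the print-statement → print() conversion plus explicit calls into the Python 3 toolchain (python3, and pyrcc4's -py3 switch to generate a Python 3 resource module). A minimal sketch of why the parentheses matter:

# Python 2 statement form -- a SyntaxError when run under Python 3:
#     print "Building dupeGuru"
# Function form, valid in Python 3 (and, with one argument, in Python 2 too):
print("Building dupeGuru")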

View File

@ -23,10 +23,10 @@ class PyDupeGuru(PyDupeGuruBase):
#---Information
def getSelectedDupePath(self):
return unicode(self.py.selected_dupe_path())
return str(self.py.selected_dupe_path())
def getSelectedDupeRefPath(self):
return unicode(self.py.selected_dupe_ref_path())
return str(self.py.selected_dupe_ref_path())
#---Properties
def setMatchScaled_(self,match_scaled):
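
These Cocoa bridge methods stop coercing paths with unicode() because Python 3's str is the unicode type. A sketch of the equivalence (the literal is illustrative; the app passes Path objects):

# Python 2 had two text types, so paths were coerced with unicode(path).
# In Python 3, str *is* the unicode type and the name `unicode` is gone:
text = str('/Users/me/photo.jpg')
isinstance(text, str)   # True; no separate unicode class exists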

View File

@ -11,8 +11,10 @@ from core.app_cocoa_inter import PyDupeGuruBase, PyDetailsPanel
from core_se.app_cocoa import DupeGuru
# Fix py2app imports: py2app chokes on relative imports and other stuff
from core_se import fs, data
from lxml import etree, _elementpath
import hsutil.conflict
import core.engine, core.fs, core.app
import core_se.fs, core_se.data
import lxml.etree, lxml._elementpath
import gzip
class PyDupeGuru(PyDupeGuruBase):

View File

@ -18,7 +18,7 @@ def main(edition, ui, dev):
if ui not in ('cocoa', 'qt'):
ui = 'cocoa' if sys.platform == 'darwin' else 'qt'
build_type = 'Dev' if dev else 'Release'
print "Configuring dupeGuru {0} for UI {1} ({2})".format(edition.upper(), ui, build_type)
print("Configuring dupeGuru {0} for UI {1} ({2})".format(edition.upper(), ui, build_type))
conf = {
'edition': edition,
'ui': ui,

View File

@ -6,7 +6,7 @@
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
from __future__ import unicode_literals
import os
import os.path as op
@ -76,7 +76,7 @@ class DupeGuru(RegistrableApplication, Broadcaster):
def _do_delete_dupe(self, dupe):
if not io.exists(dupe.path):
return
send2trash(unicode(dupe.path)) # Raises OSError when there's a problem
send2trash(str(dupe.path)) # Raises OSError when there's a problem
self.clean_empty_dirs(dupe.path[:-1])
def _do_load(self, j):
@ -100,7 +100,7 @@ class DupeGuru(RegistrableApplication, Broadcaster):
try:
return self.data.GetDisplayInfo(dupe, group, delta)
except Exception as e:
logging.warning("Exception on GetDisplayInfo for %s: %s", unicode(dupe.path), unicode(e))
logging.warning("Exception on GetDisplayInfo for %s: %s", str(dupe.path), str(e))
return ['---'] * len(self.data.COLUMNS)
def _get_file(self, str_path):
@ -149,7 +149,7 @@ class DupeGuru(RegistrableApplication, Broadcaster):
g = self.results.get_group_of_duplicate(dupe)
for other in g:
if other is not dupe:
self.scanner.ignore_list.Ignore(unicode(other.path), unicode(dupe.path))
self.scanner.ignore_list.Ignore(str(other.path), str(dupe.path))
self.remove_duplicates(dupes)
def apply_filter(self, filter):
@ -208,7 +208,7 @@ class DupeGuru(RegistrableApplication, Broadcaster):
def export_to_xhtml(self, column_ids):
column_ids = [colid for colid in column_ids if colid.isdigit()]
column_ids = map(int, column_ids)
column_ids = list(map(int, column_ids))
column_ids.sort()
colnames = [col['display'] for i, col in enumerate(self.data.COLUMNS) if i in column_ids]
rows = []
@ -232,8 +232,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
dupe = self.selected_dupes[0]
group = self.results.get_group_of_duplicate(dupe)
ref = group.ref
cmd = cmd.replace('%d', unicode(dupe.path))
cmd = cmd.replace('%r', unicode(ref.path))
cmd = cmd.replace('%d', str(dupe.path))
cmd = cmd.replace('%r', str(ref.path))
match = re.match(r'"([^"]+)"(.*)', cmd)
if match is not None:
# This code here is because subprocess.Popen doesn't seem to accept, under Windows,
@ -313,7 +313,7 @@ class DupeGuru(RegistrableApplication, Broadcaster):
d.rename(newname)
return True
except (IndexError, fs.FSError) as e:
logging.warning("dupeGuru Warning: %s" % unicode(e))
logging.warning("dupeGuru Warning: %s" % str(e))
return False
def reveal_selected(self):
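
The list(map(...)) wrapper above is the recurring Python 3 fix for map() and filter() now returning lazy iterators instead of lists. A sketch of the failure it avoids, mirroring export_to_xhtml:

column_ids = ['2', '0', '7']
ids = map(int, column_ids)   # Python 3: a lazy map object, not a list
# ids.sort()                 # AttributeError: 'map' object has no attribute 'sort'
ids = list(ids)              # materialize first
ids.sort()                   # [0, 2, 7]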

View File

@ -49,11 +49,11 @@ class DupeGuru(app.DupeGuru):
#--- Override
@staticmethod
def _open_path(path):
NSWorkspace.sharedWorkspace().openFile_(unicode(path))
NSWorkspace.sharedWorkspace().openFile_(str(path))
@staticmethod
def _reveal_path(path):
NSWorkspace.sharedWorkspace().selectFile_inFileViewerRootedAtPath_(unicode(path), '')
NSWorkspace.sharedWorkspace().selectFile_inFileViewerRootedAtPath_(str(path), '')
def _start_job(self, jobid, func):
try:

View File

@ -11,7 +11,7 @@ from hsutil.str import format_time, FT_DECIMAL, format_size
import time
def format_path(p):
return unicode(p[:-1])
return str(p[:-1])
def format_timestamp(t, delta):
if delta:
@ -38,4 +38,4 @@ def format_dupe_count(c):
return str(c) if c else '---'
def cmp_value(value):
return value.lower() if isinstance(value, basestring) else value
return value.lower() if isinstance(value, str) else value
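
basestring, the common ancestor of Python 2's str and unicode, no longer exists; plain str is the only text type. A hedged sketch of the check:

value = 'Foo'
key = value.lower() if isinstance(value, str) else value
# isinstance(value, basestring)   # NameError: basestring was removed in Python 3
# code that must also accept raw bytes would test isinstance(value, (str, bytes))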

View File

@ -151,11 +151,11 @@ class Directories(object):
root = etree.Element('directories')
for root_path in self:
root_path_node = etree.SubElement(root, 'root_directory')
root_path_node.set('path', unicode(root_path))
for path, state in self.states.iteritems():
root_path_node.set('path', str(root_path))
for path, state in self.states.items():
state_node = etree.SubElement(root, 'state')
state_node.set('path', unicode(path))
state_node.set('value', unicode(state))
state_node.set('path', str(path))
state_node.set('value', str(state))
tree = etree.ElementTree(root)
tree.write(fp, encoding='utf-8')
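
dict.iteritems() is gone in Python 3; items() now returns a lightweight view, so the rename costs nothing here. A sketch (mapping contents hypothetical):

states = {'/photos': 1, '/music': 2}
for path, state in states.items():   # py3: items() is a lazy view, like py2's iteritems()
    print(path, state)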

View File

@ -6,7 +6,7 @@
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
from __future__ import division
import difflib
import itertools
import logging
@ -25,15 +25,15 @@ NO_FIELD_ORDER) = range(3)
JOB_REFRESH_RATE = 100
def getwords(s):
if isinstance(s, unicode):
if isinstance(s, str):
s = normalize('NFD', s)
s = multi_replace(s, "-_&+():;\\[]{}.,<>/?~!@#$*", ' ').lower()
s = ''.join(c for c in s if c in string.ascii_letters + string.digits + string.whitespace)
return filter(None, s.split(' ')) # filter() is to remove empty elements
return [_f for _f in s.split(' ') if _f] # remove empty elements
def getfields(s):
fields = [getwords(field) for field in s.split(' - ')]
return filter(None, fields)
return [_f for _f in fields if _f]
def unpack_fields(fields):
result = []
@ -118,7 +118,7 @@ def build_word_dict(objects, j=job.nulljob):
def merge_similar_words(word_dict):
"""Take all keys in word_dict that are similar, and merge them together.
"""
keys = word_dict.keys()
keys = list(word_dict.keys())
keys.sort(key=len)# we want the shortest word to stay
while keys:
key = keys.pop(0)
@ -138,7 +138,7 @@ def reduce_common_words(word_dict, threshold):
Because if we remove them, we will miss some duplicates!
"""
uncommon_words = set(word for word, objects in word_dict.items() if len(objects) < threshold)
for word, objects in word_dict.items():
for word, objects in list(word_dict.items()):
if len(objects) < threshold:
continue
reduced = set()
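
Two related fixes in engine.py: keys() no longer returns a sortable list, and iterating a live view while the loop body mutates the dict raises RuntimeError in Python 3 — hence the list(...) snapshots. A sketch of the sharper second case:

word_dict = {'a': {1}, 'the': {1, 2, 3}}
threshold = 2
for word, objects in list(word_dict.items()):   # snapshot before mutating
    if len(objects) >= threshold:
        del word_dict[word]   # over a live .items() view this raises RuntimeError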

View File

@ -13,7 +13,7 @@ from tempfile import mkdtemp
# Yes, this is a very low-tech solution, but at least it doesn't have all these annoying dependency
# and resource problems.
MAIN_TEMPLATE = u"""
MAIN_TEMPLATE = """
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
<html xmlns="http://www.w3.org/1999/xhtml">
@ -104,33 +104,33 @@ $rows
</html>
"""
COLHEADERS_TEMPLATE = u"<th>{name}</th>"
COLHEADERS_TEMPLATE = "<th>{name}</th>"
ROW_TEMPLATE = u"""
ROW_TEMPLATE = """
<tr>
<td class="{indented}">{filename}</td>{cells}
</tr>
"""
CELL_TEMPLATE = u"""<td>{value}</td>"""
CELL_TEMPLATE = """<td>{value}</td>"""
def export_to_xhtml(colnames, rows):
# a row is a list of values with the first value being a flag indicating if the row should be indented
if rows:
assert len(rows[0]) == len(colnames) + 1 # + 1 is for the "indented" flag
colheaders = u''.join(COLHEADERS_TEMPLATE.format(name=name) for name in colnames)
colheaders = ''.join(COLHEADERS_TEMPLATE.format(name=name) for name in colnames)
rendered_rows = []
for row in rows:
# [2:] is to remove the indented flag + filename
indented = u'indented' if row[0] else u''
indented = 'indented' if row[0] else ''
filename = row[1]
cells = u''.join(CELL_TEMPLATE.format(value=value) for value in row[2:])
cells = ''.join(CELL_TEMPLATE.format(value=value) for value in row[2:])
rendered_rows.append(ROW_TEMPLATE.format(indented=indented, filename=filename, cells=cells))
rendered_rows = u''.join(rendered_rows)
rendered_rows = ''.join(rendered_rows)
# The main template can't use format because the css code uses {}
content = MAIN_TEMPLATE.replace('$colheaders', colheaders).replace('$rows', rendered_rows)
folder = mkdtemp()
destpath = op.join(folder, u'export.htm')
destpath = op.join(folder, 'export.htm')
fp = open(destpath, 'w')
fp.write(content.encode('utf-8'))
fp.close()
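
This hunk looks like one of the leftover bugs the commit message warns about: fp is opened in text mode ('w') but receives content.encode('utf-8'), which raises TypeError under Python 3. A sketch of the likely fix, assuming the surrounding names:

with open(destpath, 'w', encoding='utf-8') as fp:
    fp.write(content)   # pass text; let the file object do the encoding
# or, keeping the bytes: open(destpath, 'wb') and write content.encode('utf-8')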

View File

@ -12,7 +12,7 @@
# resulting needless complexity and memory usage. It's been a while since I wanted to do that fork,
# and I'm doing it now.
from __future__ import unicode_literals
import hashlib
import logging
@ -25,13 +25,13 @@ class FSError(Exception):
cls_message = "An error has occured on '{name}' in '{parent}'"
def __init__(self, fsobject, parent=None):
message = self.cls_message
if isinstance(fsobject, basestring):
if isinstance(fsobject, str):
name = fsobject
elif isinstance(fsobject, File):
name = fsobject.name
else:
name = ''
parentname = unicode(parent) if parent is not None else ''
parentname = str(parent) if parent is not None else ''
Exception.__init__(self, message.format(name=name, parent=parentname))
@ -119,7 +119,7 @@ class File(object):
If `attrnames` is not None, caches only attrnames.
"""
if attrnames is None:
attrnames = self.INITIAL_INFO.keys()
attrnames = list(self.INITIAL_INFO.keys())
for attrname in attrnames:
if attrname not in self.__dict__:
self._read_info(attrname)

View File

@ -32,7 +32,7 @@ class DetailsPanel(GUIObject):
ref = group.ref if group is not None and group.ref is not dupe else None
l2 = self.app._get_display_info(ref, group, False)
names = [c['display'] for c in self.app.data.COLUMNS]
self._table = zip(names, l1, l2)
self._table = list(zip(names, l1, l2))
#--- Public
def row_count(self):

View File

@ -62,7 +62,7 @@ class DirectoryTree(GUIObject, Tree):
def _refresh(self):
self.clear()
for path in self.app.directories:
self.append(DirectoryNode(self.app, path, unicode(path)))
self.append(DirectoryNode(self.app, path, str(path)))
def add_directory(self, path):
self.app.add_directory(path)

View File

@ -39,5 +39,5 @@ class ProblemRow(Row):
Row.__init__(self, table)
self.dupe = dupe
self.msg = msg
self.path = unicode(dupe.path)
self.path = str(dupe.path)

View File

@ -63,7 +63,7 @@ class ResultTree(GUIObject, Tree):
def _select_nodes(self, nodes):
Tree._select_nodes(self, nodes)
self.app._select_dupes(map(attrgetter('_dupe'), nodes))
self.app._select_dupes(list(map(attrgetter('_dupe'), nodes)))
#--- Private
def _refresh(self):

View File

@ -22,7 +22,7 @@ class IgnoreList(object):
self._count = 0
def __iter__(self):
for first,seconds in self._ignored.iteritems():
for first,seconds in self._ignored.items():
for second in seconds:
yield (first,second)

View File

@ -147,7 +147,7 @@ class Results(Markable):
self.__filters.append(filter_str)
if self.__filtered_dupes is None:
self.__filtered_dupes = flatten(g[:] for g in self.groups)
self.__filtered_dupes = set(dupe for dupe in self.__filtered_dupes if filter_re.search(unicode(dupe.path)))
self.__filtered_dupes = set(dupe for dupe in self.__filtered_dupes if filter_re.search(str(dupe.path)))
filtered_groups = set()
for dupe in self.__filtered_dupes:
filtered_groups.add(self.get_group_of_duplicate(dupe))
@ -241,7 +241,7 @@ class Results(Markable):
func(dupe)
to_remove.append(dupe)
except EnvironmentError as e:
self.problems.append((dupe, unicode(e)))
self.problems.append((dupe, str(e)))
if remove_from_results:
self.remove_duplicates(to_remove)
self.mark_none()
@ -285,7 +285,7 @@ class Results(Markable):
words = ()
file_elem = etree.SubElement(group_elem, 'file')
try:
file_elem.set('path', unicode(d.path))
file_elem.set('path', str(d.path))
file_elem.set('words', ','.join(words))
except ValueError: # If there's an invalid character, just skip the file
file_elem.set('path', '')
@ -293,9 +293,9 @@ class Results(Markable):
file_elem.set('marked', ('y' if self.is_marked(d) else 'n'))
for match in g.matches:
match_elem = etree.SubElement(group_elem, 'match')
match_elem.set('first', unicode(dupe2index[match.first]))
match_elem.set('second', unicode(dupe2index[match.second]))
match_elem.set('percentage', unicode(int(match.percentage)))
match_elem.set('first', str(dupe2index[match.first]))
match_elem.set('second', str(dupe2index[match.second]))
match_elem.set('percentage', str(int(match.percentage)))
tree = etree.ElementTree(root)
with FileOrPath(outfile, 'wb') as fp:
tree.write(fp, encoding='utf-8')
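
The unicode() → str() swaps in save_to_xml also keep lxml happy, since it rejects non-string attribute values. A quick sketch:

from lxml import etree
el = etree.Element('match')
el.set('percentage', str(42))   # el.set('percentage', 42) raises TypeError
etree.tostring(el)              # b'<match percentage="42"/>'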

View File

@ -1,109 +1,113 @@
# Created By: Virgil Dupras
# Created On: 2006/03/03
# Copyright 2010 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "HS" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
import logging
from hscommon import job
from hsutil import io
from hsutil.misc import dedupe
from hsutil.str import get_file_ext, rem_file_ext
from . import engine
from .ignore import IgnoreList
(SCAN_TYPE_FILENAME,
SCAN_TYPE_FIELDS,
SCAN_TYPE_FIELDS_NO_ORDER,
SCAN_TYPE_TAG,
UNUSED, # Must not be removed. These constants must stay in sync with the scan_type values stored in the prefs.
SCAN_TYPE_CONTENT,
SCAN_TYPE_CONTENT_AUDIO) = range(7)
SCANNABLE_TAGS = ['track', 'artist', 'album', 'title', 'genre', 'year']
class Scanner(object):
def __init__(self):
self.ignore_list = IgnoreList()
self.discarded_file_count = 0
def _getmatches(self, files, j):
if self.size_threshold:
j = j.start_subjob([2, 8])
for f in j.iter_with_progress(files, 'Read size of %d/%d files'):
f.size # pre-read; reading it here makes progress reporting smoother (especially for bundles)
files = [f for f in files if f.size >= self.size_threshold]
if self.scan_type in (SCAN_TYPE_CONTENT, SCAN_TYPE_CONTENT_AUDIO):
sizeattr = 'size' if self.scan_type == SCAN_TYPE_CONTENT else 'audiosize'
return engine.getmatches_by_contents(files, sizeattr, partial=self.scan_type==SCAN_TYPE_CONTENT_AUDIO, j=j)
else:
j = j.start_subjob([2, 8])
kw = {}
kw['match_similar_words'] = self.match_similar_words
kw['weight_words'] = self.word_weighting
kw['min_match_percentage'] = self.min_match_percentage
if self.scan_type == SCAN_TYPE_FIELDS_NO_ORDER:
self.scan_type = SCAN_TYPE_FIELDS
kw['no_field_order'] = True
func = {
SCAN_TYPE_FILENAME: lambda f: engine.getwords(rem_file_ext(f.name)),
SCAN_TYPE_FIELDS: lambda f: engine.getfields(rem_file_ext(f.name)),
SCAN_TYPE_TAG: lambda f: [engine.getwords(unicode(getattr(f, attrname))) for attrname in SCANNABLE_TAGS if attrname in self.scanned_tags],
}[self.scan_type]
for f in j.iter_with_progress(files, 'Read metadata of %d/%d files'):
f.words = func(f)
return engine.getmatches(files, j=j, **kw)
@staticmethod
def _key_func(dupe):
return (not dupe.is_ref, -dupe.size)
@staticmethod
def _tie_breaker(ref, dupe):
refname = rem_file_ext(ref.name).lower()
dupename = rem_file_ext(dupe.name).lower()
if 'copy' in refname and 'copy' not in dupename:
return True
if refname.startswith(dupename) and (refname[len(dupename):].strip().isdigit()):
return True
return len(dupe.path) > len(ref.path)
def GetDupeGroups(self, files, j=job.nulljob):
j = j.start_subjob([8, 2])
for f in [f for f in files if not hasattr(f, 'is_ref')]:
f.is_ref = False
logging.info('Getting matches')
matches = self._getmatches(files, j)
logging.info('Found %d matches' % len(matches))
j.set_progress(100, 'Removing false matches')
if not self.mix_file_kind:
matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
matches = [m for m in matches if io.exists(m.first.path) and io.exists(m.second.path)]
if self.ignore_list:
j = j.start_subjob(2)
iter_matches = j.iter_with_progress(matches, 'Processed %d/%d matches against the ignore list')
matches = [m for m in iter_matches
if not self.ignore_list.AreIgnored(unicode(m.first.path), unicode(m.second.path))]
logging.info('Grouping matches')
groups = engine.get_groups(matches, j)
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
groups = [g for g in groups if any(not f.is_ref for f in g)]
logging.info('Created %d groups' % len(groups))
j.set_progress(100, 'Doing group prioritization')
for g in groups:
g.prioritize(self._key_func, self._tie_breaker)
return groups
match_similar_words = False
min_match_percentage = 80
mix_file_kind = True
scan_type = SCAN_TYPE_FILENAME
scanned_tags = set(['artist', 'title'])
size_threshold = 0
word_weighting = False
# Created By: Virgil Dupras
# Created On: 2006/03/03
# Copyright 2010 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "HS" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
import logging
from hscommon import job
from hsutil import io
from hsutil.misc import dedupe
from hsutil.str import get_file_ext, rem_file_ext
from . import engine
from .ignore import IgnoreList
(SCAN_TYPE_FILENAME,
SCAN_TYPE_FIELDS,
SCAN_TYPE_FIELDS_NO_ORDER,
SCAN_TYPE_TAG,
UNUSED, # Must not be removed. These constants must stay in sync with the scan_type values stored in the prefs.
SCAN_TYPE_CONTENT,
SCAN_TYPE_CONTENT_AUDIO) = range(7)
SCANNABLE_TAGS = ['track', 'artist', 'album', 'title', 'genre', 'year']
class Scanner(object):
def __init__(self):
self.ignore_list = IgnoreList()
self.discarded_file_count = 0
def _getmatches(self, files, j):
if self.size_threshold:
j = j.start_subjob([2, 8])
for f in j.iter_with_progress(files, 'Read size of %d/%d files'):
f.size # pre-read; reading it here makes progress reporting smoother (especially for bundles)
files = [f for f in files if f.size >= self.size_threshold]
if self.scan_type in (SCAN_TYPE_CONTENT, SCAN_TYPE_CONTENT_AUDIO):
sizeattr = 'size' if self.scan_type == SCAN_TYPE_CONTENT else 'audiosize'
return engine.getmatches_by_contents(files, sizeattr, partial=self.scan_type==SCAN_TYPE_CONTENT_AUDIO, j=j)
else:
j = j.start_subjob([2, 8])
kw = {}
kw['match_similar_words'] = self.match_similar_words
kw['weight_words'] = self.word_weighting
kw['min_match_percentage'] = self.min_match_percentage
if self.scan_type == SCAN_TYPE_FIELDS_NO_ORDER:
self.scan_type = SCAN_TYPE_FIELDS
kw['no_field_order'] = True
func = {
SCAN_TYPE_FILENAME: lambda f: engine.getwords(rem_file_ext(f.name)),
SCAN_TYPE_FIELDS: lambda f: engine.getfields(rem_file_ext(f.name)),
SCAN_TYPE_TAG: lambda f: [engine.getwords(str(getattr(f, attrname))) for attrname in SCANNABLE_TAGS if attrname in self.scanned_tags],
}[self.scan_type]
for f in j.iter_with_progress(files, 'Read metadata of %d/%d files'):
f.words = func(f)
return engine.getmatches(files, j=j, **kw)
@staticmethod
def _key_func(dupe):
return (not dupe.is_ref, -dupe.size)
@staticmethod
def _tie_breaker(ref, dupe):
refname = rem_file_ext(ref.name).lower()
dupename = rem_file_ext(dupe.name).lower()
if 'copy' in dupename:
return False
if 'copy' in refname:
return True
if dupename.startswith(refname) and (dupename[len(refname):].strip().isdigit()):
return False
if refname.startswith(dupename) and (refname[len(dupename):].strip().isdigit()):
return True
return len(dupe.path) > len(ref.path)
def GetDupeGroups(self, files, j=job.nulljob):
j = j.start_subjob([8, 2])
for f in [f for f in files if not hasattr(f, 'is_ref')]:
f.is_ref = False
logging.info('Getting matches')
matches = self._getmatches(files, j)
logging.info('Found %d matches' % len(matches))
j.set_progress(100, 'Removing false matches')
if not self.mix_file_kind:
matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
matches = [m for m in matches if io.exists(m.first.path) and io.exists(m.second.path)]
if self.ignore_list:
j = j.start_subjob(2)
iter_matches = j.iter_with_progress(matches, 'Processed %d/%d matches against the ignore list')
matches = [m for m in iter_matches
if not self.ignore_list.AreIgnored(str(m.first.path), str(m.second.path))]
logging.info('Grouping matches')
groups = engine.get_groups(matches, j)
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
groups = [g for g in groups if any(not f.is_ref for f in g)]
logging.info('Created %d groups' % len(groups))
j.set_progress(100, 'Doing group prioritization')
for g in groups:
g.prioritize(self._key_func, self._tie_breaker)
return groups
match_similar_words = False
min_match_percentage = 80
mix_file_kind = True
scan_type = SCAN_TYPE_FILENAME
scanned_tags = set(['artist', 'title'])
size_threshold = 0
word_weighting = False
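
One hunk in scanner.py goes beyond the py3 conversion: _tie_breaker gains symmetric checks so that a name containing 'copy', or a name that is another name plus a trailing number, can never win the reference spot. A standalone paraphrase for illustration — the parameter names and the True-promotes-the-dupe convention are assumptions read off the diff:

def tie_breaker(refname, dupename, ref_path_len, dupe_path_len):
    # True means the dupe should take over as the group's reference file.
    if 'copy' in dupename:   # new: a 'copy' never becomes the reference
        return False
    if 'copy' in refname:
        return True
    if dupename.startswith(refname) and dupename[len(refname):].strip().isdigit():
        return False         # new: 'foo 2' loses to 'foo' in either direction
    if refname.startswith(dupename) and refname[len(dupename):].strip().isdigit():
        return True
    return dupe_path_len > ref_path_len   # unchanged fallback

assert tie_breaker('foo copy', 'foo', 2, 2) is True   # 'foo' becomes the reference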

View File

@ -109,7 +109,7 @@ class TCDupeGuru(TestCase):
def test_Scan_with_objects_evaluating_to_false(self):
class FakeFile(fs.File):
def __nonzero__(self):
def __bool__(self):
return False
@ -200,11 +200,11 @@ class TCDupeGuruWithResults(TestCase):
if expected is not None:
expected = set(expected)
not_called = expected - calls
assert not not_called, u"These calls haven't been made: {0}".format(not_called)
assert not not_called, "These calls haven't been made: {0}".format(not_called)
if not_expected is not None:
not_expected = set(not_expected)
called = not_expected & calls
assert not called, u"These calls shouldn't have been made: {0}".format(called)
assert not called, "These calls shouldn't have been made: {0}".format(called)
gui.clear_calls()
def clear_gui_calls(self):
@ -409,9 +409,9 @@ class TCDupeGuruWithResults(TestCase):
def test_only_unicode_is_added_to_ignore_list(self):
def FakeIgnore(first,second):
if not isinstance(first,unicode):
if not isinstance(first,str):
self.fail()
if not isinstance(second,unicode):
if not isinstance(second,str):
self.fail()
app = self.app
@ -423,11 +423,11 @@ class TCDupeGuruWithResults(TestCase):
class TCDupeGuru_renameSelected(TestCase):
def setUp(self):
p = self.tmppath()
fp = open(unicode(p + 'foo bar 1'),mode='w')
fp = open(str(p + 'foo bar 1'),mode='w')
fp.close()
fp = open(unicode(p + 'foo bar 2'),mode='w')
fp = open(str(p + 'foo bar 2'),mode='w')
fp.close()
fp = open(unicode(p + 'foo bar 3'),mode='w')
fp = open(str(p + 'foo bar 3'),mode='w')
fp.close()
files = fs.get_files(p)
matches = engine.getmatches(files)
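
The test's falsy file object is renamed from __nonzero__ to __bool__, the protocol name Python 3 actually consults. A sketch:

class FakeFile:
    def __bool__(self):   # Python 3 looks up __bool__; __nonzero__ is silently ignored
        return False

assert not FakeFile()     # truth-testing now goes through __bool__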

View File

@ -82,8 +82,8 @@ class TCDirectories(TestCase):
def test_AddPath_non_latin(self):
p = Path(self.tmpdir())
to_add = p + u'unicode\u201a'
os.mkdir(unicode(to_add))
to_add = p + 'unicode\u201a'
os.mkdir(str(to_add))
d = Directories()
try:
d.add_path(to_add)
@ -111,7 +111,7 @@ class TCDirectories(TestCase):
self.assertEqual(STATE_REFERENCE,d.get_state(p))
self.assertEqual(STATE_REFERENCE,d.get_state(p + 'dir1'))
self.assertEqual(1,len(d.states))
self.assertEqual(p,d.states.keys()[0])
self.assertEqual(p,list(d.states.keys())[0])
self.assertEqual(STATE_REFERENCE,d.states[p])
def test_get_state_with_path_not_there(self):
@ -213,11 +213,11 @@ class TCDirectories(TestCase):
def test_unicode_save(self):
d = Directories()
p1 = self.tmppath() + u'hello\xe9'
p1 = self.tmppath() + 'hello\xe9'
io.mkdir(p1)
io.mkdir(p1 + u'foo\xe9')
io.mkdir(p1 + 'foo\xe9')
d.add_path(p1)
d.set_state(p1 + u'foo\xe9', STATE_EXCLUDED)
d.set_state(p1 + 'foo\xe9', STATE_EXCLUDED)
tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
try:
d.save_to_file(tmpxml)

View File

@ -62,12 +62,12 @@ class TCgetwords(TestCase):
def test_splitter_chars(self):
self.assertEqual(
[chr(i) for i in xrange(ord('a'),ord('z')+1)],
[chr(i) for i in range(ord('a'),ord('z')+1)],
getwords("a-b_c&d+e(f)g;h\\i[j]k{l}m:n.o,p<q>r/s?t~u!v@w#x$y*z")
)
def test_joiner_chars(self):
self.assertEqual(["aec"], getwords(u"a'e\u0301c"))
self.assertEqual(["aec"], getwords("a'e\u0301c"))
def test_empty(self):
self.assertEqual([], getwords(''))
@ -76,7 +76,7 @@ class TCgetwords(TestCase):
self.assertEqual(['foo', 'bar'], getwords('FOO BAR'))
def test_decompose_unicode(self):
self.assertEqual(getwords(u'foo\xe9bar'), ['fooebar'])
self.assertEqual(getwords('foo\xe9bar'), ['fooebar'])
class TCgetfields(TestCase):
@ -768,7 +768,7 @@ class TCget_groups(TestCase):
self.assert_(o3 in g)
def test_four_sized_group(self):
l = [NamedObject("foobar") for i in xrange(4)]
l = [NamedObject("foobar") for i in range(4)]
m = getmatches(l)
r = get_groups(m)
self.assertEqual(1,len(r))

View File

@ -6,7 +6,7 @@
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
import cStringIO
import io
from lxml import etree
from hsutil.testutil import eq_
@ -59,7 +59,7 @@ def test_save_to_xml():
il.Ignore('foo','bar')
il.Ignore('foo','bleh')
il.Ignore('bleh','bar')
f = cStringIO.StringIO()
f = io.BytesIO()
il.save_to_xml(f)
f.seek(0)
doc = etree.parse(f)
@ -76,19 +76,19 @@ def test_SaveThenLoad():
il.Ignore('foo', 'bar')
il.Ignore('foo', 'bleh')
il.Ignore('bleh', 'bar')
il.Ignore(u'\u00e9', 'bar')
f = cStringIO.StringIO()
il.Ignore('\u00e9', 'bar')
f = io.BytesIO()
il.save_to_xml(f)
f.seek(0)
f.seek(0)
il = IgnoreList()
il.load_from_xml(f)
eq_(4,len(il))
assert il.AreIgnored(u'\u00e9','bar')
assert il.AreIgnored('\u00e9','bar')
def test_LoadXML_with_empty_file_tags():
f = cStringIO.StringIO()
f.write('<?xml version="1.0" encoding="utf-8"?><ignore_list><file><file/></file></ignore_list>')
f = io.BytesIO()
f.write(b'<?xml version="1.0" encoding="utf-8"?><ignore_list><file><file/></file></ignore_list>')
f.seek(0)
il = IgnoreList()
il.load_from_xml(f)
@ -130,12 +130,12 @@ def test_filter():
def test_save_with_non_ascii_items():
il = IgnoreList()
il.Ignore(u'\xac', u'\xbf')
f = cStringIO.StringIO()
il.Ignore('\xac', '\xbf')
f = io.BytesIO()
try:
il.save_to_xml(f)
except Exception as e:
raise AssertionError(unicode(e))
raise AssertionError(str(e))
def test_len():
il = IgnoreList()
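
cStringIO is gone in Python 3, and since lxml reads and writes bytes, the tests switch to io.BytesIO plus bytes literals. A sketch:

import io
f = io.BytesIO()                                  # replaces cStringIO.StringIO()
f.write(b'<?xml version="1.0"?><ignore_list/>')   # note the b'' prefix
f.seek(0)
f.read()                                          # b'<?xml version="1.0"?><ignore_list/>'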

View File

@ -7,7 +7,7 @@
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
import StringIO
import io
import os.path as op
from lxml import etree
@ -25,7 +25,7 @@ class NamedObject(engine_test.NamedObject):
path = property(lambda x:Path('basepath') + x.name)
is_ref = False
def __nonzero__(self):
def __bool__(self):
return False #Make sure that operations are made correctly when the bool value of files is false.
# Returns a group set that looks like that:
@ -63,7 +63,7 @@ class TCResultsEmpty(TestCase):
self.assert_(self.results.get_group_of_duplicate('foo') is None)
def test_save_to_xml(self):
f = StringIO.StringIO()
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
doc = etree.parse(f)
@ -324,7 +324,7 @@ class TCResultsMarkings(TestCase):
def test_SaveXML(self):
self.results.mark(self.objects[1])
self.results.mark_invert()
f = StringIO.StringIO()
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
doc = etree.parse(f)
@ -345,7 +345,7 @@ class TCResultsMarkings(TestCase):
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
self.results.mark(self.objects[1])
self.results.mark_invert()
f = StringIO.StringIO()
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
r = Results(data)
@ -369,7 +369,7 @@ class TCResultsXML(TestCase):
def test_save_to_xml(self):
self.objects[0].is_ref = True
self.objects[0].words = [['foo','bar']]
f = StringIO.StringIO()
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
doc = etree.parse(f)
@ -408,7 +408,7 @@ class TCResultsXML(TestCase):
self.objects[0].is_ref = True
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
f = StringIO.StringIO()
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
r = Results(data)
@ -451,7 +451,7 @@ class TCResultsXML(TestCase):
return [f for f in self.objects if str(f.path) == path][0]
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
f = StringIO.StringIO()
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
r = Results(data)
@ -490,7 +490,7 @@ class TCResultsXML(TestCase):
match_node.set('percentage', 'baz')
group_node = etree.SubElement(root, 'foobar') #invalid group
group_node = etree.SubElement(root, 'group') #empty group
f = StringIO.StringIO()
f = io.BytesIO()
tree = etree.ElementTree(root)
tree.write(f, encoding='utf-8')
f.seek(0)
@ -501,30 +501,30 @@ class TCResultsXML(TestCase):
def test_xml_non_ascii(self):
def get_file(path):
if path == op.join('basepath',u'\xe9foo bar'):
if path == op.join('basepath','\xe9foo bar'):
return objects[0]
if path == op.join('basepath',u'bar bleh'):
if path == op.join('basepath','bar bleh'):
return objects[1]
objects = [NamedObject(u"\xe9foo bar",True),NamedObject("bar bleh",True)]
objects = [NamedObject("\xe9foo bar",True),NamedObject("bar bleh",True)]
matches = engine.getmatches(objects) #we should have 5 matches
groups = engine.get_groups(matches) #We should have 2 groups
for g in groups:
g.prioritize(lambda x:objects.index(x)) #We want the dupes to be in the same order as the list is
results = Results(data)
results.groups = groups
f = StringIO.StringIO()
f = io.BytesIO()
results.save_to_xml(f)
f.seek(0)
r = Results(data)
r.load_from_xml(f,get_file)
g = r.groups[0]
self.assertEqual(u"\xe9foo bar",g[0].name)
self.assertEqual("\xe9foo bar",g[0].name)
self.assertEqual(['efoo','bar'],g[0].words)
def test_load_invalid_xml(self):
f = StringIO.StringIO()
f.write('<this is invalid')
f = io.BytesIO()
f.write(b'<this is invalid')
f.seek(0)
r = Results(data)
r.load_from_xml(f,None)
@ -546,7 +546,7 @@ class TCResultsXML(TestCase):
fake_matches.add(engine.Match(d1, d3, 43))
fake_matches.add(engine.Match(d2, d3, 46))
group.matches = fake_matches
f = StringIO.StringIO()
f = io.BytesIO()
results = self.results
results.save_to_xml(f)
f.seek(0)
@ -564,7 +564,7 @@ class TCResultsXML(TestCase):
def test_save_and_load(self):
# previously, when reloading matches, they wouldn't be reloaded as namedtuples
f = StringIO.StringIO()
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
self.results.load_from_xml(f, self.get_file)
@ -572,13 +572,13 @@ class TCResultsXML(TestCase):
def test_apply_filter_works_on_paths(self):
# apply_filter() searches on the whole path, not just on the filename.
self.results.apply_filter(u'basepath')
self.results.apply_filter('basepath')
eq_(len(self.results.groups), 2)
def test_save_xml_with_invalid_characters(self):
# Don't crash when saving files that have invalid xml characters in their path
self.objects[0].name = u'foo\x19'
self.results.save_to_xml(StringIO.StringIO()) # don't crash
self.objects[0].name = 'foo\x19'
self.results.save_to_xml(io.BytesIO()) # don't crash
class TCResultsFilter(TestCase):

View File

@ -25,6 +25,9 @@ class NamedObject(object):
self.path = Path('')
self.words = getwords(name)
def __repr__(self):
return '<NamedObject %r>' % self.name
no = NamedObject
@ -297,8 +300,8 @@ class ScannerTestFakeFiles(TestCase):
s.scanned_tags = set(['title'])
o1 = no('foo')
o2 = no('bar')
o1.title = u'foobar\u00e9'
o2.title = u'foobar\u00e9'
o1.title = 'foobar\u00e9'
o2.title = 'foobar\u00e9'
try:
r = s.GetDupeGroups([o1, o2])
except UnicodeEncodeError:
@ -362,11 +365,11 @@ class ScannerTestFakeFiles(TestCase):
f1 = no('foobar')
f2 = no('foobar')
f3 = no('foobar')
f1.path = Path(u'foo1\u00e9')
f2.path = Path(u'foo2\u00e9')
f3.path = Path(u'foo3\u00e9')
s.ignore_list.Ignore(unicode(f1.path),unicode(f2.path))
s.ignore_list.Ignore(unicode(f1.path),unicode(f3.path))
f1.path = Path('foo1\u00e9')
f2.path = Path('foo2\u00e9')
f3.path = Path('foo3\u00e9')
s.ignore_list.Ignore(str(f1.path),str(f2.path))
s.ignore_list.Ignore(str(f1.path),str(f3.path))
r = s.GetDupeGroups([f1,f2,f3])
eq_(len(r), 1)
g = r[0]
@ -379,7 +382,7 @@ class ScannerTestFakeFiles(TestCase):
# A very wrong way to use any() was added at some point, causing the resulting group list
# to be empty.
class FalseNamedObject(NamedObject):
def __nonzero__(self):
def __bool__(self):
return False

View File

@ -41,7 +41,7 @@ class DupeGuruME(DupeGuruBase):
try:
track.delete(timeout=0)
except CommandError as e:
logging.warning('Error while trying to remove a track from iTunes: %s' % unicode(e))
logging.warning('Error while trying to remove a track from iTunes: %s' % str(e))
self._start_job(JOB_REMOVE_DEAD_TRACKS, do)

View File

@ -42,12 +42,12 @@ class Mp3File(MusicFile):
HANDLED_EXTS = set(['mp3'])
def _read_info(self, field):
if field == 'md5partial':
fileinfo = mpeg.Mpeg(unicode(self.path))
fileinfo = mpeg.Mpeg(str(self.path))
self._md5partial_offset = fileinfo.audio_offset
self._md5partial_size = fileinfo.audio_size
MusicFile._read_info(self, field)
if field in TAG_FIELDS:
fileinfo = mpeg.Mpeg(unicode(self.path))
fileinfo = mpeg.Mpeg(str(self.path))
self.audiosize = fileinfo.audio_size
self.bitrate = fileinfo.bitrate
self.duration = fileinfo.duration
@ -70,12 +70,12 @@ class WmaFile(MusicFile):
HANDLED_EXTS = set(['wma'])
def _read_info(self, field):
if field == 'md5partial':
dec = wma.WMADecoder(unicode(self.path))
dec = wma.WMADecoder(str(self.path))
self._md5partial_offset = dec.audio_offset
self._md5partial_size = dec.audio_size
MusicFile._read_info(self, field)
if field in TAG_FIELDS:
dec = wma.WMADecoder(unicode(self.path))
dec = wma.WMADecoder(str(self.path))
self.audiosize = dec.audio_size
self.bitrate = dec.bitrate
self.duration = dec.duration
@ -92,13 +92,13 @@ class Mp4File(MusicFile):
HANDLED_EXTS = set(['m4a', 'm4p'])
def _read_info(self, field):
if field == 'md5partial':
dec = mp4.File(unicode(self.path))
dec = mp4.File(str(self.path))
self._md5partial_offset = dec.audio_offset
self._md5partial_size = dec.audio_size