From 854d194f889f2f6165c0348df1f4083d6fa0e498 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Wed, 11 Aug 2010 16:39:06 +0200 Subject: [PATCH] Converted to py3k. There's probably some bugs still. So far, I managed to run dupeGuru SE under pyobjc and qt. --- build.py | 20 +-- cocoa/pe/dg_cocoa.py | 4 +- cocoa/se/dg_cocoa.py | 6 +- configure.py | 2 +- core/app.py | 16 +-- core/app_cocoa.py | 4 +- core/data.py | 4 +- core/directories.py | 8 +- core/engine.py | 12 +- core/export.py | 18 +-- core/fs.py | 8 +- core/gui/details_panel.py | 2 +- core/gui/directory_tree.py | 2 +- core/gui/problem_table.py | 2 +- core/gui/result_tree.py | 2 +- core/ignore.py | 2 +- core/results.py | 12 +- core/scanner.py | 222 +++++++++++++++++---------------- core/tests/app_test.py | 16 +-- core/tests/directories_test.py | 12 +- core/tests/engine_test.py | 8 +- core/tests/ignore_test.py | 20 +-- core/tests/results_test.py | 42 +++---- core/tests/scanner_test.py | 19 +-- core_me/app_cocoa.py | 2 +- core_me/fs.py | 24 ++-- core_pe/app_cocoa.py | 18 +-- core_pe/block.py | 2 +- core_pe/cache.py | 4 +- core_pe/gen.py | 4 +- core_pe/matchbase.py | 6 +- core_pe/modules/block.c | 54 +++++--- core_pe/modules/block_osx.m | 32 +++-- core_pe/modules/cache.c | 24 +++- core_pe/modules/common.c | 2 +- core_pe/tests/block_test.py | 4 +- core_pe/tests/cache_test.py | 2 - core_se/app_cocoa.py | 8 +- core_se/fs.py | 2 +- package.py | 8 +- qt/base/app.py | 8 +- qt/base/directories_dialog.py | 4 +- qt/base/directories_model.py | 8 +- qt/base/main_window.py | 12 +- qt/base/platform.py | 6 +- qt/base/platform_lnx.py | 2 +- qt/base/platform_osx.py | 2 +- qt/base/platform_win.py | 2 +- qt/base/results_model.py | 2 +- qt/me/preferences_dialog.py | 2 +- qt/pe/app.py | 6 +- qt/pe/details_dialog.py | 4 +- qt/pe/gen.py | 2 +- qt/pe/preferences_dialog.py | 2 +- qt/se/preferences_dialog.py | 4 +- qt/se/start.py | 2 + run.py | 5 +- 57 files changed, 394 insertions(+), 338 deletions(-) diff --git a/build.py b/build.py index 6ceab378..4c6e5825 100644 --- a/build.py +++ b/build.py @@ -20,10 +20,10 @@ from hscommon.build import add_to_pythonpath, print_and_do, build_all_qt_ui, cop def build_cocoa(edition, dev, help_destpath): if not dev: - print "Building help index" + print("Building help index") os.system('open -a /Developer/Applications/Utilities/Help\\ Indexer.app {0}'.format(help_destpath)) - print "Building dg_cocoa.plugin" + print("Building dg_cocoa.plugin") if op.exists('build'): shutil.rmtree('build') os.mkdir('build') @@ -54,7 +54,7 @@ def build_cocoa(edition, dev, help_destpath): pthpath = op.join(pluginpath, 'Contents/Resources/dev.pth') open(pthpath, 'w').write(op.abspath('.')) os.chdir(cocoa_project_path) - print "Building the XCode project" + print("Building the XCode project") args = [] if dev: args.append('-configuration dev') @@ -68,10 +68,10 @@ def build_qt(edition, dev): build_all_qt_ui(op.join('qtlib', 'ui')) build_all_qt_ui(op.join('qt', 'base')) build_all_qt_ui(op.join('qt', edition)) - print_and_do("pyrcc4 {0} > {1}".format(op.join('qt', 'base', 'dg.qrc'), op.join('qt', 'base', 'dg_rc.py'))) + print_and_do("pyrcc4 -py3 {0} > {1}".format(op.join('qt', 'base', 'dg.qrc'), op.join('qt', 'base', 'dg_rc.py'))) if edition == 'pe': os.chdir(op.join('qt', edition)) - os.system('python gen.py') + os.system('python3 gen.py') os.chdir(op.join('..', '..')) def main(): @@ -79,11 +79,11 @@ def main(): edition = conf['edition'] ui = conf['ui'] dev = conf['dev'] - print "Building dupeGuru {0} with UI {1}".format(edition.upper(), ui) + print("Building dupeGuru {0} with UI {1}".format(edition.upper(), ui)) if dev: - print "Building in Dev mode" + print("Building in Dev mode") add_to_pythonpath('.') - print "Generating Help" + print("Generating Help") windows = sys.platform == 'win32' profile = 'win_en' if windows else 'osx_en' help_dir = 'help_{0}'.format(edition) @@ -91,10 +91,10 @@ def main(): help_basepath = op.abspath(help_dir) help_destpath = op.abspath(op.join(help_dir, dest_dir)) helpgen.gen(help_basepath, help_destpath, profile=profile) - print "Building dupeGuru" + print("Building dupeGuru") if edition == 'pe': os.chdir('core_pe') - os.system('python gen.py') + os.system('python3 gen.py') os.chdir('..') if ui == 'cocoa': build_cocoa(edition, dev, help_destpath) diff --git a/cocoa/pe/dg_cocoa.py b/cocoa/pe/dg_cocoa.py index 43921a17..971bfd01 100644 --- a/cocoa/pe/dg_cocoa.py +++ b/cocoa/pe/dg_cocoa.py @@ -23,10 +23,10 @@ class PyDupeGuru(PyDupeGuruBase): #---Information def getSelectedDupePath(self): - return unicode(self.py.selected_dupe_path()) + return str(self.py.selected_dupe_path()) def getSelectedDupeRefPath(self): - return unicode(self.py.selected_dupe_ref_path()) + return str(self.py.selected_dupe_ref_path()) #---Properties def setMatchScaled_(self,match_scaled): diff --git a/cocoa/se/dg_cocoa.py b/cocoa/se/dg_cocoa.py index 940b7588..0ca6c33a 100644 --- a/cocoa/se/dg_cocoa.py +++ b/cocoa/se/dg_cocoa.py @@ -11,8 +11,10 @@ from core.app_cocoa_inter import PyDupeGuruBase, PyDetailsPanel from core_se.app_cocoa import DupeGuru # Fix py2app imports with chokes on relative imports and other stuff -from core_se import fs, data -from lxml import etree, _elementpath +import hsutil.conflict +import core.engine, core.fs, core.app +import core_se.fs, core_se.data +import lxml.etree, lxml._elementpath import gzip class PyDupeGuru(PyDupeGuruBase): diff --git a/configure.py b/configure.py index 1ecc4704..d866ae86 100644 --- a/configure.py +++ b/configure.py @@ -18,7 +18,7 @@ def main(edition, ui, dev): if ui not in ('cocoa', 'qt'): ui = 'cocoa' if sys.platform == 'darwin' else 'qt' build_type = 'Dev' if dev else 'Release' - print "Configuring dupeGuru {0} for UI {1} ({2})".format(edition.upper(), ui, build_type) + print("Configuring dupeGuru {0} for UI {1} ({2})".format(edition.upper(), ui, build_type)) conf = { 'edition': edition, 'ui': ui, diff --git a/core/app.py b/core/app.py index 473b5aca..a8093507 100644 --- a/core/app.py +++ b/core/app.py @@ -6,7 +6,7 @@ # which should be included with this package. The terms are also available at # http://www.hardcoded.net/licenses/hs_license -from __future__ import unicode_literals + import os import os.path as op @@ -76,7 +76,7 @@ class DupeGuru(RegistrableApplication, Broadcaster): def _do_delete_dupe(self, dupe): if not io.exists(dupe.path): return - send2trash(unicode(dupe.path)) # Raises OSError when there's a problem + send2trash(str(dupe.path)) # Raises OSError when there's a problem self.clean_empty_dirs(dupe.path[:-1]) def _do_load(self, j): @@ -100,7 +100,7 @@ class DupeGuru(RegistrableApplication, Broadcaster): try: return self.data.GetDisplayInfo(dupe, group, delta) except Exception as e: - logging.warning("Exception on GetDisplayInfo for %s: %s", unicode(dupe.path), unicode(e)) + logging.warning("Exception on GetDisplayInfo for %s: %s", str(dupe.path), str(e)) return ['---'] * len(self.data.COLUMNS) def _get_file(self, str_path): @@ -149,7 +149,7 @@ class DupeGuru(RegistrableApplication, Broadcaster): g = self.results.get_group_of_duplicate(dupe) for other in g: if other is not dupe: - self.scanner.ignore_list.Ignore(unicode(other.path), unicode(dupe.path)) + self.scanner.ignore_list.Ignore(str(other.path), str(dupe.path)) self.remove_duplicates(dupes) def apply_filter(self, filter): @@ -208,7 +208,7 @@ class DupeGuru(RegistrableApplication, Broadcaster): def export_to_xhtml(self, column_ids): column_ids = [colid for colid in column_ids if colid.isdigit()] - column_ids = map(int, column_ids) + column_ids = list(map(int, column_ids)) column_ids.sort() colnames = [col['display'] for i, col in enumerate(self.data.COLUMNS) if i in column_ids] rows = [] @@ -232,8 +232,8 @@ class DupeGuru(RegistrableApplication, Broadcaster): dupe = self.selected_dupes[0] group = self.results.get_group_of_duplicate(dupe) ref = group.ref - cmd = cmd.replace('%d', unicode(dupe.path)) - cmd = cmd.replace('%r', unicode(ref.path)) + cmd = cmd.replace('%d', str(dupe.path)) + cmd = cmd.replace('%r', str(ref.path)) match = re.match(r'"([^"]+)"(.*)', cmd) if match is not None: # This code here is because subprocess. Popen doesn't seem to accept, under Windows, @@ -313,7 +313,7 @@ class DupeGuru(RegistrableApplication, Broadcaster): d.rename(newname) return True except (IndexError, fs.FSError) as e: - logging.warning("dupeGuru Warning: %s" % unicode(e)) + logging.warning("dupeGuru Warning: %s" % str(e)) return False def reveal_selected(self): diff --git a/core/app_cocoa.py b/core/app_cocoa.py index 40886808..3ef49c3d 100644 --- a/core/app_cocoa.py +++ b/core/app_cocoa.py @@ -49,11 +49,11 @@ class DupeGuru(app.DupeGuru): #--- Override @staticmethod def _open_path(path): - NSWorkspace.sharedWorkspace().openFile_(unicode(path)) + NSWorkspace.sharedWorkspace().openFile_(str(path)) @staticmethod def _reveal_path(path): - NSWorkspace.sharedWorkspace().selectFile_inFileViewerRootedAtPath_(unicode(path), '') + NSWorkspace.sharedWorkspace().selectFile_inFileViewerRootedAtPath_(str(path), '') def _start_job(self, jobid, func): try: diff --git a/core/data.py b/core/data.py index 48077efb..d5d45bd3 100644 --- a/core/data.py +++ b/core/data.py @@ -11,7 +11,7 @@ from hsutil.str import format_time, FT_DECIMAL, format_size import time def format_path(p): - return unicode(p[:-1]) + return str(p[:-1]) def format_timestamp(t, delta): if delta: @@ -38,4 +38,4 @@ def format_dupe_count(c): return str(c) if c else '---' def cmp_value(value): - return value.lower() if isinstance(value, basestring) else value + return value.lower() if isinstance(value, str) else value diff --git a/core/directories.py b/core/directories.py index 7fa75dd1..fa7202bf 100644 --- a/core/directories.py +++ b/core/directories.py @@ -151,11 +151,11 @@ class Directories(object): root = etree.Element('directories') for root_path in self: root_path_node = etree.SubElement(root, 'root_directory') - root_path_node.set('path', unicode(root_path)) - for path, state in self.states.iteritems(): + root_path_node.set('path', str(root_path)) + for path, state in self.states.items(): state_node = etree.SubElement(root, 'state') - state_node.set('path', unicode(path)) - state_node.set('value', unicode(state)) + state_node.set('path', str(path)) + state_node.set('value', str(state)) tree = etree.ElementTree(root) tree.write(fp, encoding='utf-8') diff --git a/core/engine.py b/core/engine.py index 9d758b98..18817ca3 100644 --- a/core/engine.py +++ b/core/engine.py @@ -6,7 +6,7 @@ # which should be included with this package. The terms are also available at # http://www.hardcoded.net/licenses/hs_license -from __future__ import division + import difflib import itertools import logging @@ -25,15 +25,15 @@ NO_FIELD_ORDER) = range(3) JOB_REFRESH_RATE = 100 def getwords(s): - if isinstance(s, unicode): + if isinstance(s, str): s = normalize('NFD', s) s = multi_replace(s, "-_&+():;\\[]{}.,<>/?~!@#$*", ' ').lower() s = ''.join(c for c in s if c in string.ascii_letters + string.digits + string.whitespace) - return filter(None, s.split(' ')) # filter() is to remove empty elements + return [_f for _f in s.split(' ') if _f] # remove empty elements def getfields(s): fields = [getwords(field) for field in s.split(' - ')] - return filter(None, fields) + return [_f for _f in fields if _f] def unpack_fields(fields): result = [] @@ -118,7 +118,7 @@ def build_word_dict(objects, j=job.nulljob): def merge_similar_words(word_dict): """Take all keys in word_dict that are similar, and merge them together. """ - keys = word_dict.keys() + keys = list(word_dict.keys()) keys.sort(key=len)# we want the shortest word to stay while keys: key = keys.pop(0) @@ -138,7 +138,7 @@ def reduce_common_words(word_dict, threshold): Because if we remove them, we will miss some duplicates! """ uncommon_words = set(word for word, objects in word_dict.items() if len(objects) < threshold) - for word, objects in word_dict.items(): + for word, objects in list(word_dict.items()): if len(objects) < threshold: continue reduced = set() diff --git a/core/export.py b/core/export.py index 0e29a71e..1f484d4d 100644 --- a/core/export.py +++ b/core/export.py @@ -13,7 +13,7 @@ from tempfile import mkdtemp # Yes, this is a very low-tech solution, but at least it doesn't have all these annoying dependency # and resource problems. -MAIN_TEMPLATE = u""" +MAIN_TEMPLATE = """ @@ -104,33 +104,33 @@ $rows """ -COLHEADERS_TEMPLATE = u"{name}" +COLHEADERS_TEMPLATE = "{name}" -ROW_TEMPLATE = u""" +ROW_TEMPLATE = """ {filename}{cells} """ -CELL_TEMPLATE = u"""{value}""" +CELL_TEMPLATE = """{value}""" def export_to_xhtml(colnames, rows): # a row is a list of values with the first value being a flag indicating if the row should be indented if rows: assert len(rows[0]) == len(colnames) + 1 # + 1 is for the "indented" flag - colheaders = u''.join(COLHEADERS_TEMPLATE.format(name=name) for name in colnames) + colheaders = ''.join(COLHEADERS_TEMPLATE.format(name=name) for name in colnames) rendered_rows = [] for row in rows: # [2:] is to remove the indented flag + filename - indented = u'indented' if row[0] else u'' + indented = 'indented' if row[0] else '' filename = row[1] - cells = u''.join(CELL_TEMPLATE.format(value=value) for value in row[2:]) + cells = ''.join(CELL_TEMPLATE.format(value=value) for value in row[2:]) rendered_rows.append(ROW_TEMPLATE.format(indented=indented, filename=filename, cells=cells)) - rendered_rows = u''.join(rendered_rows) + rendered_rows = ''.join(rendered_rows) # The main template can't use format because the css code uses {} content = MAIN_TEMPLATE.replace('$colheaders', colheaders).replace('$rows', rendered_rows) folder = mkdtemp() - destpath = op.join(folder, u'export.htm') + destpath = op.join(folder, 'export.htm') fp = open(destpath, 'w') fp.write(content.encode('utf-8')) fp.close() diff --git a/core/fs.py b/core/fs.py index 3cb5b0e0..1723f5e7 100644 --- a/core/fs.py +++ b/core/fs.py @@ -12,7 +12,7 @@ # resulting needless complexity and memory usage. It's been a while since I wanted to do that fork, # and I'm doing it now. -from __future__ import unicode_literals + import hashlib import logging @@ -25,13 +25,13 @@ class FSError(Exception): cls_message = "An error has occured on '{name}' in '{parent}'" def __init__(self, fsobject, parent=None): message = self.cls_message - if isinstance(fsobject, basestring): + if isinstance(fsobject, str): name = fsobject elif isinstance(fsobject, File): name = fsobject.name else: name = '' - parentname = unicode(parent) if parent is not None else '' + parentname = str(parent) if parent is not None else '' Exception.__init__(self, message.format(name=name, parent=parentname)) @@ -119,7 +119,7 @@ class File(object): If `attrnames` is not None, caches only attrnames. """ if attrnames is None: - attrnames = self.INITIAL_INFO.keys() + attrnames = list(self.INITIAL_INFO.keys()) for attrname in attrnames: if attrname not in self.__dict__: self._read_info(attrname) diff --git a/core/gui/details_panel.py b/core/gui/details_panel.py index 3f42324b..263e6855 100644 --- a/core/gui/details_panel.py +++ b/core/gui/details_panel.py @@ -32,7 +32,7 @@ class DetailsPanel(GUIObject): ref = group.ref if group is not None and group.ref is not dupe else None l2 = self.app._get_display_info(ref, group, False) names = [c['display'] for c in self.app.data.COLUMNS] - self._table = zip(names, l1, l2) + self._table = list(zip(names, l1, l2)) #--- Public def row_count(self): diff --git a/core/gui/directory_tree.py b/core/gui/directory_tree.py index d956adb6..5c48a7e3 100644 --- a/core/gui/directory_tree.py +++ b/core/gui/directory_tree.py @@ -62,7 +62,7 @@ class DirectoryTree(GUIObject, Tree): def _refresh(self): self.clear() for path in self.app.directories: - self.append(DirectoryNode(self.app, path, unicode(path))) + self.append(DirectoryNode(self.app, path, str(path))) def add_directory(self, path): self.app.add_directory(path) diff --git a/core/gui/problem_table.py b/core/gui/problem_table.py index 981fc46f..acdb58ad 100644 --- a/core/gui/problem_table.py +++ b/core/gui/problem_table.py @@ -39,5 +39,5 @@ class ProblemRow(Row): Row.__init__(self, table) self.dupe = dupe self.msg = msg - self.path = unicode(dupe.path) + self.path = str(dupe.path) diff --git a/core/gui/result_tree.py b/core/gui/result_tree.py index c7bfd24f..f1d9a616 100644 --- a/core/gui/result_tree.py +++ b/core/gui/result_tree.py @@ -63,7 +63,7 @@ class ResultTree(GUIObject, Tree): def _select_nodes(self, nodes): Tree._select_nodes(self, nodes) - self.app._select_dupes(map(attrgetter('_dupe'), nodes)) + self.app._select_dupes(list(map(attrgetter('_dupe'), nodes))) #--- Private def _refresh(self): diff --git a/core/ignore.py b/core/ignore.py index d51a579b..6cdb5395 100644 --- a/core/ignore.py +++ b/core/ignore.py @@ -22,7 +22,7 @@ class IgnoreList(object): self._count = 0 def __iter__(self): - for first,seconds in self._ignored.iteritems(): + for first,seconds in self._ignored.items(): for second in seconds: yield (first,second) diff --git a/core/results.py b/core/results.py index 9c74f1de..1f5c364e 100644 --- a/core/results.py +++ b/core/results.py @@ -147,7 +147,7 @@ class Results(Markable): self.__filters.append(filter_str) if self.__filtered_dupes is None: self.__filtered_dupes = flatten(g[:] for g in self.groups) - self.__filtered_dupes = set(dupe for dupe in self.__filtered_dupes if filter_re.search(unicode(dupe.path))) + self.__filtered_dupes = set(dupe for dupe in self.__filtered_dupes if filter_re.search(str(dupe.path))) filtered_groups = set() for dupe in self.__filtered_dupes: filtered_groups.add(self.get_group_of_duplicate(dupe)) @@ -241,7 +241,7 @@ class Results(Markable): func(dupe) to_remove.append(dupe) except EnvironmentError as e: - self.problems.append((dupe, unicode(e))) + self.problems.append((dupe, str(e))) if remove_from_results: self.remove_duplicates(to_remove) self.mark_none() @@ -285,7 +285,7 @@ class Results(Markable): words = () file_elem = etree.SubElement(group_elem, 'file') try: - file_elem.set('path', unicode(d.path)) + file_elem.set('path', str(d.path)) file_elem.set('words', ','.join(words)) except ValueError: # If there's an invalid character, just skip the file file_elem.set('path', '') @@ -293,9 +293,9 @@ class Results(Markable): file_elem.set('marked', ('y' if self.is_marked(d) else 'n')) for match in g.matches: match_elem = etree.SubElement(group_elem, 'match') - match_elem.set('first', unicode(dupe2index[match.first])) - match_elem.set('second', unicode(dupe2index[match.second])) - match_elem.set('percentage', unicode(int(match.percentage))) + match_elem.set('first', str(dupe2index[match.first])) + match_elem.set('second', str(dupe2index[match.second])) + match_elem.set('percentage', str(int(match.percentage))) tree = etree.ElementTree(root) with FileOrPath(outfile, 'wb') as fp: tree.write(fp, encoding='utf-8') diff --git a/core/scanner.py b/core/scanner.py index e9d739ca..e4eaab95 100644 --- a/core/scanner.py +++ b/core/scanner.py @@ -1,109 +1,113 @@ -# Created By: Virgil Dupras -# Created On: 2006/03/03 -# Copyright 2010 Hardcoded Software (http://www.hardcoded.net) -# -# This software is licensed under the "HS" License as described in the "LICENSE" file, -# which should be included with this package. The terms are also available at -# http://www.hardcoded.net/licenses/hs_license - -import logging - - -from hscommon import job -from hsutil import io -from hsutil.misc import dedupe -from hsutil.str import get_file_ext, rem_file_ext - -from . import engine -from .ignore import IgnoreList - -(SCAN_TYPE_FILENAME, -SCAN_TYPE_FIELDS, -SCAN_TYPE_FIELDS_NO_ORDER, -SCAN_TYPE_TAG, -UNUSED, # Must not be removed. Constants here are what scan_type in the prefs are. -SCAN_TYPE_CONTENT, -SCAN_TYPE_CONTENT_AUDIO) = range(7) - -SCANNABLE_TAGS = ['track', 'artist', 'album', 'title', 'genre', 'year'] - -class Scanner(object): - def __init__(self): - self.ignore_list = IgnoreList() - self.discarded_file_count = 0 - - def _getmatches(self, files, j): - if self.size_threshold: - j = j.start_subjob([2, 8]) - for f in j.iter_with_progress(files, 'Read size of %d/%d files'): - f.size # pre-read, makes a smoother progress if read here (especially for bundles) - files = [f for f in files if f.size >= self.size_threshold] - if self.scan_type in (SCAN_TYPE_CONTENT, SCAN_TYPE_CONTENT_AUDIO): - sizeattr = 'size' if self.scan_type == SCAN_TYPE_CONTENT else 'audiosize' - return engine.getmatches_by_contents(files, sizeattr, partial=self.scan_type==SCAN_TYPE_CONTENT_AUDIO, j=j) - else: - j = j.start_subjob([2, 8]) - kw = {} - kw['match_similar_words'] = self.match_similar_words - kw['weight_words'] = self.word_weighting - kw['min_match_percentage'] = self.min_match_percentage - if self.scan_type == SCAN_TYPE_FIELDS_NO_ORDER: - self.scan_type = SCAN_TYPE_FIELDS - kw['no_field_order'] = True - func = { - SCAN_TYPE_FILENAME: lambda f: engine.getwords(rem_file_ext(f.name)), - SCAN_TYPE_FIELDS: lambda f: engine.getfields(rem_file_ext(f.name)), - SCAN_TYPE_TAG: lambda f: [engine.getwords(unicode(getattr(f, attrname))) for attrname in SCANNABLE_TAGS if attrname in self.scanned_tags], - }[self.scan_type] - for f in j.iter_with_progress(files, 'Read metadata of %d/%d files'): - f.words = func(f) - return engine.getmatches(files, j=j, **kw) - - @staticmethod - def _key_func(dupe): - return (not dupe.is_ref, -dupe.size) - - @staticmethod - def _tie_breaker(ref, dupe): - refname = rem_file_ext(ref.name).lower() - dupename = rem_file_ext(dupe.name).lower() - if 'copy' in refname and 'copy' not in dupename: - return True - if refname.startswith(dupename) and (refname[len(dupename):].strip().isdigit()): - return True - return len(dupe.path) > len(ref.path) - - def GetDupeGroups(self, files, j=job.nulljob): - j = j.start_subjob([8, 2]) - for f in [f for f in files if not hasattr(f, 'is_ref')]: - f.is_ref = False - logging.info('Getting matches') - matches = self._getmatches(files, j) - logging.info('Found %d matches' % len(matches)) - j.set_progress(100, 'Removing false matches') - if not self.mix_file_kind: - matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)] - matches = [m for m in matches if io.exists(m.first.path) and io.exists(m.second.path)] - if self.ignore_list: - j = j.start_subjob(2) - iter_matches = j.iter_with_progress(matches, 'Processed %d/%d matches against the ignore list') - matches = [m for m in iter_matches - if not self.ignore_list.AreIgnored(unicode(m.first.path), unicode(m.second.path))] - logging.info('Grouping matches') - groups = engine.get_groups(matches, j) - matched_files = dedupe([m.first for m in matches] + [m.second for m in matches]) - self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups) - groups = [g for g in groups if any(not f.is_ref for f in g)] - logging.info('Created %d groups' % len(groups)) - j.set_progress(100, 'Doing group prioritization') - for g in groups: - g.prioritize(self._key_func, self._tie_breaker) - return groups - - match_similar_words = False - min_match_percentage = 80 - mix_file_kind = True - scan_type = SCAN_TYPE_FILENAME - scanned_tags = set(['artist', 'title']) - size_threshold = 0 - word_weighting = False +# Created By: Virgil Dupras +# Created On: 2006/03/03 +# Copyright 2010 Hardcoded Software (http://www.hardcoded.net) +# +# This software is licensed under the "HS" License as described in the "LICENSE" file, +# which should be included with this package. The terms are also available at +# http://www.hardcoded.net/licenses/hs_license + +import logging + + +from hscommon import job +from hsutil import io +from hsutil.misc import dedupe +from hsutil.str import get_file_ext, rem_file_ext + +from . import engine +from .ignore import IgnoreList + +(SCAN_TYPE_FILENAME, +SCAN_TYPE_FIELDS, +SCAN_TYPE_FIELDS_NO_ORDER, +SCAN_TYPE_TAG, +UNUSED, # Must not be removed. Constants here are what scan_type in the prefs are. +SCAN_TYPE_CONTENT, +SCAN_TYPE_CONTENT_AUDIO) = range(7) + +SCANNABLE_TAGS = ['track', 'artist', 'album', 'title', 'genre', 'year'] + +class Scanner(object): + def __init__(self): + self.ignore_list = IgnoreList() + self.discarded_file_count = 0 + + def _getmatches(self, files, j): + if self.size_threshold: + j = j.start_subjob([2, 8]) + for f in j.iter_with_progress(files, 'Read size of %d/%d files'): + f.size # pre-read, makes a smoother progress if read here (especially for bundles) + files = [f for f in files if f.size >= self.size_threshold] + if self.scan_type in (SCAN_TYPE_CONTENT, SCAN_TYPE_CONTENT_AUDIO): + sizeattr = 'size' if self.scan_type == SCAN_TYPE_CONTENT else 'audiosize' + return engine.getmatches_by_contents(files, sizeattr, partial=self.scan_type==SCAN_TYPE_CONTENT_AUDIO, j=j) + else: + j = j.start_subjob([2, 8]) + kw = {} + kw['match_similar_words'] = self.match_similar_words + kw['weight_words'] = self.word_weighting + kw['min_match_percentage'] = self.min_match_percentage + if self.scan_type == SCAN_TYPE_FIELDS_NO_ORDER: + self.scan_type = SCAN_TYPE_FIELDS + kw['no_field_order'] = True + func = { + SCAN_TYPE_FILENAME: lambda f: engine.getwords(rem_file_ext(f.name)), + SCAN_TYPE_FIELDS: lambda f: engine.getfields(rem_file_ext(f.name)), + SCAN_TYPE_TAG: lambda f: [engine.getwords(str(getattr(f, attrname))) for attrname in SCANNABLE_TAGS if attrname in self.scanned_tags], + }[self.scan_type] + for f in j.iter_with_progress(files, 'Read metadata of %d/%d files'): + f.words = func(f) + return engine.getmatches(files, j=j, **kw) + + @staticmethod + def _key_func(dupe): + return (not dupe.is_ref, -dupe.size) + + @staticmethod + def _tie_breaker(ref, dupe): + refname = rem_file_ext(ref.name).lower() + dupename = rem_file_ext(dupe.name).lower() + if 'copy' in dupename: + return False + if 'copy' in refname: + return True + if dupename.startswith(refname) and (dupename[len(refname):].strip().isdigit()): + return False + if refname.startswith(dupename) and (refname[len(dupename):].strip().isdigit()): + return True + return len(dupe.path) > len(ref.path) + + def GetDupeGroups(self, files, j=job.nulljob): + j = j.start_subjob([8, 2]) + for f in [f for f in files if not hasattr(f, 'is_ref')]: + f.is_ref = False + logging.info('Getting matches') + matches = self._getmatches(files, j) + logging.info('Found %d matches' % len(matches)) + j.set_progress(100, 'Removing false matches') + if not self.mix_file_kind: + matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)] + matches = [m for m in matches if io.exists(m.first.path) and io.exists(m.second.path)] + if self.ignore_list: + j = j.start_subjob(2) + iter_matches = j.iter_with_progress(matches, 'Processed %d/%d matches against the ignore list') + matches = [m for m in iter_matches + if not self.ignore_list.AreIgnored(str(m.first.path), str(m.second.path))] + logging.info('Grouping matches') + groups = engine.get_groups(matches, j) + matched_files = dedupe([m.first for m in matches] + [m.second for m in matches]) + self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups) + groups = [g for g in groups if any(not f.is_ref for f in g)] + logging.info('Created %d groups' % len(groups)) + j.set_progress(100, 'Doing group prioritization') + for g in groups: + g.prioritize(self._key_func, self._tie_breaker) + return groups + + match_similar_words = False + min_match_percentage = 80 + mix_file_kind = True + scan_type = SCAN_TYPE_FILENAME + scanned_tags = set(['artist', 'title']) + size_threshold = 0 + word_weighting = False diff --git a/core/tests/app_test.py b/core/tests/app_test.py index c09fbd98..29c4f7e4 100644 --- a/core/tests/app_test.py +++ b/core/tests/app_test.py @@ -109,7 +109,7 @@ class TCDupeGuru(TestCase): def test_Scan_with_objects_evaluating_to_false(self): class FakeFile(fs.File): - def __nonzero__(self): + def __bool__(self): return False @@ -200,11 +200,11 @@ class TCDupeGuruWithResults(TestCase): if expected is not None: expected = set(expected) not_called = expected - calls - assert not not_called, u"These calls haven't been made: {0}".format(not_called) + assert not not_called, "These calls haven't been made: {0}".format(not_called) if not_expected is not None: not_expected = set(not_expected) called = not_expected & calls - assert not called, u"These calls shouldn't have been made: {0}".format(called) + assert not called, "These calls shouldn't have been made: {0}".format(called) gui.clear_calls() def clear_gui_calls(self): @@ -409,9 +409,9 @@ class TCDupeGuruWithResults(TestCase): def test_only_unicode_is_added_to_ignore_list(self): def FakeIgnore(first,second): - if not isinstance(first,unicode): + if not isinstance(first,str): self.fail() - if not isinstance(second,unicode): + if not isinstance(second,str): self.fail() app = self.app @@ -423,11 +423,11 @@ class TCDupeGuruWithResults(TestCase): class TCDupeGuru_renameSelected(TestCase): def setUp(self): p = self.tmppath() - fp = open(unicode(p + 'foo bar 1'),mode='w') + fp = open(str(p + 'foo bar 1'),mode='w') fp.close() - fp = open(unicode(p + 'foo bar 2'),mode='w') + fp = open(str(p + 'foo bar 2'),mode='w') fp.close() - fp = open(unicode(p + 'foo bar 3'),mode='w') + fp = open(str(p + 'foo bar 3'),mode='w') fp.close() files = fs.get_files(p) matches = engine.getmatches(files) diff --git a/core/tests/directories_test.py b/core/tests/directories_test.py index cd34b6bc..8b4465ef 100644 --- a/core/tests/directories_test.py +++ b/core/tests/directories_test.py @@ -82,8 +82,8 @@ class TCDirectories(TestCase): def test_AddPath_non_latin(self): p = Path(self.tmpdir()) - to_add = p + u'unicode\u201a' - os.mkdir(unicode(to_add)) + to_add = p + 'unicode\u201a' + os.mkdir(str(to_add)) d = Directories() try: d.add_path(to_add) @@ -111,7 +111,7 @@ class TCDirectories(TestCase): self.assertEqual(STATE_REFERENCE,d.get_state(p)) self.assertEqual(STATE_REFERENCE,d.get_state(p + 'dir1')) self.assertEqual(1,len(d.states)) - self.assertEqual(p,d.states.keys()[0]) + self.assertEqual(p,list(d.states.keys())[0]) self.assertEqual(STATE_REFERENCE,d.states[p]) def test_get_state_with_path_not_there(self): @@ -213,11 +213,11 @@ class TCDirectories(TestCase): def test_unicode_save(self): d = Directories() - p1 = self.tmppath() + u'hello\xe9' + p1 = self.tmppath() + 'hello\xe9' io.mkdir(p1) - io.mkdir(p1 + u'foo\xe9') + io.mkdir(p1 + 'foo\xe9') d.add_path(p1) - d.set_state(p1 + u'foo\xe9', STATE_EXCLUDED) + d.set_state(p1 + 'foo\xe9', STATE_EXCLUDED) tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml') try: d.save_to_file(tmpxml) diff --git a/core/tests/engine_test.py b/core/tests/engine_test.py index 0d227e32..40fd9245 100644 --- a/core/tests/engine_test.py +++ b/core/tests/engine_test.py @@ -62,12 +62,12 @@ class TCgetwords(TestCase): def test_splitter_chars(self): self.assertEqual( - [chr(i) for i in xrange(ord('a'),ord('z')+1)], + [chr(i) for i in range(ord('a'),ord('z')+1)], getwords("a-b_c&d+e(f)g;h\\i[j]k{l}m:n.o,pr/s?t~u!v@w#x$y*z") ) def test_joiner_chars(self): - self.assertEqual(["aec"], getwords(u"a'e\u0301c")) + self.assertEqual(["aec"], getwords("a'e\u0301c")) def test_empty(self): self.assertEqual([], getwords('')) @@ -76,7 +76,7 @@ class TCgetwords(TestCase): self.assertEqual(['foo', 'bar'], getwords('FOO BAR')) def test_decompose_unicode(self): - self.assertEqual(getwords(u'foo\xe9bar'), ['fooebar']) + self.assertEqual(getwords('foo\xe9bar'), ['fooebar']) class TCgetfields(TestCase): @@ -768,7 +768,7 @@ class TCget_groups(TestCase): self.assert_(o3 in g) def test_four_sized_group(self): - l = [NamedObject("foobar") for i in xrange(4)] + l = [NamedObject("foobar") for i in range(4)] m = getmatches(l) r = get_groups(m) self.assertEqual(1,len(r)) diff --git a/core/tests/ignore_test.py b/core/tests/ignore_test.py index dd3bf569..598cacf6 100644 --- a/core/tests/ignore_test.py +++ b/core/tests/ignore_test.py @@ -6,7 +6,7 @@ # which should be included with this package. The terms are also available at # http://www.hardcoded.net/licenses/hs_license -import cStringIO +import io from lxml import etree from hsutil.testutil import eq_ @@ -59,7 +59,7 @@ def test_save_to_xml(): il.Ignore('foo','bar') il.Ignore('foo','bleh') il.Ignore('bleh','bar') - f = cStringIO.StringIO() + f = io.BytesIO() il.save_to_xml(f) f.seek(0) doc = etree.parse(f) @@ -76,19 +76,19 @@ def test_SaveThenLoad(): il.Ignore('foo', 'bar') il.Ignore('foo', 'bleh') il.Ignore('bleh', 'bar') - il.Ignore(u'\u00e9', 'bar') - f = cStringIO.StringIO() + il.Ignore('\u00e9', 'bar') + f = io.BytesIO() il.save_to_xml(f) f.seek(0) f.seek(0) il = IgnoreList() il.load_from_xml(f) eq_(4,len(il)) - assert il.AreIgnored(u'\u00e9','bar') + assert il.AreIgnored('\u00e9','bar') def test_LoadXML_with_empty_file_tags(): - f = cStringIO.StringIO() - f.write('') + f = io.BytesIO() + f.write(b'') f.seek(0) il = IgnoreList() il.load_from_xml(f) @@ -130,12 +130,12 @@ def test_filter(): def test_save_with_non_ascii_items(): il = IgnoreList() - il.Ignore(u'\xac', u'\xbf') - f = cStringIO.StringIO() + il.Ignore('\xac', '\xbf') + f = io.BytesIO() try: il.save_to_xml(f) except Exception as e: - raise AssertionError(unicode(e)) + raise AssertionError(str(e)) def test_len(): il = IgnoreList() diff --git a/core/tests/results_test.py b/core/tests/results_test.py index e03bd339..f9bce8a5 100644 --- a/core/tests/results_test.py +++ b/core/tests/results_test.py @@ -7,7 +7,7 @@ # which should be included with this package. The terms are also available at # http://www.hardcoded.net/licenses/hs_license -import StringIO +import io import os.path as op from lxml import etree @@ -25,7 +25,7 @@ class NamedObject(engine_test.NamedObject): path = property(lambda x:Path('basepath') + x.name) is_ref = False - def __nonzero__(self): + def __bool__(self): return False #Make sure that operations are made correctly when the bool value of files is false. # Returns a group set that looks like that: @@ -63,7 +63,7 @@ class TCResultsEmpty(TestCase): self.assert_(self.results.get_group_of_duplicate('foo') is None) def test_save_to_xml(self): - f = StringIO.StringIO() + f = io.BytesIO() self.results.save_to_xml(f) f.seek(0) doc = etree.parse(f) @@ -324,7 +324,7 @@ class TCResultsMarkings(TestCase): def test_SaveXML(self): self.results.mark(self.objects[1]) self.results.mark_invert() - f = StringIO.StringIO() + f = io.BytesIO() self.results.save_to_xml(f) f.seek(0) doc = etree.parse(f) @@ -345,7 +345,7 @@ class TCResultsMarkings(TestCase): self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path self.results.mark(self.objects[1]) self.results.mark_invert() - f = StringIO.StringIO() + f = io.BytesIO() self.results.save_to_xml(f) f.seek(0) r = Results(data) @@ -369,7 +369,7 @@ class TCResultsXML(TestCase): def test_save_to_xml(self): self.objects[0].is_ref = True self.objects[0].words = [['foo','bar']] - f = StringIO.StringIO() + f = io.BytesIO() self.results.save_to_xml(f) f.seek(0) doc = etree.parse(f) @@ -408,7 +408,7 @@ class TCResultsXML(TestCase): self.objects[0].is_ref = True self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path - f = StringIO.StringIO() + f = io.BytesIO() self.results.save_to_xml(f) f.seek(0) r = Results(data) @@ -451,7 +451,7 @@ class TCResultsXML(TestCase): return [f for f in self.objects if str(f.path) == path][0] self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path - f = StringIO.StringIO() + f = io.BytesIO() self.results.save_to_xml(f) f.seek(0) r = Results(data) @@ -490,7 +490,7 @@ class TCResultsXML(TestCase): match_node.set('percentage', 'baz') group_node = etree.SubElement(root, 'foobar') #invalid group group_node = etree.SubElement(root, 'group') #empty group - f = StringIO.StringIO() + f = io.BytesIO() tree = etree.ElementTree(root) tree.write(f, encoding='utf-8') f.seek(0) @@ -501,30 +501,30 @@ class TCResultsXML(TestCase): def test_xml_non_ascii(self): def get_file(path): - if path == op.join('basepath',u'\xe9foo bar'): + if path == op.join('basepath','\xe9foo bar'): return objects[0] - if path == op.join('basepath',u'bar bleh'): + if path == op.join('basepath','bar bleh'): return objects[1] - objects = [NamedObject(u"\xe9foo bar",True),NamedObject("bar bleh",True)] + objects = [NamedObject("\xe9foo bar",True),NamedObject("bar bleh",True)] matches = engine.getmatches(objects) #we should have 5 matches groups = engine.get_groups(matches) #We should have 2 groups for g in groups: g.prioritize(lambda x:objects.index(x)) #We want the dupes to be in the same order as the list is results = Results(data) results.groups = groups - f = StringIO.StringIO() + f = io.BytesIO() results.save_to_xml(f) f.seek(0) r = Results(data) r.load_from_xml(f,get_file) g = r.groups[0] - self.assertEqual(u"\xe9foo bar",g[0].name) + self.assertEqual("\xe9foo bar",g[0].name) self.assertEqual(['efoo','bar'],g[0].words) def test_load_invalid_xml(self): - f = StringIO.StringIO() - f.write('' % self.name + no = NamedObject @@ -297,8 +300,8 @@ class ScannerTestFakeFiles(TestCase): s.scanned_tags = set(['title']) o1 = no('foo') o2 = no('bar') - o1.title = u'foobar\u00e9' - o2.title = u'foobar\u00e9' + o1.title = 'foobar\u00e9' + o2.title = 'foobar\u00e9' try: r = s.GetDupeGroups([o1, o2]) except UnicodeEncodeError: @@ -362,11 +365,11 @@ class ScannerTestFakeFiles(TestCase): f1 = no('foobar') f2 = no('foobar') f3 = no('foobar') - f1.path = Path(u'foo1\u00e9') - f2.path = Path(u'foo2\u00e9') - f3.path = Path(u'foo3\u00e9') - s.ignore_list.Ignore(unicode(f1.path),unicode(f2.path)) - s.ignore_list.Ignore(unicode(f1.path),unicode(f3.path)) + f1.path = Path('foo1\u00e9') + f2.path = Path('foo2\u00e9') + f3.path = Path('foo3\u00e9') + s.ignore_list.Ignore(str(f1.path),str(f2.path)) + s.ignore_list.Ignore(str(f1.path),str(f3.path)) r = s.GetDupeGroups([f1,f2,f3]) eq_(len(r), 1) g = r[0] @@ -379,7 +382,7 @@ class ScannerTestFakeFiles(TestCase): # A very wrong way to use any() was added at some point, causing resulting group list # to be empty. class FalseNamedObject(NamedObject): - def __nonzero__(self): + def __bool__(self): return False diff --git a/core_me/app_cocoa.py b/core_me/app_cocoa.py index b5f84d3e..4e31483a 100644 --- a/core_me/app_cocoa.py +++ b/core_me/app_cocoa.py @@ -41,7 +41,7 @@ class DupeGuruME(DupeGuruBase): try: track.delete(timeout=0) except CommandError as e: - logging.warning('Error while trying to remove a track from iTunes: %s' % unicode(e)) + logging.warning('Error while trying to remove a track from iTunes: %s' % str(e)) self._start_job(JOB_REMOVE_DEAD_TRACKS, do) diff --git a/core_me/fs.py b/core_me/fs.py index 5a7d9cf8..a03dcdeb 100644 --- a/core_me/fs.py +++ b/core_me/fs.py @@ -42,12 +42,12 @@ class Mp3File(MusicFile): HANDLED_EXTS = set(['mp3']) def _read_info(self, field): if field == 'md5partial': - fileinfo = mpeg.Mpeg(unicode(self.path)) + fileinfo = mpeg.Mpeg(str(self.path)) self._md5partial_offset = fileinfo.audio_offset self._md5partial_size = fileinfo.audio_size MusicFile._read_info(self, field) if field in TAG_FIELDS: - fileinfo = mpeg.Mpeg(unicode(self.path)) + fileinfo = mpeg.Mpeg(str(self.path)) self.audiosize = fileinfo.audio_size self.bitrate = fileinfo.bitrate self.duration = fileinfo.duration @@ -70,12 +70,12 @@ class WmaFile(MusicFile): HANDLED_EXTS = set(['wma']) def _read_info(self, field): if field == 'md5partial': - dec = wma.WMADecoder(unicode(self.path)) + dec = wma.WMADecoder(str(self.path)) self._md5partial_offset = dec.audio_offset self._md5partial_size = dec.audio_size MusicFile._read_info(self, field) if field in TAG_FIELDS: - dec = wma.WMADecoder(unicode(self.path)) + dec = wma.WMADecoder(str(self.path)) self.audiosize = dec.audio_size self.bitrate = dec.bitrate self.duration = dec.duration @@ -92,13 +92,13 @@ class Mp4File(MusicFile): HANDLED_EXTS = set(['m4a', 'm4p']) def _read_info(self, field): if field == 'md5partial': - dec = mp4.File(unicode(self.path)) + dec = mp4.File(str(self.path)) self._md5partial_offset = dec.audio_offset self._md5partial_size = dec.audio_size dec.close() MusicFile._read_info(self, field) if field in TAG_FIELDS: - dec = mp4.File(unicode(self.path)) + dec = mp4.File(str(self.path)) self.audiosize = dec.audio_size self.bitrate = dec.bitrate self.duration = dec.duration @@ -116,12 +116,12 @@ class OggFile(MusicFile): HANDLED_EXTS = set(['ogg']) def _read_info(self, field): if field == 'md5partial': - dec = ogg.Vorbis(unicode(self.path)) + dec = ogg.Vorbis(str(self.path)) self._md5partial_offset = dec.audio_offset self._md5partial_size = dec.audio_size MusicFile._read_info(self, field) if field in TAG_FIELDS: - dec = ogg.Vorbis(unicode(self.path)) + dec = ogg.Vorbis(str(self.path)) self.audiosize = dec.audio_size self.bitrate = dec.bitrate self.duration = dec.duration @@ -138,12 +138,12 @@ class FlacFile(MusicFile): HANDLED_EXTS = set(['flac']) def _read_info(self, field): if field == 'md5partial': - dec = flac.FLAC(unicode(self.path)) + dec = flac.FLAC(str(self.path)) self._md5partial_offset = dec.audio_offset self._md5partial_size = dec.audio_size MusicFile._read_info(self, field) if field in TAG_FIELDS: - dec = flac.FLAC(unicode(self.path)) + dec = flac.FLAC(str(self.path)) self.audiosize = dec.audio_size self.bitrate = dec.bitrate self.duration = dec.duration @@ -160,12 +160,12 @@ class AiffFile(MusicFile): HANDLED_EXTS = set(['aif', 'aiff', 'aifc']) def _read_info(self, field): if field == 'md5partial': - dec = aiff.File(unicode(self.path)) + dec = aiff.File(str(self.path)) self._md5partial_offset = dec.audio_offset self._md5partial_size = dec.audio_size MusicFile._read_info(self, field) if field in TAG_FIELDS: - dec = aiff.File(unicode(self.path)) + dec = aiff.File(str(self.path)) self.audiosize = dec.audio_size self.bitrate = dec.bitrate self.duration = dec.duration diff --git a/core_pe/app_cocoa.py b/core_pe/app_cocoa.py index b5db330c..f56a6773 100644 --- a/core_pe/app_cocoa.py +++ b/core_pe/app_cocoa.py @@ -37,15 +37,15 @@ class Photo(fs.File): def _read_info(self, field): fs.File._read_info(self, field) if field == 'dimensions': - self.dimensions = _block_osx.get_image_size(unicode(self.path)) + self.dimensions = _block_osx.get_image_size(str(self.path)) def get_blocks(self, block_count_per_side): try: - blocks = _block_osx.getblocks(unicode(self.path), block_count_per_side) + blocks = _block_osx.getblocks(str(self.path), block_count_per_side) except Exception as e: - raise IOError('The reading of "%s" failed with "%s"' % (unicode(self.path), unicode(e))) + raise IOError('The reading of "%s" failed with "%s"' % (str(self.path), str(e))) if not blocks: - raise IOError('The picture %s could not be read' % unicode(self.path)) + raise IOError('The picture %s could not be read' % str(self.path)) return blocks @@ -140,7 +140,7 @@ class DupeGuruPE(app_cocoa.DupeGuru): photos = as_fetch(a.photo_library_album().photos, k.item) for photo in j.iter_with_progress(photos): try: - self.path2iphoto[unicode(photo.image_path(timeout=0))] = photo + self.path2iphoto[str(photo.image_path(timeout=0))] = photo except CommandError: pass except (CommandError, RuntimeError): @@ -151,15 +151,15 @@ class DupeGuruPE(app_cocoa.DupeGuru): def _do_delete_dupe(self, dupe): if isinstance(dupe, IPhoto): - if unicode(dupe.path) in self.path2iphoto: - photo = self.path2iphoto[unicode(dupe.path)] + if str(dupe.path) in self.path2iphoto: + photo = self.path2iphoto[str(dupe.path)] try: a = app('iPhoto') a.remove(photo, timeout=0) except (CommandError, RuntimeError) as e: - raise EnvironmentError(unicode(e)) + raise EnvironmentError(str(e)) else: - msg = u"Could not find photo %s in iPhoto Library" % unicode(dupe.path) + msg = "Could not find photo %s in iPhoto Library" % str(dupe.path) raise EnvironmentError(msg) else: app_cocoa.DupeGuru._do_delete_dupe(self, dupe) diff --git a/core_pe/block.py b/core_pe/block.py index 56649d5d..0365c31f 100644 --- a/core_pe/block.py +++ b/core_pe/block.py @@ -6,7 +6,7 @@ # which should be included with this package. The terms are also available at # http://www.hardcoded.net/licenses/hs_license -from _block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2 +from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2 # Converted to C # def getblock(image): diff --git a/core_pe/cache.py b/core_pe/cache.py index 5ff2fe32..338baede 100644 --- a/core_pe/cache.py +++ b/core_pe/cache.py @@ -82,7 +82,7 @@ class Cache(object): self.con.execute(sql, [value, key]) except sqlite.OperationalError: logging.warning('Picture cache could not set %r for key %r', value, key) - except sqlite.DatabaseError, e: + except sqlite.DatabaseError as e: logging.warning('DatabaseError while setting %r for key %r: %s', value, key, str(e)) def _create_con(self, second_try=False): @@ -97,7 +97,7 @@ class Cache(object): self.con.execute("select * from pictures where 1=2") except sqlite.OperationalError: # new db create_tables() - except sqlite.DatabaseError, e: # corrupted db + except sqlite.DatabaseError as e: # corrupted db if second_try: raise # Something really strange is happening logging.warning('Could not create picture cache because of an error: %s', str(e)) diff --git a/core_pe/gen.py b/core_pe/gen.py index 55b59512..436f8929 100644 --- a/core_pe/gen.py +++ b/core_pe/gen.py @@ -15,11 +15,11 @@ def move(src, dst): return if op.exists(dst): os.remove(dst) - print 'Moving %s --> %s' % (src, dst) + print('Moving %s --> %s' % (src, dst)) os.rename(src, dst) os.chdir('modules') -os.system('python setup.py build_ext --inplace') +os.system('python3 setup.py build_ext --inplace') os.chdir('..') move(op.join('modules', '_block.so'), '_block.so') move(op.join('modules', '_block.pyd'), '_block.pyd') diff --git a/core_pe/matchbase.py b/core_pe/matchbase.py index 99680a44..25b2175f 100644 --- a/core_pe/matchbase.py +++ b/core_pe/matchbase.py @@ -34,16 +34,16 @@ def prepare_pictures(pictures, cache_path, j=job.nulljob): try: for picture in j.iter_with_progress(pictures, 'Analyzed %d/%d pictures'): picture.dimensions - picture.unicode_path = unicode(picture.path) + picture.unicode_path = str(picture.path) try: if picture.unicode_path not in cache: blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE) cache[picture.unicode_path] = blocks prepared.append(picture) except (IOError, ValueError) as e: - logging.warning(unicode(e)) + logging.warning(str(e)) except MemoryError: - logging.warning(u'Ran out of memory while reading %s of size %d' % (picture.unicode_path, picture.size)) + logging.warning('Ran out of memory while reading %s of size %d' % (picture.unicode_path, picture.size)) if picture.size < 10 * 1024 * 1024: # We're really running out of memory raise except MemoryError: diff --git a/core_pe/modules/block.c b/core_pe/modules/block.c index 25a24727..9b6a729c 100644 --- a/core_pe/modules/block.c +++ b/core_pe/modules/block.c @@ -39,9 +39,9 @@ static PyObject* getblock(PyObject *image) pg = PySequence_ITEM(ppixel, 1); pb = PySequence_ITEM(ppixel, 2); Py_DECREF(ppixel); - r = PyInt_AsSsize_t(pr); - g = PyInt_AsSsize_t(pg); - b = PyInt_AsSsize_t(pb); + r = PyLong_AsLong(pr); + g = PyLong_AsLong(pg); + b = PyLong_AsLong(pb); Py_DECREF(pr); Py_DECREF(pg); Py_DECREF(pb); @@ -67,14 +67,14 @@ static PyObject* getblock(PyObject *image) */ static int diff(PyObject *first, PyObject *second) { - Py_ssize_t r1, g1, b1, r2, b2, g2; + int r1, g1, b1, r2, b2, g2; PyObject *pr, *pg, *pb; pr = PySequence_ITEM(first, 0); pg = PySequence_ITEM(first, 1); pb = PySequence_ITEM(first, 2); - r1 = PyInt_AsSsize_t(pr); - g1 = PyInt_AsSsize_t(pg); - b1 = PyInt_AsSsize_t(pb); + r1 = PyLong_AsLong(pr); + g1 = PyLong_AsLong(pg); + b1 = PyLong_AsLong(pb); Py_DECREF(pr); Py_DECREF(pg); Py_DECREF(pb); @@ -82,9 +82,9 @@ static int diff(PyObject *first, PyObject *second) pr = PySequence_ITEM(second, 0); pg = PySequence_ITEM(second, 1); pb = PySequence_ITEM(second, 2); - r2 = PyInt_AsSsize_t(pr); - g2 = PyInt_AsSsize_t(pg); - b2 = PyInt_AsSsize_t(pb); + r2 = PyLong_AsLong(pr); + g2 = PyLong_AsLong(pg); + b2 = PyLong_AsLong(pb); Py_DECREF(pr); Py_DECREF(pg); Py_DECREF(pb); @@ -115,8 +115,8 @@ static PyObject* block_getblocks2(PyObject *self, PyObject *args) pimage_size = PyObject_GetAttrString(image, "size"); pwidth = PySequence_ITEM(pimage_size, 0); pheight = PySequence_ITEM(pimage_size, 1); - width = PyInt_AsSsize_t(pwidth); - height = PyInt_AsSsize_t(pheight); + width = PyLong_AsLong(pwidth); + height = PyLong_AsLong(pheight); Py_DECREF(pimage_size); Py_DECREF(pwidth); Py_DECREF(pheight); @@ -147,8 +147,8 @@ static PyObject* block_getblocks2(PyObject *self, PyObject *args) left = min(iw*block_width, width-block_width); right = left + block_width; pbox = inttuple(4, left, top, right, bottom); - pmethodname = PyString_FromString("crop"); - pcrop = PyObject_CallMethodObjArgs(image, pmethodname, pbox); + pmethodname = PyUnicode_FromString("crop"); + pcrop = PyObject_CallMethodObjArgs(image, pmethodname, pbox, NULL); Py_DECREF(pmethodname); Py_DECREF(pbox); if (pcrop == NULL) { @@ -207,7 +207,7 @@ static PyObject* block_avgdiff(PyObject *self, PyObject *args) Py_DECREF(item1); Py_DECREF(item2); if ((sum > limit*iteration_count) && (iteration_count >= min_iterations)) { - return PyInt_FromSsize_t(limit + 1); + return PyLong_FromLong(limit + 1); } } @@ -215,7 +215,7 @@ static PyObject* block_avgdiff(PyObject *self, PyObject *args) if (!result && sum) { result = 1; } - return PyInt_FromSsize_t(result); + return PyLong_FromLong(result); } static PyMethodDef BlockMethods[] = { @@ -224,16 +224,30 @@ static PyMethodDef BlockMethods[] = { {NULL, NULL, 0, NULL} /* Sentinel */ }; -PyMODINIT_FUNC -init_block(void) +static struct PyModuleDef BlockDef = { + PyModuleDef_HEAD_INIT, + "_block", + NULL, + -1, + BlockMethods, + NULL, + NULL, + NULL, + NULL +}; + +PyObject * +PyInit__block(void) { - PyObject *m = Py_InitModule("_block", BlockMethods); + PyObject *m = PyModule_Create(&BlockDef); if (m == NULL) { - return; + return NULL; } NoBlocksError = PyErr_NewException("_block.NoBlocksError", NULL, NULL); PyModule_AddObject(m, "NoBlocksError", NoBlocksError); DifferentBlockCountError = PyErr_NewException("_block.DifferentBlockCountError", NULL, NULL); PyModule_AddObject(m, "DifferentBlockCountError", DifferentBlockCountError); + + return m; } \ No newline at end of file diff --git a/core_pe/modules/block_osx.m b/core_pe/modules/block_osx.m index e4609a02..a43782fd 100644 --- a/core_pe/modules/block_osx.m +++ b/core_pe/modules/block_osx.m @@ -29,8 +29,8 @@ pystring2cfstring(PyObject *pystring) Py_INCREF(encoded); } - s = (UInt8*)PyString_AS_STRING(encoded); - size = PyString_GET_SIZE(encoded); + s = (UInt8*)PyBytes_AS_STRING(encoded); + size = PyUnicode_GET_SIZE(encoded); result = CFStringCreateWithBytes(NULL, s, size, kCFStringEncodingUTF8, FALSE); Py_DECREF(encoded); return result; @@ -43,7 +43,7 @@ static PyObject* block_osx_get_image_size(PyObject *self, PyObject *args) CFURLRef image_url; CGImageSourceRef source; CGImageRef image; - size_t width, height; + long width, height; PyObject *pwidth, *pheight; PyObject *result; @@ -72,11 +72,11 @@ static PyObject* block_osx_get_image_size(PyObject *self, PyObject *args) CFRelease(source); } - pwidth = PyInt_FromSsize_t(width); + pwidth = PyLong_FromLong(width); if (pwidth == NULL) { return NULL; } - pheight = PyInt_FromSsize_t(height); + pheight = PyLong_FromLong(height); if (pheight == NULL) { return NULL; } @@ -228,8 +228,24 @@ static PyMethodDef BlockOsxMethods[] = { {NULL, NULL, 0, NULL} /* Sentinel */ }; -PyMODINIT_FUNC -init_block_osx(void) +static struct PyModuleDef BlockOsxDef = { + PyModuleDef_HEAD_INIT, + "_block_osx", + NULL, + -1, + BlockOsxMethods, + NULL, + NULL, + NULL, + NULL +}; + +PyObject * +PyInit__block_osx(void) { - Py_InitModule("_block_osx", BlockOsxMethods); + PyObject *m = PyModule_Create(&BlockOsxDef); + if (m == NULL) { + return NULL; + } + return m; } \ No newline at end of file diff --git a/core_pe/modules/cache.c b/core_pe/modules/cache.c index 7e9e88c3..191e6eea 100644 --- a/core_pe/modules/cache.c +++ b/core_pe/modules/cache.c @@ -72,8 +72,24 @@ static PyMethodDef CacheMethods[] = { {NULL, NULL, 0, NULL} /* Sentinel */ }; -PyMODINIT_FUNC -init_cache(void) +static struct PyModuleDef CacheDef = { + PyModuleDef_HEAD_INIT, + "_cache", + NULL, + -1, + CacheMethods, + NULL, + NULL, + NULL, + NULL +}; + +PyObject * +PyInit__cache(void) { - (void)Py_InitModule("_cache", CacheMethods); -} + PyObject *m = PyModule_Create(&CacheDef); + if (m == NULL) { + return NULL; + } + return m; +} \ No newline at end of file diff --git a/core_pe/modules/common.c b/core_pe/modules/common.c index 2d456f1e..63732be0 100644 --- a/core_pe/modules/common.c +++ b/core_pe/modules/common.c @@ -32,7 +32,7 @@ PyObject* inttuple(int n, ...) result = PyTuple_New(n); for (i=0; i %s' % (src, dst) + print('Moving %s --> %s' % (src, dst)) os.rename(src, dst) os.chdir('modules') diff --git a/qt/pe/preferences_dialog.py b/qt/pe/preferences_dialog.py index 049e8c90..ff9a3920 100644 --- a/qt/pe/preferences_dialog.py +++ b/qt/pe/preferences_dialog.py @@ -46,7 +46,7 @@ class PreferencesDialog(QDialog, Ui_PreferencesDialog): prefs.use_regexp = ischecked(self.useRegexpBox) prefs.remove_empty_folders = ischecked(self.removeEmptyFoldersBox) prefs.destination_type = self.copyMoveDestinationComboBox.currentIndex() - prefs.custom_command = unicode(self.customCommandEdit.text()) + prefs.custom_command = str(self.customCommandEdit.text()) def resetToDefaults(self): self.load(preferences.Preferences()) diff --git a/qt/se/preferences_dialog.py b/qt/se/preferences_dialog.py index 1b47b999..c9a88751 100644 --- a/qt/se/preferences_dialog.py +++ b/qt/se/preferences_dialog.py @@ -48,7 +48,7 @@ class PreferencesDialog(QDialog, Ui_PreferencesDialog): setchecked(self.useRegexpBox, prefs.use_regexp) setchecked(self.removeEmptyFoldersBox, prefs.remove_empty_folders) setchecked(self.ignoreSmallFilesBox, prefs.ignore_small_files) - self.sizeThresholdEdit.setText(unicode(prefs.small_file_threshold)) + self.sizeThresholdEdit.setText(str(prefs.small_file_threshold)) self.copyMoveDestinationComboBox.setCurrentIndex(prefs.destination_type) self.customCommandEdit.setText(prefs.custom_command) @@ -65,7 +65,7 @@ class PreferencesDialog(QDialog, Ui_PreferencesDialog): prefs.ignore_small_files = ischecked(self.ignoreSmallFilesBox) prefs.small_file_threshold = tryint(self.sizeThresholdEdit.text()) prefs.destination_type = self.copyMoveDestinationComboBox.currentIndex() - prefs.custom_command = unicode(self.customCommandEdit.text()) + prefs.custom_command = str(self.customCommandEdit.text()) def resetToDefaults(self): self.load(preferences.Preferences()) diff --git a/qt/se/start.py b/qt/se/start.py index eabb6b09..33c3a06b 100644 --- a/qt/se/start.py +++ b/qt/se/start.py @@ -6,6 +6,8 @@ # http://www.hardcoded.net/licenses/hs_license import sys +import sip +sip.setapi('QVariant', 1) from PyQt4.QtCore import QCoreApplication from PyQt4.QtGui import QApplication, QIcon, QPixmap diff --git a/run.py b/run.py index babd5c10..44eb9b88 100644 --- a/run.py +++ b/run.py @@ -20,7 +20,7 @@ def main(): edition = conf['edition'] ui = conf['ui'] dev = conf['dev'] - print "Running dupeGuru {0} with UI {1}".format(edition.upper(), ui) + print("Running dupeGuru {0} with UI {1}".format(edition.upper(), ui)) if ui == 'cocoa': subfolder = 'dev' if dev else 'release' app_path = { @@ -32,8 +32,9 @@ def main(): elif ui == 'qt': add_to_pythonpath('.') add_to_pythonpath('qt') + add_to_pythonpath(op.join('qt', 'base')) os.chdir(op.join('qt', edition)) - os.system('python start.py') + os.system('python3 start.py') os.chdir('..') if __name__ == '__main__':