1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2025-03-10 05:34:36 +00:00

Removed dependency on lxml (it made the final package much bigger, and building it on Windows is not fun).

This commit is contained in:
Virgil Dupras 2010-08-15 14:42:55 +02:00
parent 12e6c400b9
commit c8827769b4
14 changed files with 71 additions and 65 deletions

1
README
View File

@ -29,7 +29,6 @@ General dependencies
- Send2Trash3k (http://hg.hardcoded.net/send2trash3k) - Send2Trash3k (http://hg.hardcoded.net/send2trash3k)
- hsutil3k (http://hg.hardcoded.net/hsutil3k) - hsutil3k (http://hg.hardcoded.net/hsutil3k)
- hsaudiotag3k (for ME) (http://hg.hardcoded.net/hsaudiotag3k) - hsaudiotag3k (for ME) (http://hg.hardcoded.net/hsaudiotag3k)
- lxml, to read and write XML files. (http://codespeak.net/lxml/)
- Markdown, to generate help files. (http://pypi.python.org/pypi/Markdown) - Markdown, to generate help files. (http://pypi.python.org/pypi/Markdown)
- PyYaml, for help files and the build system. (http://pyyaml.org/) - PyYaml, for help files and the build system. (http://pyyaml.org/)
- py.test, to run unit tests. (http://codespeak.net/py/dist/test/) - py.test, to run unit tests. (http://codespeak.net/py/dist/test/)

View File

@ -13,7 +13,7 @@ from core.scanner import ScanType
# Fix py2app imports which chokes on relative imports and other stuff # Fix py2app imports which chokes on relative imports and other stuff
from core_me import app_cocoa, data, fs, scanner from core_me import app_cocoa, data, fs, scanner
from hsaudiotag import aiff, flac, genres, id3v1, id3v2, mp4, mpeg, ogg, wma from hsaudiotag import aiff, flac, genres, id3v1, id3v2, mp4, mpeg, ogg, wma
from lxml import etree, _elementpath import xml.etree.ElementPath
import gzip import gzip
class PyDupeGuru(PyDupeGuruBase): class PyDupeGuru(PyDupeGuruBase):

View File

@ -11,7 +11,7 @@ from core_pe import app_cocoa as app_pe_cocoa
import hsutil.conflict import hsutil.conflict
import core.engine, core.fs, core.app import core.engine, core.fs, core.app
import core_pe.block, core_pe.cache, core_pe.matchbase, core_pe.data, core_pe._block_osx import core_pe.block, core_pe.cache, core_pe.matchbase, core_pe.data, core_pe._block_osx
import lxml.etree, lxml._elementpath import xml.etree.ElementPath
import gzip import gzip
import aem.kae import aem.kae
import appscript.defaultterminology import appscript.defaultterminology

View File

@ -14,7 +14,7 @@ from core_se.app_cocoa import DupeGuru
import hsutil.conflict import hsutil.conflict
import core.engine, core.fs, core.app import core.engine, core.fs, core.app
import core_se.fs, core_se.data import core_se.fs, core_se.data
import lxml.etree, lxml._elementpath import xml.etree.ElementPath
import gzip import gzip
class PyDupeGuru(PyDupeGuruBase): class PyDupeGuru(PyDupeGuruBase):

View File

@ -6,7 +6,7 @@
# which should be included with this package. The terms are also available at # which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license # http://www.hardcoded.net/licenses/hs_license
from lxml import etree from xml.etree import ElementTree as ET
from hsutil import io from hsutil import io
from hsutil.files import FileOrPath from hsutil.files import FileOrPath
@ -126,10 +126,10 @@ class Directories(object):
def load_from_file(self, infile): def load_from_file(self, infile):
try: try:
root = etree.parse(infile).getroot() root = ET.parse(infile).getroot()
except: except Exception:
return return
for rdn in root.iterchildren('root_directory'): for rdn in root.getiterator('root_directory'):
attrib = rdn.attrib attrib = rdn.attrib
if 'path' not in attrib: if 'path' not in attrib:
continue continue
@ -138,7 +138,7 @@ class Directories(object):
self.add_path(Path(path)) self.add_path(Path(path))
except (AlreadyThereError, InvalidPathError): except (AlreadyThereError, InvalidPathError):
pass pass
for sn in root.iterchildren('state'): for sn in root.getiterator('state'):
attrib = sn.attrib attrib = sn.attrib
if not ('path' in attrib and 'value' in attrib): if not ('path' in attrib and 'value' in attrib):
continue continue
@ -148,15 +148,15 @@ class Directories(object):
def save_to_file(self, outfile): def save_to_file(self, outfile):
with FileOrPath(outfile, 'wb') as fp: with FileOrPath(outfile, 'wb') as fp:
root = etree.Element('directories') root = ET.Element('directories')
for root_path in self: for root_path in self:
root_path_node = etree.SubElement(root, 'root_directory') root_path_node = ET.SubElement(root, 'root_directory')
root_path_node.set('path', str(root_path)) root_path_node.set('path', str(root_path))
for path, state in self.states.items(): for path, state in self.states.items():
state_node = etree.SubElement(root, 'state') state_node = ET.SubElement(root, 'state')
state_node.set('path', str(path)) state_node.set('path', str(path))
state_node.set('value', str(state)) state_node.set('value', str(state))
tree = etree.ElementTree(root) tree = ET.ElementTree(root)
tree.write(fp, encoding='utf-8') tree.write(fp, encoding='utf-8')
def set_state(self, path, state): def set_state(self, path, state):

View File

@ -6,7 +6,7 @@
# which should be included with this package. The terms are also available at # which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license # http://www.hardcoded.net/licenses/hs_license
from lxml import etree from xml.etree import ElementTree as ET
from hsutil.files import FileOrPath from hsutil.files import FileOrPath
@ -77,14 +77,16 @@ class IgnoreList(object):
infile can be a file object or a filename. infile can be a file object or a filename.
""" """
try: try:
root = etree.parse(infile).getroot() root = ET.parse(infile).getroot()
except Exception: except Exception:
return return
for fn in root.iterchildren('file'): file_elems = (e for e in root if e.tag == 'file')
for fn in file_elems:
file_path = fn.get('path') file_path = fn.get('path')
if not file_path: if not file_path:
continue continue
for sfn in fn.iterchildren('file'): subfile_elems = (e for e in fn if e.tag == 'file')
for sfn in subfile_elems:
subfile_path = sfn.get('path') subfile_path = sfn.get('path')
if subfile_path: if subfile_path:
self.Ignore(file_path, subfile_path) self.Ignore(file_path, subfile_path)
@ -94,14 +96,14 @@ class IgnoreList(object):
outfile can be a file object or a filename. outfile can be a file object or a filename.
""" """
root = etree.Element('ignore_list') root = ET.Element('ignore_list')
for filename, subfiles in self._ignored.items(): for filename, subfiles in self._ignored.items():
file_node = etree.SubElement(root, 'file') file_node = ET.SubElement(root, 'file')
file_node.set('path', filename) file_node.set('path', filename)
for subfilename in subfiles: for subfilename in subfiles:
subfile_node = etree.SubElement(file_node, 'file') subfile_node = ET.SubElement(file_node, 'file')
subfile_node.set('path', subfilename) subfile_node.set('path', subfilename)
tree = etree.ElementTree(root) tree = ET.ElementTree(root)
with FileOrPath(outfile, 'wb') as fp: with FileOrPath(outfile, 'wb') as fp:
tree.write(fp, encoding='utf-8') tree.write(fp, encoding='utf-8')

View File

@ -8,7 +8,7 @@
import logging import logging
import re import re
from lxml import etree from xml.etree import ElementTree as ET
from . import engine from . import engine
from hscommon.job import nulljob from hscommon.job import nulljob
@ -178,16 +178,16 @@ class Results(Markable):
self.apply_filter(None) self.apply_filter(None)
try: try:
root = etree.parse(infile).getroot() root = ET.parse(infile).getroot()
except Exception: except Exception:
return return
group_elems = list(root.iterchildren('group')) group_elems = list(root.getiterator('group'))
groups = [] groups = []
marked = set() marked = set()
for group_elem in j.iter_with_progress(group_elems, every=100): for group_elem in j.iter_with_progress(group_elems, every=100):
group = engine.Group() group = engine.Group()
dupes = [] dupes = []
for file_elem in group_elem.iterchildren('file'): for file_elem in group_elem.getiterator('file'):
path = file_elem.get('path') path = file_elem.get('path')
words = file_elem.get('words', '') words = file_elem.get('words', '')
if not path: if not path:
@ -200,7 +200,7 @@ class Results(Markable):
dupes.append(file) dupes.append(file)
if file_elem.get('marked') == 'y': if file_elem.get('marked') == 'y':
marked.add(file) marked.add(file)
for match_elem in group_elem.iterchildren('match'): for match_elem in group_elem.getiterator('match'):
try: try:
attrs = match_elem.attrib attrs = match_elem.attrib
first_file = dupes[int(attrs['first'])] first_file = dupes[int(attrs['first'])]
@ -277,10 +277,10 @@ class Results(Markable):
def save_to_xml(self, outfile): def save_to_xml(self, outfile):
self.apply_filter(None) self.apply_filter(None)
root = etree.Element('results') root = ET.Element('results')
# writer = XMLGenerator(outfile, 'utf-8') # writer = XMLGenerator(outfile, 'utf-8')
for g in self.groups: for g in self.groups:
group_elem = etree.SubElement(root, 'group') group_elem = ET.SubElement(root, 'group')
dupe2index = {} dupe2index = {}
for index, d in enumerate(g): for index, d in enumerate(g):
dupe2index[d] = index dupe2index[d] = index
@ -288,7 +288,7 @@ class Results(Markable):
words = engine.unpack_fields(d.words) words = engine.unpack_fields(d.words)
except AttributeError: except AttributeError:
words = () words = ()
file_elem = etree.SubElement(group_elem, 'file') file_elem = ET.SubElement(group_elem, 'file')
try: try:
file_elem.set('path', str(d.path)) file_elem.set('path', str(d.path))
file_elem.set('words', ','.join(words)) file_elem.set('words', ','.join(words))
@ -297,11 +297,11 @@ class Results(Markable):
file_elem.set('is_ref', ('y' if d.is_ref else 'n')) file_elem.set('is_ref', ('y' if d.is_ref else 'n'))
file_elem.set('marked', ('y' if self.is_marked(d) else 'n')) file_elem.set('marked', ('y' if self.is_marked(d) else 'n'))
for match in g.matches: for match in g.matches:
match_elem = etree.SubElement(group_elem, 'match') match_elem = ET.SubElement(group_elem, 'match')
match_elem.set('first', str(dupe2index[match.first])) match_elem.set('first', str(dupe2index[match.first]))
match_elem.set('second', str(dupe2index[match.second])) match_elem.set('second', str(dupe2index[match.second]))
match_elem.set('percentage', str(int(match.percentage))) match_elem.set('percentage', str(int(match.percentage)))
tree = etree.ElementTree(root) tree = ET.ElementTree(root)
with FileOrPath(outfile, 'wb') as fp: with FileOrPath(outfile, 'wb') as fp:
tree.write(fp, encoding='utf-8') tree.write(fp, encoding='utf-8')
self.is_modified = False self.is_modified = False

View File

@ -7,7 +7,7 @@
# http://www.hardcoded.net/licenses/hs_license # http://www.hardcoded.net/licenses/hs_license
import io import io
from lxml import etree from xml.etree import ElementTree as ET
from hsutil.testutil import eq_ from hsutil.testutil import eq_
@ -62,7 +62,7 @@ def test_save_to_xml():
f = io.BytesIO() f = io.BytesIO()
il.save_to_xml(f) il.save_to_xml(f)
f.seek(0) f.seek(0)
doc = etree.parse(f) doc = ET.parse(f)
root = doc.getroot() root = doc.getroot()
eq_(root.tag, 'ignore_list') eq_(root.tag, 'ignore_list')
eq_(len(root), 2) eq_(len(root), 2)
@ -80,7 +80,6 @@ def test_SaveThenLoad():
f = io.BytesIO() f = io.BytesIO()
il.save_to_xml(f) il.save_to_xml(f)
f.seek(0) f.seek(0)
f.seek(0)
il = IgnoreList() il = IgnoreList()
il.load_from_xml(f) il.load_from_xml(f)
eq_(4,len(il)) eq_(4,len(il))

View File

@ -10,7 +10,7 @@
import io import io
import os.path as op import os.path as op
from lxml import etree from xml.etree import ElementTree as ET
from hsutil.path import Path from hsutil.path import Path
from hsutil.testutil import eq_ from hsutil.testutil import eq_
@ -66,7 +66,7 @@ class TCResultsEmpty(TestCase):
f = io.BytesIO() f = io.BytesIO()
self.results.save_to_xml(f) self.results.save_to_xml(f)
f.seek(0) f.seek(0)
doc = etree.parse(f) doc = ET.parse(f)
root = doc.getroot() root = doc.getroot()
eq_('results', root.tag) eq_('results', root.tag)
@ -380,14 +380,14 @@ class TCResultsMarkings(TestCase):
f = io.BytesIO() f = io.BytesIO()
self.results.save_to_xml(f) self.results.save_to_xml(f)
f.seek(0) f.seek(0)
doc = etree.parse(f) doc = ET.parse(f)
root = doc.getroot() root = doc.getroot()
g1, g2 = root.iterchildren('group') g1, g2 = root.getiterator('group')
d1, d2, d3 = g1.iterchildren('file') d1, d2, d3 = g1.getiterator('file')
eq_('n', d1.get('marked')) eq_('n', d1.get('marked'))
eq_('n', d2.get('marked')) eq_('n', d2.get('marked'))
eq_('y', d3.get('marked')) eq_('y', d3.get('marked'))
d1, d2 = g2.iterchildren('file') d1, d2 = g2.getiterator('file')
eq_('n', d1.get('marked')) eq_('n', d1.get('marked'))
eq_('y', d2.get('marked')) eq_('y', d2.get('marked'))
@ -425,7 +425,7 @@ class TCResultsXML(TestCase):
f = io.BytesIO() f = io.BytesIO()
self.results.save_to_xml(f) self.results.save_to_xml(f)
f.seek(0) f.seek(0)
doc = etree.parse(f) doc = ET.parse(f)
root = doc.getroot() root = doc.getroot()
eq_('results', root.tag) eq_('results', root.tag)
eq_(2, len(root)) eq_(2, len(root))
@ -516,35 +516,35 @@ class TCResultsXML(TestCase):
def get_file(path): def get_file(path):
return [f for f in self.objects if str(f.path) == path][0] return [f for f in self.objects if str(f.path) == path][0]
root = etree.Element('foobar') #The root element shouldn't matter, really. root = ET.Element('foobar') #The root element shouldn't matter, really.
group_node = etree.SubElement(root, 'group') group_node = ET.SubElement(root, 'group')
dupe_node = etree.SubElement(group_node, 'file') #Perfectly correct file dupe_node = ET.SubElement(group_node, 'file') #Perfectly correct file
dupe_node.set('path', op.join('basepath','foo bar')) dupe_node.set('path', op.join('basepath','foo bar'))
dupe_node.set('is_ref', 'y') dupe_node.set('is_ref', 'y')
dupe_node.set('words', 'foo,bar') dupe_node.set('words', 'foo,bar')
dupe_node = etree.SubElement(group_node, 'file') #is_ref missing, default to 'n' dupe_node = ET.SubElement(group_node, 'file') #is_ref missing, default to 'n'
dupe_node.set('path',op.join('basepath','foo bleh')) dupe_node.set('path',op.join('basepath','foo bleh'))
dupe_node.set('words','foo,bleh') dupe_node.set('words','foo,bleh')
dupe_node = etree.SubElement(group_node, 'file') #words are missing, valid. dupe_node = ET.SubElement(group_node, 'file') #words are missing, valid.
dupe_node.set('path',op.join('basepath','bar bleh')) dupe_node.set('path',op.join('basepath','bar bleh'))
dupe_node = etree.SubElement(group_node, 'file') #path is missing, invalid. dupe_node = ET.SubElement(group_node, 'file') #path is missing, invalid.
dupe_node.set('words','foo,bleh') dupe_node.set('words','foo,bleh')
dupe_node = etree.SubElement(group_node, 'foobar') #Invalid element name dupe_node = ET.SubElement(group_node, 'foobar') #Invalid element name
dupe_node.set('path',op.join('basepath','bar bleh')) dupe_node.set('path',op.join('basepath','bar bleh'))
dupe_node.set('is_ref','y') dupe_node.set('is_ref','y')
dupe_node.set('words','bar,bleh') dupe_node.set('words','bar,bleh')
match_node = etree.SubElement(group_node, 'match') # match pointing to a bad index match_node = ET.SubElement(group_node, 'match') # match pointing to a bad index
match_node.set('first', '42') match_node.set('first', '42')
match_node.set('second', '45') match_node.set('second', '45')
match_node = etree.SubElement(group_node, 'match') # match with missing attrs match_node = ET.SubElement(group_node, 'match') # match with missing attrs
match_node = etree.SubElement(group_node, 'match') # match with non-int values match_node = ET.SubElement(group_node, 'match') # match with non-int values
match_node.set('first', 'foo') match_node.set('first', 'foo')
match_node.set('second', 'bar') match_node.set('second', 'bar')
match_node.set('percentage', 'baz') match_node.set('percentage', 'baz')
group_node = etree.SubElement(root, 'foobar') #invalid group group_node = ET.SubElement(root, 'foobar') #invalid group
group_node = etree.SubElement(root, 'group') #empty group group_node = ET.SubElement(root, 'group') #empty group
f = io.BytesIO() f = io.BytesIO()
tree = etree.ElementTree(root) tree = ET.ElementTree(root)
tree.write(f, encoding='utf-8') tree.write(f, encoding='utf-8')
f.seek(0) f.seek(0)
r = Results(data) r = Results(data)

View File

@ -8,12 +8,13 @@
import os.path as op import os.path as op
import plistlib import plistlib
import logging
import re
from lxml import etree
from appscript import app, k, CommandError from appscript import app, k, CommandError
from hsutil import io from hsutil import io
from hsutil.str import get_file_ext from hsutil.str import get_file_ext, remove_invalid_xml
from hsutil.path import Path from hsutil.path import Path
from hscommon.cocoa import as_fetch from hscommon.cocoa import as_fetch
from hscommon.cocoa.objcmin import NSUserDefaults, NSURL from hscommon.cocoa.objcmin import NSUserDefaults, NSURL
@ -67,11 +68,16 @@ def get_iphoto_database_path():
def get_iphoto_pictures(plistpath): def get_iphoto_pictures(plistpath):
if not io.exists(plistpath): if not io.exists(plistpath):
return [] return []
# We make the xml go through lxml so that it can fix broken xml which iPhoto sometimes produces. s = io.open(plistpath, 'rt', encoding='utf-8').read()
parser = etree.XMLParser(recover=True) # There was a case where a guy had 0x10 chars in his plist, causing expat errors on loading
root = etree.parse(io.open(plistpath), parser=parser).getroot() s = remove_invalid_xml(s, replace_with='')
s = etree.tostring(root) # It seems that iPhoto sometimes doesn't properly escape & chars. The regexp below is to find
plist = plistlib.readPlistFromString(s) # any & char that is not a &-based entity (&amp;, &quot;, etc.). based on TextMate's XML
# bundle's regexp
s, count = re.subn(r'&(?![a-zA-Z0-9_-]+|#[0-9]+|#x[0-9a-fA-F]+;)', '', s)
if count:
logging.warning("%d invalid XML entities replacement made", count)
plist = plistlib.readPlistFromBytes(s.encode('utf-8'))
result = [] result = []
for photo_data in plist['Master Image List'].values(): for photo_data in plist['Master Image List'].values():
if photo_data['MediaType'] != 'Image': if photo_data['MediaType'] != 'Image':

View File

@ -8,5 +8,5 @@ Homepage: http://www.hardcoded.net
Package: dupeguru-me Package: dupeguru-me
Architecture: any Architecture: any
Depends: python (>= 2.6), python-qt4 (>= 4.6), python-lxml (>= 2.1) Depends: python (>= 2.6), python-qt4 (>= 4.6)
Description: dupeGuru Music Edition Description: dupeGuru Music Edition

View File

@ -8,5 +8,5 @@ Homepage: http://www.hardcoded.net
Package: dupeguru-pe Package: dupeguru-pe
Architecture: any Architecture: any
Depends: python (>= 2.6), python-qt4 (>= 4.6), python-lxml (>= 2.1), python-imaging (>= 1.1.6) Depends: python (>= 2.6), python-qt4 (>= 4.6), python-imaging (>= 1.1.6)
Description: dupeGuru Picture Edition Description: dupeGuru Picture Edition

View File

@ -8,5 +8,5 @@ Homepage: http://www.hardcoded.net
Package: dupeguru-se Package: dupeguru-se
Architecture: any Architecture: any
Depends: python (>= 2.6), python-qt4 (>= 4.6), python-lxml (>= 2.1) Depends: python (>= 2.6), python-qt4 (>= 4.6)
Description: dupeGuru Description: dupeGuru

View File

@ -1,7 +1,7 @@
# cxfreeze has some problems detecting all dependencies. # cxfreeze has some problems detecting all dependencies.
# This module explicitly imports those problematic modules. # This module explicitly imports those problematic modules.
import lxml._elementpath import xml.etree.ElementPath
import gzip import gzip
import os import os