1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2025-09-11 17:58:17 +00:00

Use lxml everywhere for xml save/load (instead of ElementTree and minidom).

This commit is contained in:
Virgil Dupras 2010-03-01 12:21:43 +01:00
parent b17ca66f73
commit 47a6ceffbc
7 changed files with 199 additions and 273 deletions

View File

@ -6,7 +6,7 @@
# which should be included with this package. The terms are also available at # which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license # http://www.hardcoded.net/licenses/hs_license
import xml.dom.minidom from lxml import etree
from hsutil import io from hsutil import io
from hsutil.files import FileOrPath from hsutil.files import FileOrPath
@ -126,38 +126,38 @@ class Directories(object):
def load_from_file(self, infile): def load_from_file(self, infile):
try: try:
doc = xml.dom.minidom.parse(infile) root = etree.parse(infile).getroot()
except: except:
return return
root_path_nodes = doc.getElementsByTagName('root_directory') for rdn in root.iterchildren('root_directory'):
for rdn in root_path_nodes: attrib = rdn.attrib
if not rdn.getAttributeNode('path'): if 'path' not in attrib:
continue continue
path = rdn.getAttributeNode('path').nodeValue path = attrib['path']
try: try:
self.add_path(Path(path)) self.add_path(Path(path))
except (AlreadyThereError, InvalidPathError): except (AlreadyThereError, InvalidPathError):
pass pass
state_nodes = doc.getElementsByTagName('state') for sn in root.iterchildren('state'):
for sn in state_nodes: attrib = sn.attrib
if not (sn.getAttributeNode('path') and sn.getAttributeNode('value')): if not ('path' in attrib and 'value' in attrib):
continue continue
path = sn.getAttributeNode('path').nodeValue path = attrib['path']
state = sn.getAttributeNode('value').nodeValue state = attrib['value']
self.set_state(Path(path), int(state)) self.set_state(Path(path), int(state))
def save_to_file(self,outfile): def save_to_file(self, outfile):
with FileOrPath(outfile, 'wb') as fp: with FileOrPath(outfile, 'wb') as fp:
doc = xml.dom.minidom.Document() root = etree.Element('directories')
root = doc.appendChild(doc.createElement('directories'))
for root_path in self: for root_path in self:
root_path_node = root.appendChild(doc.createElement('root_directory')) root_path_node = etree.SubElement(root, 'root_directory')
root_path_node.setAttribute('path', unicode(root_path).encode('utf-8')) root_path_node.set('path', unicode(root_path))
for path, state in self.states.iteritems(): for path, state in self.states.iteritems():
state_node = root.appendChild(doc.createElement('state')) state_node = etree.SubElement(root, 'state')
state_node.setAttribute('path', unicode(path).encode('utf-8')) state_node.set('path', unicode(path))
state_node.setAttribute('value', str(state)) state_node.set('value', unicode(state))
doc.writexml(fp, '\t', '\t', '\n', encoding='utf-8') tree = etree.ElementTree(root)
tree.write(fp, encoding='utf-8')
def set_state(self, path, state): def set_state(self, path, state):
if self.get_state(path) == state: if self.get_state(path) == state:

View File

@ -6,9 +6,9 @@
# which should be included with this package. The terms are also available at # which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license # http://www.hardcoded.net/licenses/hs_license
from hsutil.files import FileOrPath from lxml import etree
import xml.dom.minidom from hsutil.files import FileOrPath
class IgnoreList(object): class IgnoreList(object):
"""An ignore list implementation that is iterable, filterable and exportable to XML. """An ignore list implementation that is iterable, filterable and exportable to XML.
@ -71,45 +71,38 @@ class IgnoreList(object):
self._ignored[first] = matches self._ignored[first] = matches
self._count += 1 self._count += 1
def load_from_xml(self,infile): def load_from_xml(self, infile):
"""Loads the ignore list from a XML created with save_to_xml. """Loads the ignore list from a XML created with save_to_xml.
infile can be a file object or a filename. infile can be a file object or a filename.
""" """
try: try:
doc = xml.dom.minidom.parse(infile) root = etree.parse(infile).getroot()
except Exception: except Exception:
return return
file_nodes = doc.getElementsByTagName('file') for fn in root.iterchildren('file'):
for fn in file_nodes: file_path = fn.get('path')
if not fn.getAttributeNode('path'): if not file_path:
continue continue
file_path = fn.getAttributeNode('path').nodeValue for sfn in fn.iterchildren('file'):
subfile_nodes = fn.getElementsByTagName('file') subfile_path = sfn.get('path')
for sfn in subfile_nodes: if subfile_path:
if not sfn.getAttributeNode('path'): self.Ignore(file_path, subfile_path)
continue
subfile_path = sfn.getAttributeNode('path').nodeValue
self.Ignore(file_path,subfile_path)
def save_to_xml(self,outfile): def save_to_xml(self, outfile):
"""Create a XML file that can be used by load_from_xml. """Create a XML file that can be used by load_from_xml.
outfile can be a file object or a filename. outfile can be a file object or a filename.
""" """
doc = xml.dom.minidom.Document() root = etree.Element('ignore_list')
root = doc.appendChild(doc.createElement('ignore_list')) for filename, subfiles in self._ignored.items():
for file,subfiles in self._ignored.items(): file_node = etree.SubElement(root, 'file')
file_node = root.appendChild(doc.createElement('file')) file_node.set('path', filename)
if isinstance(file,unicode): for subfilename in subfiles:
file = file.encode('utf-8') subfile_node = etree.SubElement(file_node, 'file')
file_node.setAttribute('path',file) subfile_node.set('path', subfilename)
for subfile in subfiles: tree = etree.ElementTree(root)
subfile_node = file_node.appendChild(doc.createElement('file'))
if isinstance(subfile,unicode):
subfile = subfile.encode('utf-8')
subfile_node.setAttribute('path',subfile)
with FileOrPath(outfile, 'wb') as fp: with FileOrPath(outfile, 'wb') as fp:
doc.writexml(fp,'\t','\t','\n',encoding='utf-8') tree.write(fp, encoding='utf-8')

View File

@ -8,16 +8,14 @@
import logging import logging
import re import re
from xml.sax import handler, make_parser, SAXException from lxml import etree
from xml.sax.saxutils import XMLGenerator
from xml.sax.xmlreader import AttributesImpl
from . import engine from . import engine
from hsutil.job import nulljob from hsutil.job import nulljob
from hsutil.markable import Markable from hsutil.markable import Markable
from hsutil.misc import flatten, cond, nonone from hsutil.misc import flatten, nonone
from hsutil.str import format_size from hsutil.str import format_size
from hsutil.files import open_if_filename from hsutil.files import FileOrPath
class Results(Markable): class Results(Markable):
#---Override #---Override
@ -168,42 +166,54 @@ class Results(Markable):
is_markable = _is_markable is_markable = _is_markable
def load_from_xml(self, infile, get_file, j=nulljob): def load_from_xml(self, infile, get_file, j=nulljob):
def do_match(ref_file, other_files, group):
if not other_files:
return
for other_file in other_files:
group.add_match(engine.get_match(ref_file, other_file))
do_match(other_files[0], other_files[1:], group)
self.apply_filter(None) self.apply_filter(None)
handler = _ResultsHandler(get_file)
try: try:
parser = make_parser() root = etree.parse(infile).getroot()
except Exception as e: except Exception:
# This special handling is to try to figure out the cause of #47
# We don't silently return, because we want the user to send error report.
logging.exception(e)
try:
import xml.parsers.expat
logging.warning('importing xml.parsers.expat went ok, WTF?')
except Exception as e:
# This log should give a little more details about the cause of this all
logging.exception(e)
raise
raise
parser.setContentHandler(handler)
try:
infile, must_close = open_if_filename(infile)
except IOError:
return return
BUFSIZE = 1024 * 1024 # 1mb buffer group_elems = list(root.iterchildren('group'))
infile.seek(0, 2) groups = []
j.start_job(infile.tell() // BUFSIZE) marked = set()
infile.seek(0, 0) for group_elem in j.iter_with_progress(group_elems, every=100):
group = engine.Group()
dupes = []
for file_elem in group_elem.iterchildren('file'):
path = file_elem.get('path')
words = file_elem.get('words', '')
if not path:
continue
file = get_file(path)
if file is None:
continue
file.words = words.split(',')
file.is_ref = file_elem.get('is_ref') == 'y'
dupes.append(file)
if file_elem.get('marked') == 'y':
marked.add(file)
for match_elem in group_elem.iterchildren('match'):
try: try:
while True: attrs = match_elem.attrib
data = infile.read(BUFSIZE) first_file = dupes[int(attrs['first'])]
if not data: second_file = dupes[int(attrs['second'])]
break percentage = int(attrs['percentage'])
parser.feed(data) group.add_match(engine.Match(first_file, second_file, percentage))
except (IndexError, KeyError, ValueError): # Covers missing attr, non-int values and indexes out of bounds
pass
if (not group.matches) and (len(dupes) >= 2):
do_match(dupes[0], dupes[1:], group)
group.prioritize(lambda x: dupes.index(x))
if len(group):
groups.append(group)
j.add_progress() j.add_progress()
except SAXException: self.groups = groups
return for dupe_file in marked:
self.groups = handler.groups
for dupe_file in handler.marked:
self.mark(dupe_file) self.mark(dupe_file)
def make_ref(self, dupe): def make_ref(self, dupe):
@ -256,13 +266,10 @@ class Results(Markable):
def save_to_xml(self, outfile): def save_to_xml(self, outfile):
self.apply_filter(None) self.apply_filter(None)
outfile, must_close = open_if_filename(outfile, 'wb') root = etree.Element('results')
writer = XMLGenerator(outfile, 'utf-8') # writer = XMLGenerator(outfile, 'utf-8')
writer.startDocument()
empty_attrs = AttributesImpl({})
writer.startElement('results', empty_attrs)
for g in self.groups: for g in self.groups:
writer.startElement('group', empty_attrs) group_elem = etree.SubElement(root, 'group')
dupe2index = {} dupe2index = {}
for index, d in enumerate(g): for index, d in enumerate(g):
dupe2index[d] = index dupe2index[d] = index
@ -270,27 +277,19 @@ class Results(Markable):
words = engine.unpack_fields(d.words) words = engine.unpack_fields(d.words)
except AttributeError: except AttributeError:
words = () words = ()
attrs = AttributesImpl({ file_elem = etree.SubElement(group_elem, 'file')
'path': unicode(d.path), file_elem.set('path', unicode(d.path))
'is_ref': cond(d.is_ref, 'y', 'n'), file_elem.set('is_ref', ('y' if d.is_ref else 'n'))
'words': ','.join(words), file_elem.set('words', ','.join(words))
'marked': cond(self.is_marked(d), 'y', 'n') file_elem.set('marked', ('y' if self.is_marked(d) else 'n'))
})
writer.startElement('file', attrs)
writer.endElement('file')
for match in g.matches: for match in g.matches:
attrs = AttributesImpl({ match_elem = etree.SubElement(group_elem, 'match')
'first': str(dupe2index[match.first]), match_elem.set('first', unicode(dupe2index[match.first]))
'second': str(dupe2index[match.second]), match_elem.set('second', unicode(dupe2index[match.second]))
'percentage': str(int(match.percentage)), match_elem.set('percentage', unicode(int(match.percentage)))
}) tree = etree.ElementTree(root)
writer.startElement('match', attrs) with FileOrPath(outfile, 'wb') as fp:
writer.endElement('match') tree.write(fp, encoding='utf-8')
writer.endElement('group')
writer.endElement('results')
writer.endDocument()
if must_close:
outfile.close()
def sort_dupes(self, key, asc=True, delta=False): def sort_dupes(self, key, asc=True, delta=False):
if not self.__dupes: if not self.__dupes:
@ -310,60 +309,3 @@ class Results(Markable):
dupes = property(__get_dupe_list) dupes = property(__get_dupe_list)
groups = property(__get_groups, __set_groups) groups = property(__get_groups, __set_groups)
stat_line = property(__get_stat_line) stat_line = property(__get_stat_line)
class _ResultsHandler(handler.ContentHandler):
def __init__(self, get_file):
self.group = None
self.dupes = None
self.marked = set()
self.groups = []
self.get_file = get_file
def startElement(self, name, attrs):
if name == 'group':
self.group = engine.Group()
self.dupes = []
return
if (name == 'file') and (self.group is not None):
if not (('path' in attrs) and ('words' in attrs)):
return
path = attrs['path']
file = self.get_file(path)
if file is None:
return
file.words = attrs['words'].split(',')
file.is_ref = attrs.get('is_ref') == 'y'
self.dupes.append(file)
if attrs.get('marked') == 'y':
self.marked.add(file)
if (name == 'match') and (self.group is not None):
try:
first_file = self.dupes[int(attrs['first'])]
second_file = self.dupes[int(attrs['second'])]
percentage = int(attrs['percentage'])
self.group.add_match(engine.Match(first_file, second_file, percentage))
except (IndexError, KeyError, ValueError): # Covers missing attr, non-int values and indexes out of bounds
pass
def endElement(self, name):
def do_match(ref_file, other_files, group):
if not other_files:
return
for other_file in other_files:
group.add_match(engine.get_match(ref_file, other_file))
do_match(other_files[0], other_files[1:], group)
if name == 'group':
group = self.group
self.group = None
dupes = self.dupes
self.dupes = []
if group is None:
return
if len(dupes) < 2:
return
if not group.matches: # <match> elements not present, do it manually, without %
do_match(dupes[0], dupes[1:], group)
group.prioritize(lambda x: dupes.index(x))
self.groups.append(group)

View File

@ -248,7 +248,7 @@ class TCDupeGuruWithResults(TestCase):
self.rtree.selected_paths = paths self.rtree.selected_paths = paths
self.app.remove_selected() self.app.remove_selected()
# The first 2 dupes have been removed. The 3rd one is a ref. it stays there, in first pos. # The first 2 dupes have been removed. The 3rd one is a ref. it stays there, in first pos.
eq_(self.rtree.selected_paths, [[0]]) # no exception eq_(self.rtree.selected_paths, [[0, 0]]) # no exception
def test_selectResultNodePaths(self): def test_selectResultNodePaths(self):
app = self.app app = self.app
@ -366,10 +366,7 @@ class TCDupeGuruWithResults(TestCase):
app = self.app app = self.app
self.rtree.selected_paths = [[0, 0], [1, 0]] self.rtree.selected_paths = [[0, 0], [1, 0]]
app.remove_selected() app.remove_selected()
eq_(len(app.results.dupes), 1) eq_(len(app.results.dupes), 1) # the first path is now selected
app.remove_selected()
eq_(len(app.results.dupes), 1)
self.rtree.selected_path = [0, 0]
app.remove_selected() app.remove_selected()
eq_(len(app.results.dupes), 0) eq_(len(app.results.dupes), 0)

View File

@ -229,10 +229,9 @@ class TCbuild_word_dict(TestCase):
self.log = [] self.log = []
s = "foo bar" s = "foo bar"
build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j) build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j)
# We don't have intermediate log because iter_with_progress is called with every > 1
self.assertEqual(0,self.log[0]) self.assertEqual(0,self.log[0])
self.assertEqual(33,self.log[1]) self.assertEqual(100,self.log[1])
self.assertEqual(66,self.log[2])
self.assertEqual(100,self.log[3])
class TCmerge_similar_words(TestCase): class TCmerge_similar_words(TestCase):

View File

@ -7,7 +7,7 @@
# http://www.hardcoded.net/licenses/hs_license # http://www.hardcoded.net/licenses/hs_license
import cStringIO import cStringIO
import xml.dom.minidom from lxml import etree
from nose.tools import eq_ from nose.tools import eq_
@ -62,26 +62,25 @@ def test_save_to_xml():
f = cStringIO.StringIO() f = cStringIO.StringIO()
il.save_to_xml(f) il.save_to_xml(f)
f.seek(0) f.seek(0)
doc = xml.dom.minidom.parse(f) doc = etree.parse(f)
root = doc.documentElement root = doc.getroot()
eq_('ignore_list',root.nodeName) eq_(root.tag, 'ignore_list')
children = [c for c in root.childNodes if c.localName] eq_(len(root), 2)
eq_(2,len(children)) eq_(len([c for c in root if c.tag == 'file']), 2)
eq_(2,len([c for c in children if c.nodeName == 'file'])) f1, f2 = root[:]
f1,f2 = children subchildren = [c for c in f1 if c.tag == 'file'] + [c for c in f2 if c.tag == 'file']
subchildren = [c for c in f1.childNodes if c.localName == 'file'] +\ eq_(len(subchildren), 3)
[c for c in f2.childNodes if c.localName == 'file']
eq_(3,len(subchildren))
def test_SaveThenLoad(): def test_SaveThenLoad():
il = IgnoreList() il = IgnoreList()
il.Ignore('foo','bar') il.Ignore('foo', 'bar')
il.Ignore('foo','bleh') il.Ignore('foo', 'bleh')
il.Ignore('bleh','bar') il.Ignore('bleh', 'bar')
il.Ignore(u'\u00e9','bar') il.Ignore(u'\u00e9', 'bar')
f = cStringIO.StringIO() f = cStringIO.StringIO()
il.save_to_xml(f) il.save_to_xml(f)
f.seek(0) f.seek(0)
f.seek(0)
il = IgnoreList() il = IgnoreList()
il.load_from_xml(f) il.load_from_xml(f)
eq_(4,len(il)) eq_(4,len(il))
@ -129,9 +128,9 @@ def test_filter():
assert not il.AreIgnored('foo','bar') assert not il.AreIgnored('foo','bar')
assert il.AreIgnored('bar','baz') assert il.AreIgnored('bar','baz')
def test_save_with_non_ascii_non_unicode_items(): def test_save_with_non_ascii_items():
il = IgnoreList() il = IgnoreList()
il.Ignore('\xac','\xbf') il.Ignore(u'\xac', u'\xbf')
f = cStringIO.StringIO() f = cStringIO.StringIO()
try: try:
il.save_to_xml(f) il.save_to_xml(f)

View File

@ -7,10 +7,9 @@
# which should be included with this package. The terms are also available at # which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license # http://www.hardcoded.net/licenses/hs_license
import unittest
import StringIO import StringIO
import xml.dom.minidom
import os.path as op import os.path as op
from lxml import etree
from hsutil.path import Path from hsutil.path import Path
from hsutil.testcase import TestCase from hsutil.testcase import TestCase
@ -18,7 +17,7 @@ from hsutil.misc import first
from . import engine_test, data from . import engine_test, data
from .. import engine from .. import engine
from ..results import * from ..results import Results
class NamedObject(engine_test.NamedObject): class NamedObject(engine_test.NamedObject):
path = property(lambda x:Path('basepath') + x.name) path = property(lambda x:Path('basepath') + x.name)
@ -65,9 +64,9 @@ class TCResultsEmpty(TestCase):
f = StringIO.StringIO() f = StringIO.StringIO()
self.results.save_to_xml(f) self.results.save_to_xml(f)
f.seek(0) f.seek(0)
doc = xml.dom.minidom.parse(f) doc = etree.parse(f)
root = doc.documentElement root = doc.getroot()
self.assertEqual('results',root.nodeName) self.assertEqual('results', root.tag)
class TCResultsWithSomeGroups(TestCase): class TCResultsWithSomeGroups(TestCase):
@ -321,16 +320,16 @@ class TCResultsMarkings(TestCase):
f = StringIO.StringIO() f = StringIO.StringIO()
self.results.save_to_xml(f) self.results.save_to_xml(f)
f.seek(0) f.seek(0)
doc = xml.dom.minidom.parse(f) doc = etree.parse(f)
root = doc.documentElement root = doc.getroot()
g1,g2 = root.getElementsByTagName('group') g1, g2 = root.iterchildren('group')
d1,d2,d3 = g1.getElementsByTagName('file') d1, d2, d3 = g1.iterchildren('file')
self.assertEqual('n',d1.getAttributeNode('marked').nodeValue) self.assertEqual('n', d1.get('marked'))
self.assertEqual('n',d2.getAttributeNode('marked').nodeValue) self.assertEqual('n', d2.get('marked'))
self.assertEqual('y',d3.getAttributeNode('marked').nodeValue) self.assertEqual('y', d3.get('marked'))
d1,d2 = g2.getElementsByTagName('file') d1, d2 = g2.iterchildren('file')
self.assertEqual('n',d1.getAttributeNode('marked').nodeValue) self.assertEqual('n', d1.get('marked'))
self.assertEqual('y',d2.getAttributeNode('marked').nodeValue) self.assertEqual('y', d2.get('marked'))
def test_LoadXML(self): def test_LoadXML(self):
def get_file(path): def get_file(path):
@ -366,38 +365,35 @@ class TCResultsXML(TestCase):
f = StringIO.StringIO() f = StringIO.StringIO()
self.results.save_to_xml(f) self.results.save_to_xml(f)
f.seek(0) f.seek(0)
doc = xml.dom.minidom.parse(f) doc = etree.parse(f)
root = doc.documentElement root = doc.getroot()
self.assertEqual('results',root.nodeName) self.assertEqual('results', root.tag)
children = [c for c in root.childNodes if c.localName] self.assertEqual(2, len(root))
self.assertEqual(2,len(children)) self.assertEqual(2, len([c for c in root if c.tag == 'group']))
self.assertEqual(2,len([c for c in children if c.nodeName == 'group'])) g1, g2 = root
g1,g2 = children self.assertEqual(6,len(g1))
children = [c for c in g1.childNodes if c.localName] self.assertEqual(3,len([c for c in g1 if c.tag == 'file']))
self.assertEqual(6,len(children)) self.assertEqual(3,len([c for c in g1 if c.tag == 'match']))
self.assertEqual(3,len([c for c in children if c.nodeName == 'file'])) d1, d2, d3 = [c for c in g1 if c.tag == 'file']
self.assertEqual(3,len([c for c in children if c.nodeName == 'match'])) self.assertEqual(op.join('basepath','foo bar'),d1.get('path'))
d1,d2,d3 = [c for c in children if c.nodeName == 'file'] self.assertEqual(op.join('basepath','bar bleh'),d2.get('path'))
self.assertEqual(op.join('basepath','foo bar'),d1.getAttributeNode('path').nodeValue) self.assertEqual(op.join('basepath','foo bleh'),d3.get('path'))
self.assertEqual(op.join('basepath','bar bleh'),d2.getAttributeNode('path').nodeValue) self.assertEqual('y',d1.get('is_ref'))
self.assertEqual(op.join('basepath','foo bleh'),d3.getAttributeNode('path').nodeValue) self.assertEqual('n',d2.get('is_ref'))
self.assertEqual('y',d1.getAttributeNode('is_ref').nodeValue) self.assertEqual('n',d3.get('is_ref'))
self.assertEqual('n',d2.getAttributeNode('is_ref').nodeValue) self.assertEqual('foo,bar',d1.get('words'))
self.assertEqual('n',d3.getAttributeNode('is_ref').nodeValue) self.assertEqual('bar,bleh',d2.get('words'))
self.assertEqual('foo,bar',d1.getAttributeNode('words').nodeValue) self.assertEqual('foo,bleh',d3.get('words'))
self.assertEqual('bar,bleh',d2.getAttributeNode('words').nodeValue) self.assertEqual(3,len(g2))
self.assertEqual('foo,bleh',d3.getAttributeNode('words').nodeValue) self.assertEqual(2,len([c for c in g2 if c.tag == 'file']))
children = [c for c in g2.childNodes if c.localName] self.assertEqual(1,len([c for c in g2 if c.tag == 'match']))
self.assertEqual(3,len(children)) d1, d2 = [c for c in g2 if c.tag == 'file']
self.assertEqual(2,len([c for c in children if c.nodeName == 'file'])) self.assertEqual(op.join('basepath','ibabtu'),d1.get('path'))
self.assertEqual(1,len([c for c in children if c.nodeName == 'match'])) self.assertEqual(op.join('basepath','ibabtu'),d2.get('path'))
d1,d2 = [c for c in children if c.nodeName == 'file'] self.assertEqual('n',d1.get('is_ref'))
self.assertEqual(op.join('basepath','ibabtu'),d1.getAttributeNode('path').nodeValue) self.assertEqual('n',d2.get('is_ref'))
self.assertEqual(op.join('basepath','ibabtu'),d2.getAttributeNode('path').nodeValue) self.assertEqual('ibabtu',d1.get('words'))
self.assertEqual('n',d1.getAttributeNode('is_ref').nodeValue) self.assertEqual('ibabtu',d2.get('words'))
self.assertEqual('n',d2.getAttributeNode('is_ref').nodeValue)
self.assertEqual('ibabtu',d1.getAttributeNode('words').nodeValue)
self.assertEqual('ibabtu',d2.getAttributeNode('words').nodeValue)
def test_LoadXML(self): def test_LoadXML(self):
def get_file(path): def get_file(path):
@ -460,41 +456,41 @@ class TCResultsXML(TestCase):
def get_file(path): def get_file(path):
return [f for f in self.objects if str(f.path) == path][0] return [f for f in self.objects if str(f.path) == path][0]
doc = xml.dom.minidom.Document() root = etree.Element('foobar') #The root element shouldn't matter, really.
root = doc.appendChild(doc.createElement('foobar')) #The root element shouldn't matter, really. group_node = etree.SubElement(root, 'group')
group_node = root.appendChild(doc.createElement('group')) dupe_node = etree.SubElement(group_node, 'file') #Perfectly correct file
dupe_node = group_node.appendChild(doc.createElement('file')) #Perfectly correct file dupe_node.set('path', op.join('basepath','foo bar'))
dupe_node.setAttribute('path',op.join('basepath','foo bar')) dupe_node.set('is_ref', 'y')
dupe_node.setAttribute('is_ref','y') dupe_node.set('words', 'foo,bar')
dupe_node.setAttribute('words','foo,bar') dupe_node = etree.SubElement(group_node, 'file') #is_ref missing, default to 'n'
dupe_node = group_node.appendChild(doc.createElement('file')) #is_ref missing, default to 'n' dupe_node.set('path',op.join('basepath','foo bleh'))
dupe_node.setAttribute('path',op.join('basepath','foo bleh')) dupe_node.set('words','foo,bleh')
dupe_node.setAttribute('words','foo,bleh') dupe_node = etree.SubElement(group_node, 'file') #words are missing, valid.
dupe_node = group_node.appendChild(doc.createElement('file')) #words are missing, invalid. dupe_node.set('path',op.join('basepath','bar bleh'))
dupe_node.setAttribute('path',op.join('basepath','bar bleh')) dupe_node = etree.SubElement(group_node, 'file') #path is missing, invalid.
dupe_node = group_node.appendChild(doc.createElement('file')) #path is missing, invalid. dupe_node.set('words','foo,bleh')
dupe_node.setAttribute('words','foo,bleh') dupe_node = etree.SubElement(group_node, 'foobar') #Invalid element name
dupe_node = group_node.appendChild(doc.createElement('foobar')) #Invalid element name dupe_node.set('path',op.join('basepath','bar bleh'))
dupe_node.setAttribute('path',op.join('basepath','bar bleh')) dupe_node.set('is_ref','y')
dupe_node.setAttribute('is_ref','y') dupe_node.set('words','bar,bleh')
dupe_node.setAttribute('words','bar,bleh') match_node = etree.SubElement(group_node, 'match') # match pointing to a bad index
match_node = group_node.appendChild(doc.createElement('match')) # match pointing to a bad index match_node.set('first', '42')
match_node.setAttribute('first', '42') match_node.set('second', '45')
match_node.setAttribute('second', '45') match_node = etree.SubElement(group_node, 'match') # match with missing attrs
match_node = group_node.appendChild(doc.createElement('match')) # match with missing attrs match_node = etree.SubElement(group_node, 'match') # match with non-int values
match_node = group_node.appendChild(doc.createElement('match')) # match with non-int values match_node.set('first', 'foo')
match_node.setAttribute('first', 'foo') match_node.set('second', 'bar')
match_node.setAttribute('second', 'bar') match_node.set('percentage', 'baz')
match_node.setAttribute('percentage', 'baz') group_node = etree.SubElement(root, 'foobar') #invalid group
group_node = root.appendChild(doc.createElement('foobar')) #invalid group group_node = etree.SubElement(root, 'group') #empty group
group_node = root.appendChild(doc.createElement('group')) #empty group
f = StringIO.StringIO() f = StringIO.StringIO()
doc.writexml(f,'\t','\t','\n',encoding='utf-8') tree = etree.ElementTree(root)
tree.write(f, encoding='utf-8')
f.seek(0) f.seek(0)
r = Results(data) r = Results(data)
r.load_from_xml(f,get_file) r.load_from_xml(f, get_file)
self.assertEqual(1,len(r.groups)) self.assertEqual(1,len(r.groups))
self.assertEqual(2,len(r.groups[0])) self.assertEqual(3,len(r.groups[0]))
def test_xml_non_ascii(self): def test_xml_non_ascii(self):
def get_file(path): def get_file(path):