Use lxml everywhere for xml save/load (instead of ElementTree and minidom).

This commit is contained in:
Virgil Dupras 2010-03-01 12:21:43 +01:00
parent b17ca66f73
commit 47a6ceffbc
7 changed files with 199 additions and 273 deletions

View File

@ -6,7 +6,7 @@
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
import xml.dom.minidom
from lxml import etree
from hsutil import io
from hsutil.files import FileOrPath
@ -126,38 +126,38 @@ class Directories(object):
def load_from_file(self, infile):
try:
doc = xml.dom.minidom.parse(infile)
root = etree.parse(infile).getroot()
except:
return
root_path_nodes = doc.getElementsByTagName('root_directory')
for rdn in root_path_nodes:
if not rdn.getAttributeNode('path'):
for rdn in root.iterchildren('root_directory'):
attrib = rdn.attrib
if 'path' not in attrib:
continue
path = rdn.getAttributeNode('path').nodeValue
path = attrib['path']
try:
self.add_path(Path(path))
except (AlreadyThereError, InvalidPathError):
pass
state_nodes = doc.getElementsByTagName('state')
for sn in state_nodes:
if not (sn.getAttributeNode('path') and sn.getAttributeNode('value')):
for sn in root.iterchildren('state'):
attrib = sn.attrib
if not ('path' in attrib and 'value' in attrib):
continue
path = sn.getAttributeNode('path').nodeValue
state = sn.getAttributeNode('value').nodeValue
path = attrib['path']
state = attrib['value']
self.set_state(Path(path), int(state))
def save_to_file(self,outfile):
def save_to_file(self, outfile):
with FileOrPath(outfile, 'wb') as fp:
doc = xml.dom.minidom.Document()
root = doc.appendChild(doc.createElement('directories'))
root = etree.Element('directories')
for root_path in self:
root_path_node = root.appendChild(doc.createElement('root_directory'))
root_path_node.setAttribute('path', unicode(root_path).encode('utf-8'))
root_path_node = etree.SubElement(root, 'root_directory')
root_path_node.set('path', unicode(root_path))
for path, state in self.states.iteritems():
state_node = root.appendChild(doc.createElement('state'))
state_node.setAttribute('path', unicode(path).encode('utf-8'))
state_node.setAttribute('value', str(state))
doc.writexml(fp, '\t', '\t', '\n', encoding='utf-8')
state_node = etree.SubElement(root, 'state')
state_node.set('path', unicode(path))
state_node.set('value', unicode(state))
tree = etree.ElementTree(root)
tree.write(fp, encoding='utf-8')
def set_state(self, path, state):
if self.get_state(path) == state:

View File

@ -6,9 +6,9 @@
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
from hsutil.files import FileOrPath
from lxml import etree
import xml.dom.minidom
from hsutil.files import FileOrPath
class IgnoreList(object):
"""An ignore list implementation that is iterable, filterable and exportable to XML.
@ -71,45 +71,38 @@ class IgnoreList(object):
self._ignored[first] = matches
self._count += 1
def load_from_xml(self,infile):
def load_from_xml(self, infile):
"""Loads the ignore list from an XML file created with save_to_xml.
infile can be a file object or a filename.
"""
try:
doc = xml.dom.minidom.parse(infile)
root = etree.parse(infile).getroot()
except Exception:
return
file_nodes = doc.getElementsByTagName('file')
for fn in file_nodes:
if not fn.getAttributeNode('path'):
for fn in root.iterchildren('file'):
file_path = fn.get('path')
if not file_path:
continue
file_path = fn.getAttributeNode('path').nodeValue
subfile_nodes = fn.getElementsByTagName('file')
for sfn in subfile_nodes:
if not sfn.getAttributeNode('path'):
continue
subfile_path = sfn.getAttributeNode('path').nodeValue
self.Ignore(file_path,subfile_path)
for sfn in fn.iterchildren('file'):
subfile_path = sfn.get('path')
if subfile_path:
self.Ignore(file_path, subfile_path)
def save_to_xml(self,outfile):
def save_to_xml(self, outfile):
"""Create an XML file that can be used by load_from_xml.
outfile can be a file object or a filename.
"""
doc = xml.dom.minidom.Document()
root = doc.appendChild(doc.createElement('ignore_list'))
for file,subfiles in self._ignored.items():
file_node = root.appendChild(doc.createElement('file'))
if isinstance(file,unicode):
file = file.encode('utf-8')
file_node.setAttribute('path',file)
for subfile in subfiles:
subfile_node = file_node.appendChild(doc.createElement('file'))
if isinstance(subfile,unicode):
subfile = subfile.encode('utf-8')
subfile_node.setAttribute('path',subfile)
root = etree.Element('ignore_list')
for filename, subfiles in self._ignored.items():
file_node = etree.SubElement(root, 'file')
file_node.set('path', filename)
for subfilename in subfiles:
subfile_node = etree.SubElement(file_node, 'file')
subfile_node.set('path', subfilename)
tree = etree.ElementTree(root)
with FileOrPath(outfile, 'wb') as fp:
doc.writexml(fp,'\t','\t','\n',encoding='utf-8')
tree.write(fp, encoding='utf-8')

View File

@ -8,16 +8,14 @@
import logging
import re
from xml.sax import handler, make_parser, SAXException
from xml.sax.saxutils import XMLGenerator
from xml.sax.xmlreader import AttributesImpl
from lxml import etree
from . import engine
from hsutil.job import nulljob
from hsutil.markable import Markable
from hsutil.misc import flatten, cond, nonone
from hsutil.misc import flatten, nonone
from hsutil.str import format_size
from hsutil.files import open_if_filename
from hsutil.files import FileOrPath
class Results(Markable):
#---Override
@ -168,42 +166,54 @@ class Results(Markable):
is_markable = _is_markable
def load_from_xml(self, infile, get_file, j=nulljob):
def do_match(ref_file, other_files, group):
if not other_files:
return
for other_file in other_files:
group.add_match(engine.get_match(ref_file, other_file))
do_match(other_files[0], other_files[1:], group)
self.apply_filter(None)
handler = _ResultsHandler(get_file)
try:
parser = make_parser()
except Exception as e:
# This special handling is to try to figure out the cause of #47
# We don't silently return, because we want the user to send error report.
logging.exception(e)
try:
import xml.parsers.expat
logging.warning('importing xml.parsers.expat went ok, WTF?')
except Exception as e:
# This log should give a little more details about the cause of this all
logging.exception(e)
raise
raise
parser.setContentHandler(handler)
try:
infile, must_close = open_if_filename(infile)
except IOError:
root = etree.parse(infile).getroot()
except Exception:
return
BUFSIZE = 1024 * 1024 # 1mb buffer
infile.seek(0, 2)
j.start_job(infile.tell() // BUFSIZE)
infile.seek(0, 0)
try:
while True:
data = infile.read(BUFSIZE)
if not data:
break
parser.feed(data)
j.add_progress()
except SAXException:
return
self.groups = handler.groups
for dupe_file in handler.marked:
group_elems = list(root.iterchildren('group'))
groups = []
marked = set()
for group_elem in j.iter_with_progress(group_elems, every=100):
group = engine.Group()
dupes = []
for file_elem in group_elem.iterchildren('file'):
path = file_elem.get('path')
words = file_elem.get('words', '')
if not path:
continue
file = get_file(path)
if file is None:
continue
file.words = words.split(',')
file.is_ref = file_elem.get('is_ref') == 'y'
dupes.append(file)
if file_elem.get('marked') == 'y':
marked.add(file)
for match_elem in group_elem.iterchildren('match'):
try:
attrs = match_elem.attrib
first_file = dupes[int(attrs['first'])]
second_file = dupes[int(attrs['second'])]
percentage = int(attrs['percentage'])
group.add_match(engine.Match(first_file, second_file, percentage))
except (IndexError, KeyError, ValueError): # Covers missing attr, non-int values and indexes out of bounds
pass
if (not group.matches) and (len(dupes) >= 2):
do_match(dupes[0], dupes[1:], group)
group.prioritize(lambda x: dupes.index(x))
if len(group):
groups.append(group)
j.add_progress()
self.groups = groups
for dupe_file in marked:
self.mark(dupe_file)
def make_ref(self, dupe):
@ -256,13 +266,10 @@ class Results(Markable):
def save_to_xml(self, outfile):
self.apply_filter(None)
outfile, must_close = open_if_filename(outfile, 'wb')
writer = XMLGenerator(outfile, 'utf-8')
writer.startDocument()
empty_attrs = AttributesImpl({})
writer.startElement('results', empty_attrs)
root = etree.Element('results')
# writer = XMLGenerator(outfile, 'utf-8')
for g in self.groups:
writer.startElement('group', empty_attrs)
group_elem = etree.SubElement(root, 'group')
dupe2index = {}
for index, d in enumerate(g):
dupe2index[d] = index
@ -270,27 +277,19 @@ class Results(Markable):
words = engine.unpack_fields(d.words)
except AttributeError:
words = ()
attrs = AttributesImpl({
'path': unicode(d.path),
'is_ref': cond(d.is_ref, 'y', 'n'),
'words': ','.join(words),
'marked': cond(self.is_marked(d), 'y', 'n')
})
writer.startElement('file', attrs)
writer.endElement('file')
file_elem = etree.SubElement(group_elem, 'file')
file_elem.set('path', unicode(d.path))
file_elem.set('is_ref', ('y' if d.is_ref else 'n'))
file_elem.set('words', ','.join(words))
file_elem.set('marked', ('y' if self.is_marked(d) else 'n'))
for match in g.matches:
attrs = AttributesImpl({
'first': str(dupe2index[match.first]),
'second': str(dupe2index[match.second]),
'percentage': str(int(match.percentage)),
})
writer.startElement('match', attrs)
writer.endElement('match')
writer.endElement('group')
writer.endElement('results')
writer.endDocument()
if must_close:
outfile.close()
match_elem = etree.SubElement(group_elem, 'match')
match_elem.set('first', unicode(dupe2index[match.first]))
match_elem.set('second', unicode(dupe2index[match.second]))
match_elem.set('percentage', unicode(int(match.percentage)))
tree = etree.ElementTree(root)
with FileOrPath(outfile, 'wb') as fp:
tree.write(fp, encoding='utf-8')
def sort_dupes(self, key, asc=True, delta=False):
if not self.__dupes:
@ -310,60 +309,3 @@ class Results(Markable):
dupes = property(__get_dupe_list)
groups = property(__get_groups, __set_groups)
stat_line = property(__get_stat_line)
class _ResultsHandler(handler.ContentHandler):
def __init__(self, get_file):
self.group = None
self.dupes = None
self.marked = set()
self.groups = []
self.get_file = get_file
def startElement(self, name, attrs):
if name == 'group':
self.group = engine.Group()
self.dupes = []
return
if (name == 'file') and (self.group is not None):
if not (('path' in attrs) and ('words' in attrs)):
return
path = attrs['path']
file = self.get_file(path)
if file is None:
return
file.words = attrs['words'].split(',')
file.is_ref = attrs.get('is_ref') == 'y'
self.dupes.append(file)
if attrs.get('marked') == 'y':
self.marked.add(file)
if (name == 'match') and (self.group is not None):
try:
first_file = self.dupes[int(attrs['first'])]
second_file = self.dupes[int(attrs['second'])]
percentage = int(attrs['percentage'])
self.group.add_match(engine.Match(first_file, second_file, percentage))
except (IndexError, KeyError, ValueError): # Covers missing attr, non-int values and indexes out of bounds
pass
def endElement(self, name):
def do_match(ref_file, other_files, group):
if not other_files:
return
for other_file in other_files:
group.add_match(engine.get_match(ref_file, other_file))
do_match(other_files[0], other_files[1:], group)
if name == 'group':
group = self.group
self.group = None
dupes = self.dupes
self.dupes = []
if group is None:
return
if len(dupes) < 2:
return
if not group.matches: # <match> elements not present, do it manually, without %
do_match(dupes[0], dupes[1:], group)
group.prioritize(lambda x: dupes.index(x))
self.groups.append(group)

View File

@ -248,7 +248,7 @@ class TCDupeGuruWithResults(TestCase):
self.rtree.selected_paths = paths
self.app.remove_selected()
# The first 2 dupes have been removed. The 3rd one is a ref. It stays there, in first position.
eq_(self.rtree.selected_paths, [[0]]) # no exception
eq_(self.rtree.selected_paths, [[0, 0]]) # no exception
def test_selectResultNodePaths(self):
app = self.app
@ -366,10 +366,7 @@ class TCDupeGuruWithResults(TestCase):
app = self.app
self.rtree.selected_paths = [[0, 0], [1, 0]]
app.remove_selected()
eq_(len(app.results.dupes), 1)
app.remove_selected()
eq_(len(app.results.dupes), 1)
self.rtree.selected_path = [0, 0]
eq_(len(app.results.dupes), 1) # the first path is now selected
app.remove_selected()
eq_(len(app.results.dupes), 0)

View File

@ -229,10 +229,9 @@ class TCbuild_word_dict(TestCase):
self.log = []
s = "foo bar"
build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j)
# We don't have intermediate log entries because iter_with_progress is called with every > 1
self.assertEqual(0,self.log[0])
self.assertEqual(33,self.log[1])
self.assertEqual(66,self.log[2])
self.assertEqual(100,self.log[3])
self.assertEqual(100,self.log[1])
class TCmerge_similar_words(TestCase):

View File

@ -7,7 +7,7 @@
# http://www.hardcoded.net/licenses/hs_license
import cStringIO
import xml.dom.minidom
from lxml import etree
from nose.tools import eq_
@ -62,26 +62,25 @@ def test_save_to_xml():
f = cStringIO.StringIO()
il.save_to_xml(f)
f.seek(0)
doc = xml.dom.minidom.parse(f)
root = doc.documentElement
eq_('ignore_list',root.nodeName)
children = [c for c in root.childNodes if c.localName]
eq_(2,len(children))
eq_(2,len([c for c in children if c.nodeName == 'file']))
f1,f2 = children
subchildren = [c for c in f1.childNodes if c.localName == 'file'] +\
[c for c in f2.childNodes if c.localName == 'file']
eq_(3,len(subchildren))
doc = etree.parse(f)
root = doc.getroot()
eq_(root.tag, 'ignore_list')
eq_(len(root), 2)
eq_(len([c for c in root if c.tag == 'file']), 2)
f1, f2 = root[:]
subchildren = [c for c in f1 if c.tag == 'file'] + [c for c in f2 if c.tag == 'file']
eq_(len(subchildren), 3)
def test_SaveThenLoad():
il = IgnoreList()
il.Ignore('foo','bar')
il.Ignore('foo','bleh')
il.Ignore('bleh','bar')
il.Ignore(u'\u00e9','bar')
il.Ignore('foo', 'bar')
il.Ignore('foo', 'bleh')
il.Ignore('bleh', 'bar')
il.Ignore(u'\u00e9', 'bar')
f = cStringIO.StringIO()
il.save_to_xml(f)
f.seek(0)
f.seek(0)
il = IgnoreList()
il.load_from_xml(f)
eq_(4,len(il))
@ -129,9 +128,9 @@ def test_filter():
assert not il.AreIgnored('foo','bar')
assert il.AreIgnored('bar','baz')
def test_save_with_non_ascii_non_unicode_items():
def test_save_with_non_ascii_items():
il = IgnoreList()
il.Ignore('\xac','\xbf')
il.Ignore(u'\xac', u'\xbf')
f = cStringIO.StringIO()
try:
il.save_to_xml(f)

View File

@ -7,10 +7,9 @@
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
import unittest
import StringIO
import xml.dom.minidom
import os.path as op
from lxml import etree
from hsutil.path import Path
from hsutil.testcase import TestCase
@ -18,7 +17,7 @@ from hsutil.misc import first
from . import engine_test, data
from .. import engine
from ..results import *
from ..results import Results
class NamedObject(engine_test.NamedObject):
path = property(lambda x:Path('basepath') + x.name)
@ -65,9 +64,9 @@ class TCResultsEmpty(TestCase):
f = StringIO.StringIO()
self.results.save_to_xml(f)
f.seek(0)
doc = xml.dom.minidom.parse(f)
root = doc.documentElement
self.assertEqual('results',root.nodeName)
doc = etree.parse(f)
root = doc.getroot()
self.assertEqual('results', root.tag)
class TCResultsWithSomeGroups(TestCase):
@ -321,16 +320,16 @@ class TCResultsMarkings(TestCase):
f = StringIO.StringIO()
self.results.save_to_xml(f)
f.seek(0)
doc = xml.dom.minidom.parse(f)
root = doc.documentElement
g1,g2 = root.getElementsByTagName('group')
d1,d2,d3 = g1.getElementsByTagName('file')
self.assertEqual('n',d1.getAttributeNode('marked').nodeValue)
self.assertEqual('n',d2.getAttributeNode('marked').nodeValue)
self.assertEqual('y',d3.getAttributeNode('marked').nodeValue)
d1,d2 = g2.getElementsByTagName('file')
self.assertEqual('n',d1.getAttributeNode('marked').nodeValue)
self.assertEqual('y',d2.getAttributeNode('marked').nodeValue)
doc = etree.parse(f)
root = doc.getroot()
g1, g2 = root.iterchildren('group')
d1, d2, d3 = g1.iterchildren('file')
self.assertEqual('n', d1.get('marked'))
self.assertEqual('n', d2.get('marked'))
self.assertEqual('y', d3.get('marked'))
d1, d2 = g2.iterchildren('file')
self.assertEqual('n', d1.get('marked'))
self.assertEqual('y', d2.get('marked'))
def test_LoadXML(self):
def get_file(path):
@ -366,38 +365,35 @@ class TCResultsXML(TestCase):
f = StringIO.StringIO()
self.results.save_to_xml(f)
f.seek(0)
doc = xml.dom.minidom.parse(f)
root = doc.documentElement
self.assertEqual('results',root.nodeName)
children = [c for c in root.childNodes if c.localName]
self.assertEqual(2,len(children))
self.assertEqual(2,len([c for c in children if c.nodeName == 'group']))
g1,g2 = children
children = [c for c in g1.childNodes if c.localName]
self.assertEqual(6,len(children))
self.assertEqual(3,len([c for c in children if c.nodeName == 'file']))
self.assertEqual(3,len([c for c in children if c.nodeName == 'match']))
d1,d2,d3 = [c for c in children if c.nodeName == 'file']
self.assertEqual(op.join('basepath','foo bar'),d1.getAttributeNode('path').nodeValue)
self.assertEqual(op.join('basepath','bar bleh'),d2.getAttributeNode('path').nodeValue)
self.assertEqual(op.join('basepath','foo bleh'),d3.getAttributeNode('path').nodeValue)
self.assertEqual('y',d1.getAttributeNode('is_ref').nodeValue)
self.assertEqual('n',d2.getAttributeNode('is_ref').nodeValue)
self.assertEqual('n',d3.getAttributeNode('is_ref').nodeValue)
self.assertEqual('foo,bar',d1.getAttributeNode('words').nodeValue)
self.assertEqual('bar,bleh',d2.getAttributeNode('words').nodeValue)
self.assertEqual('foo,bleh',d3.getAttributeNode('words').nodeValue)
children = [c for c in g2.childNodes if c.localName]
self.assertEqual(3,len(children))
self.assertEqual(2,len([c for c in children if c.nodeName == 'file']))
self.assertEqual(1,len([c for c in children if c.nodeName == 'match']))
d1,d2 = [c for c in children if c.nodeName == 'file']
self.assertEqual(op.join('basepath','ibabtu'),d1.getAttributeNode('path').nodeValue)
self.assertEqual(op.join('basepath','ibabtu'),d2.getAttributeNode('path').nodeValue)
self.assertEqual('n',d1.getAttributeNode('is_ref').nodeValue)
self.assertEqual('n',d2.getAttributeNode('is_ref').nodeValue)
self.assertEqual('ibabtu',d1.getAttributeNode('words').nodeValue)
self.assertEqual('ibabtu',d2.getAttributeNode('words').nodeValue)
doc = etree.parse(f)
root = doc.getroot()
self.assertEqual('results', root.tag)
self.assertEqual(2, len(root))
self.assertEqual(2, len([c for c in root if c.tag == 'group']))
g1, g2 = root
self.assertEqual(6,len(g1))
self.assertEqual(3,len([c for c in g1 if c.tag == 'file']))
self.assertEqual(3,len([c for c in g1 if c.tag == 'match']))
d1, d2, d3 = [c for c in g1 if c.tag == 'file']
self.assertEqual(op.join('basepath','foo bar'),d1.get('path'))
self.assertEqual(op.join('basepath','bar bleh'),d2.get('path'))
self.assertEqual(op.join('basepath','foo bleh'),d3.get('path'))
self.assertEqual('y',d1.get('is_ref'))
self.assertEqual('n',d2.get('is_ref'))
self.assertEqual('n',d3.get('is_ref'))
self.assertEqual('foo,bar',d1.get('words'))
self.assertEqual('bar,bleh',d2.get('words'))
self.assertEqual('foo,bleh',d3.get('words'))
self.assertEqual(3,len(g2))
self.assertEqual(2,len([c for c in g2 if c.tag == 'file']))
self.assertEqual(1,len([c for c in g2 if c.tag == 'match']))
d1, d2 = [c for c in g2 if c.tag == 'file']
self.assertEqual(op.join('basepath','ibabtu'),d1.get('path'))
self.assertEqual(op.join('basepath','ibabtu'),d2.get('path'))
self.assertEqual('n',d1.get('is_ref'))
self.assertEqual('n',d2.get('is_ref'))
self.assertEqual('ibabtu',d1.get('words'))
self.assertEqual('ibabtu',d2.get('words'))
def test_LoadXML(self):
def get_file(path):
@ -460,41 +456,41 @@ class TCResultsXML(TestCase):
def get_file(path):
return [f for f in self.objects if str(f.path) == path][0]
doc = xml.dom.minidom.Document()
root = doc.appendChild(doc.createElement('foobar')) #The root element shouldn't matter, really.
group_node = root.appendChild(doc.createElement('group'))
dupe_node = group_node.appendChild(doc.createElement('file')) #Perfectly correct file
dupe_node.setAttribute('path',op.join('basepath','foo bar'))
dupe_node.setAttribute('is_ref','y')
dupe_node.setAttribute('words','foo,bar')
dupe_node = group_node.appendChild(doc.createElement('file')) #is_ref missing, default to 'n'
dupe_node.setAttribute('path',op.join('basepath','foo bleh'))
dupe_node.setAttribute('words','foo,bleh')
dupe_node = group_node.appendChild(doc.createElement('file')) #words are missing, invalid.
dupe_node.setAttribute('path',op.join('basepath','bar bleh'))
dupe_node = group_node.appendChild(doc.createElement('file')) #path is missing, invalid.
dupe_node.setAttribute('words','foo,bleh')
dupe_node = group_node.appendChild(doc.createElement('foobar')) #Invalid element name
dupe_node.setAttribute('path',op.join('basepath','bar bleh'))
dupe_node.setAttribute('is_ref','y')
dupe_node.setAttribute('words','bar,bleh')
match_node = group_node.appendChild(doc.createElement('match')) # match pointing to a bad index
match_node.setAttribute('first', '42')
match_node.setAttribute('second', '45')
match_node = group_node.appendChild(doc.createElement('match')) # match with missing attrs
match_node = group_node.appendChild(doc.createElement('match')) # match with non-int values
match_node.setAttribute('first', 'foo')
match_node.setAttribute('second', 'bar')
match_node.setAttribute('percentage', 'baz')
group_node = root.appendChild(doc.createElement('foobar')) #invalid group
group_node = root.appendChild(doc.createElement('group')) #empty group
root = etree.Element('foobar') #The root element shouldn't matter, really.
group_node = etree.SubElement(root, 'group')
dupe_node = etree.SubElement(group_node, 'file') #Perfectly correct file
dupe_node.set('path', op.join('basepath','foo bar'))
dupe_node.set('is_ref', 'y')
dupe_node.set('words', 'foo,bar')
dupe_node = etree.SubElement(group_node, 'file') #is_ref missing, default to 'n'
dupe_node.set('path',op.join('basepath','foo bleh'))
dupe_node.set('words','foo,bleh')
dupe_node = etree.SubElement(group_node, 'file') #words are missing, valid.
dupe_node.set('path',op.join('basepath','bar bleh'))
dupe_node = etree.SubElement(group_node, 'file') #path is missing, invalid.
dupe_node.set('words','foo,bleh')
dupe_node = etree.SubElement(group_node, 'foobar') #Invalid element name
dupe_node.set('path',op.join('basepath','bar bleh'))
dupe_node.set('is_ref','y')
dupe_node.set('words','bar,bleh')
match_node = etree.SubElement(group_node, 'match') # match pointing to a bad index
match_node.set('first', '42')
match_node.set('second', '45')
match_node = etree.SubElement(group_node, 'match') # match with missing attrs
match_node = etree.SubElement(group_node, 'match') # match with non-int values
match_node.set('first', 'foo')
match_node.set('second', 'bar')
match_node.set('percentage', 'baz')
group_node = etree.SubElement(root, 'foobar') #invalid group
group_node = etree.SubElement(root, 'group') #empty group
f = StringIO.StringIO()
doc.writexml(f,'\t','\t','\n',encoding='utf-8')
tree = etree.ElementTree(root)
tree.write(f, encoding='utf-8')
f.seek(0)
r = Results(data)
r.load_from_xml(f,get_file)
r.load_from_xml(f, get_file)
self.assertEqual(1,len(r.groups))
self.assertEqual(2,len(r.groups[0]))
self.assertEqual(3,len(r.groups[0]))
def test_xml_non_ascii(self):
def get_file(path):