From 47a6ceffbc7235288840e5a7a3e32a4cc9c8ad12 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Mon, 1 Mar 2010 12:21:43 +0100 Subject: [PATCH] Use lxml everywhere for xml save/load (instead of ElementTree and minidom). --- core/directories.py | 40 ++++---- core/ignore.py | 49 +++++----- core/results.py | 184 +++++++++++++------------------------ core/tests/app_test.py | 7 +- core/tests/engine_test.py | 5 +- core/tests/ignore_test.py | 33 ++++--- core/tests/results_test.py | 154 +++++++++++++++---------------- 7 files changed, 199 insertions(+), 273 deletions(-) diff --git a/core/directories.py b/core/directories.py index 1b46c6e9..7fa75dd1 100644 --- a/core/directories.py +++ b/core/directories.py @@ -6,7 +6,7 @@ # which should be included with this package. The terms are also available at # http://www.hardcoded.net/licenses/hs_license -import xml.dom.minidom +from lxml import etree from hsutil import io from hsutil.files import FileOrPath @@ -126,38 +126,38 @@ class Directories(object): def load_from_file(self, infile): try: - doc = xml.dom.minidom.parse(infile) + root = etree.parse(infile).getroot() except: return - root_path_nodes = doc.getElementsByTagName('root_directory') - for rdn in root_path_nodes: - if not rdn.getAttributeNode('path'): + for rdn in root.iterchildren('root_directory'): + attrib = rdn.attrib + if 'path' not in attrib: continue - path = rdn.getAttributeNode('path').nodeValue + path = attrib['path'] try: self.add_path(Path(path)) except (AlreadyThereError, InvalidPathError): pass - state_nodes = doc.getElementsByTagName('state') - for sn in state_nodes: - if not (sn.getAttributeNode('path') and sn.getAttributeNode('value')): + for sn in root.iterchildren('state'): + attrib = sn.attrib + if not ('path' in attrib and 'value' in attrib): continue - path = sn.getAttributeNode('path').nodeValue - state = sn.getAttributeNode('value').nodeValue + path = attrib['path'] + state = attrib['value'] self.set_state(Path(path), int(state)) - def save_to_file(self,outfile): + def save_to_file(self, outfile): with FileOrPath(outfile, 'wb') as fp: - doc = xml.dom.minidom.Document() - root = doc.appendChild(doc.createElement('directories')) + root = etree.Element('directories') for root_path in self: - root_path_node = root.appendChild(doc.createElement('root_directory')) - root_path_node.setAttribute('path', unicode(root_path).encode('utf-8')) + root_path_node = etree.SubElement(root, 'root_directory') + root_path_node.set('path', unicode(root_path)) for path, state in self.states.iteritems(): - state_node = root.appendChild(doc.createElement('state')) - state_node.setAttribute('path', unicode(path).encode('utf-8')) - state_node.setAttribute('value', str(state)) - doc.writexml(fp, '\t', '\t', '\n', encoding='utf-8') + state_node = etree.SubElement(root, 'state') + state_node.set('path', unicode(path)) + state_node.set('value', unicode(state)) + tree = etree.ElementTree(root) + tree.write(fp, encoding='utf-8') def set_state(self, path, state): if self.get_state(path) == state: diff --git a/core/ignore.py b/core/ignore.py index 45ea8cd7..d51a579b 100644 --- a/core/ignore.py +++ b/core/ignore.py @@ -6,9 +6,9 @@ # which should be included with this package. The terms are also available at # http://www.hardcoded.net/licenses/hs_license -from hsutil.files import FileOrPath +from lxml import etree -import xml.dom.minidom +from hsutil.files import FileOrPath class IgnoreList(object): """An ignore list implementation that is iterable, filterable and exportable to XML. @@ -71,45 +71,38 @@ class IgnoreList(object): self._ignored[first] = matches self._count += 1 - def load_from_xml(self,infile): + def load_from_xml(self, infile): """Loads the ignore list from a XML created with save_to_xml. infile can be a file object or a filename. """ try: - doc = xml.dom.minidom.parse(infile) + root = etree.parse(infile).getroot() except Exception: return - file_nodes = doc.getElementsByTagName('file') - for fn in file_nodes: - if not fn.getAttributeNode('path'): + for fn in root.iterchildren('file'): + file_path = fn.get('path') + if not file_path: continue - file_path = fn.getAttributeNode('path').nodeValue - subfile_nodes = fn.getElementsByTagName('file') - for sfn in subfile_nodes: - if not sfn.getAttributeNode('path'): - continue - subfile_path = sfn.getAttributeNode('path').nodeValue - self.Ignore(file_path,subfile_path) + for sfn in fn.iterchildren('file'): + subfile_path = sfn.get('path') + if subfile_path: + self.Ignore(file_path, subfile_path) - def save_to_xml(self,outfile): + def save_to_xml(self, outfile): """Create a XML file that can be used by load_from_xml. outfile can be a file object or a filename. """ - doc = xml.dom.minidom.Document() - root = doc.appendChild(doc.createElement('ignore_list')) - for file,subfiles in self._ignored.items(): - file_node = root.appendChild(doc.createElement('file')) - if isinstance(file,unicode): - file = file.encode('utf-8') - file_node.setAttribute('path',file) - for subfile in subfiles: - subfile_node = file_node.appendChild(doc.createElement('file')) - if isinstance(subfile,unicode): - subfile = subfile.encode('utf-8') - subfile_node.setAttribute('path',subfile) + root = etree.Element('ignore_list') + for filename, subfiles in self._ignored.items(): + file_node = etree.SubElement(root, 'file') + file_node.set('path', filename) + for subfilename in subfiles: + subfile_node = etree.SubElement(file_node, 'file') + subfile_node.set('path', subfilename) + tree = etree.ElementTree(root) with FileOrPath(outfile, 'wb') as fp: - doc.writexml(fp,'\t','\t','\n',encoding='utf-8') + tree.write(fp, encoding='utf-8') diff --git a/core/results.py b/core/results.py index ae19ead2..81f97335 100644 --- a/core/results.py +++ b/core/results.py @@ -8,16 +8,14 @@ import logging import re -from xml.sax import handler, make_parser, SAXException -from xml.sax.saxutils import XMLGenerator -from xml.sax.xmlreader import AttributesImpl +from lxml import etree from . import engine from hsutil.job import nulljob from hsutil.markable import Markable -from hsutil.misc import flatten, cond, nonone +from hsutil.misc import flatten, nonone from hsutil.str import format_size -from hsutil.files import open_if_filename +from hsutil.files import FileOrPath class Results(Markable): #---Override @@ -168,42 +166,54 @@ class Results(Markable): is_markable = _is_markable def load_from_xml(self, infile, get_file, j=nulljob): + def do_match(ref_file, other_files, group): + if not other_files: + return + for other_file in other_files: + group.add_match(engine.get_match(ref_file, other_file)) + do_match(other_files[0], other_files[1:], group) + self.apply_filter(None) - handler = _ResultsHandler(get_file) try: - parser = make_parser() - except Exception as e: - # This special handling is to try to figure out the cause of #47 - # We don't silently return, because we want the user to send error report. - logging.exception(e) - try: - import xml.parsers.expat - logging.warning('importing xml.parsers.expat went ok, WTF?') - except Exception as e: - # This log should give a little more details about the cause of this all - logging.exception(e) - raise - raise - parser.setContentHandler(handler) - try: - infile, must_close = open_if_filename(infile) - except IOError: + root = etree.parse(infile).getroot() + except Exception: return - BUFSIZE = 1024 * 1024 # 1mb buffer - infile.seek(0, 2) - j.start_job(infile.tell() // BUFSIZE) - infile.seek(0, 0) - try: - while True: - data = infile.read(BUFSIZE) - if not data: - break - parser.feed(data) - j.add_progress() - except SAXException: - return - self.groups = handler.groups - for dupe_file in handler.marked: + group_elems = list(root.iterchildren('group')) + groups = [] + marked = set() + for group_elem in j.iter_with_progress(group_elems, every=100): + group = engine.Group() + dupes = [] + for file_elem in group_elem.iterchildren('file'): + path = file_elem.get('path') + words = file_elem.get('words', '') + if not path: + continue + file = get_file(path) + if file is None: + continue + file.words = words.split(',') + file.is_ref = file_elem.get('is_ref') == 'y' + dupes.append(file) + if file_elem.get('marked') == 'y': + marked.add(file) + for match_elem in group_elem.iterchildren('match'): + try: + attrs = match_elem.attrib + first_file = dupes[int(attrs['first'])] + second_file = dupes[int(attrs['second'])] + percentage = int(attrs['percentage']) + group.add_match(engine.Match(first_file, second_file, percentage)) + except (IndexError, KeyError, ValueError): # Covers missing attr, non-int values and indexes out of bounds + pass + if (not group.matches) and (len(dupes) >= 2): + do_match(dupes[0], dupes[1:], group) + group.prioritize(lambda x: dupes.index(x)) + if len(group): + groups.append(group) + j.add_progress() + self.groups = groups + for dupe_file in marked: self.mark(dupe_file) def make_ref(self, dupe): @@ -256,13 +266,10 @@ class Results(Markable): def save_to_xml(self, outfile): self.apply_filter(None) - outfile, must_close = open_if_filename(outfile, 'wb') - writer = XMLGenerator(outfile, 'utf-8') - writer.startDocument() - empty_attrs = AttributesImpl({}) - writer.startElement('results', empty_attrs) + root = etree.Element('results') + # writer = XMLGenerator(outfile, 'utf-8') for g in self.groups: - writer.startElement('group', empty_attrs) + group_elem = etree.SubElement(root, 'group') dupe2index = {} for index, d in enumerate(g): dupe2index[d] = index @@ -270,27 +277,19 @@ class Results(Markable): words = engine.unpack_fields(d.words) except AttributeError: words = () - attrs = AttributesImpl({ - 'path': unicode(d.path), - 'is_ref': cond(d.is_ref, 'y', 'n'), - 'words': ','.join(words), - 'marked': cond(self.is_marked(d), 'y', 'n') - }) - writer.startElement('file', attrs) - writer.endElement('file') + file_elem = etree.SubElement(group_elem, 'file') + file_elem.set('path', unicode(d.path)) + file_elem.set('is_ref', ('y' if d.is_ref else 'n')) + file_elem.set('words', ','.join(words)) + file_elem.set('marked', ('y' if self.is_marked(d) else 'n')) for match in g.matches: - attrs = AttributesImpl({ - 'first': str(dupe2index[match.first]), - 'second': str(dupe2index[match.second]), - 'percentage': str(int(match.percentage)), - }) - writer.startElement('match', attrs) - writer.endElement('match') - writer.endElement('group') - writer.endElement('results') - writer.endDocument() - if must_close: - outfile.close() + match_elem = etree.SubElement(group_elem, 'match') + match_elem.set('first', unicode(dupe2index[match.first])) + match_elem.set('second', unicode(dupe2index[match.second])) + match_elem.set('percentage', unicode(int(match.percentage))) + tree = etree.ElementTree(root) + with FileOrPath(outfile, 'wb') as fp: + tree.write(fp, encoding='utf-8') def sort_dupes(self, key, asc=True, delta=False): if not self.__dupes: @@ -310,60 +309,3 @@ class Results(Markable): dupes = property(__get_dupe_list) groups = property(__get_groups, __set_groups) stat_line = property(__get_stat_line) - -class _ResultsHandler(handler.ContentHandler): - def __init__(self, get_file): - self.group = None - self.dupes = None - self.marked = set() - self.groups = [] - self.get_file = get_file - - def startElement(self, name, attrs): - if name == 'group': - self.group = engine.Group() - self.dupes = [] - return - if (name == 'file') and (self.group is not None): - if not (('path' in attrs) and ('words' in attrs)): - return - path = attrs['path'] - file = self.get_file(path) - if file is None: - return - file.words = attrs['words'].split(',') - file.is_ref = attrs.get('is_ref') == 'y' - self.dupes.append(file) - if attrs.get('marked') == 'y': - self.marked.add(file) - if (name == 'match') and (self.group is not None): - try: - first_file = self.dupes[int(attrs['first'])] - second_file = self.dupes[int(attrs['second'])] - percentage = int(attrs['percentage']) - self.group.add_match(engine.Match(first_file, second_file, percentage)) - except (IndexError, KeyError, ValueError): # Covers missing attr, non-int values and indexes out of bounds - pass - - def endElement(self, name): - def do_match(ref_file, other_files, group): - if not other_files: - return - for other_file in other_files: - group.add_match(engine.get_match(ref_file, other_file)) - do_match(other_files[0], other_files[1:], group) - - if name == 'group': - group = self.group - self.group = None - dupes = self.dupes - self.dupes = [] - if group is None: - return - if len(dupes) < 2: - return - if not group.matches: # elements not present, do it manually, without % - do_match(dupes[0], dupes[1:], group) - group.prioritize(lambda x: dupes.index(x)) - self.groups.append(group) - diff --git a/core/tests/app_test.py b/core/tests/app_test.py index 800d0d68..70533b83 100644 --- a/core/tests/app_test.py +++ b/core/tests/app_test.py @@ -248,7 +248,7 @@ class TCDupeGuruWithResults(TestCase): self.rtree.selected_paths = paths self.app.remove_selected() # The first 2 dupes have been removed. The 3rd one is a ref. it stays there, in first pos. - eq_(self.rtree.selected_paths, [[0]]) # no exception + eq_(self.rtree.selected_paths, [[0, 0]]) # no exception def test_selectResultNodePaths(self): app = self.app @@ -366,10 +366,7 @@ class TCDupeGuruWithResults(TestCase): app = self.app self.rtree.selected_paths = [[0, 0], [1, 0]] app.remove_selected() - eq_(len(app.results.dupes), 1) - app.remove_selected() - eq_(len(app.results.dupes), 1) - self.rtree.selected_path = [0, 0] + eq_(len(app.results.dupes), 1) # the first path is now selected app.remove_selected() eq_(len(app.results.dupes), 0) diff --git a/core/tests/engine_test.py b/core/tests/engine_test.py index f51c9b7e..528843e2 100644 --- a/core/tests/engine_test.py +++ b/core/tests/engine_test.py @@ -229,10 +229,9 @@ class TCbuild_word_dict(TestCase): self.log = [] s = "foo bar" build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j) + # We don't have intermediate log because iter_with_progress is called with every > 1 self.assertEqual(0,self.log[0]) - self.assertEqual(33,self.log[1]) - self.assertEqual(66,self.log[2]) - self.assertEqual(100,self.log[3]) + self.assertEqual(100,self.log[1]) class TCmerge_similar_words(TestCase): diff --git a/core/tests/ignore_test.py b/core/tests/ignore_test.py index 604482cd..4054a9cf 100644 --- a/core/tests/ignore_test.py +++ b/core/tests/ignore_test.py @@ -7,7 +7,7 @@ # http://www.hardcoded.net/licenses/hs_license import cStringIO -import xml.dom.minidom +from lxml import etree from nose.tools import eq_ @@ -62,26 +62,25 @@ def test_save_to_xml(): f = cStringIO.StringIO() il.save_to_xml(f) f.seek(0) - doc = xml.dom.minidom.parse(f) - root = doc.documentElement - eq_('ignore_list',root.nodeName) - children = [c for c in root.childNodes if c.localName] - eq_(2,len(children)) - eq_(2,len([c for c in children if c.nodeName == 'file'])) - f1,f2 = children - subchildren = [c for c in f1.childNodes if c.localName == 'file'] +\ - [c for c in f2.childNodes if c.localName == 'file'] - eq_(3,len(subchildren)) + doc = etree.parse(f) + root = doc.getroot() + eq_(root.tag, 'ignore_list') + eq_(len(root), 2) + eq_(len([c for c in root if c.tag == 'file']), 2) + f1, f2 = root[:] + subchildren = [c for c in f1 if c.tag == 'file'] + [c for c in f2 if c.tag == 'file'] + eq_(len(subchildren), 3) def test_SaveThenLoad(): il = IgnoreList() - il.Ignore('foo','bar') - il.Ignore('foo','bleh') - il.Ignore('bleh','bar') - il.Ignore(u'\u00e9','bar') + il.Ignore('foo', 'bar') + il.Ignore('foo', 'bleh') + il.Ignore('bleh', 'bar') + il.Ignore(u'\u00e9', 'bar') f = cStringIO.StringIO() il.save_to_xml(f) f.seek(0) + f.seek(0) il = IgnoreList() il.load_from_xml(f) eq_(4,len(il)) @@ -129,9 +128,9 @@ def test_filter(): assert not il.AreIgnored('foo','bar') assert il.AreIgnored('bar','baz') -def test_save_with_non_ascii_non_unicode_items(): +def test_save_with_non_ascii_items(): il = IgnoreList() - il.Ignore('\xac','\xbf') + il.Ignore(u'\xac', u'\xbf') f = cStringIO.StringIO() try: il.save_to_xml(f) diff --git a/core/tests/results_test.py b/core/tests/results_test.py index f7868799..2cdb43bd 100644 --- a/core/tests/results_test.py +++ b/core/tests/results_test.py @@ -7,10 +7,9 @@ # which should be included with this package. The terms are also available at # http://www.hardcoded.net/licenses/hs_license -import unittest import StringIO -import xml.dom.minidom import os.path as op +from lxml import etree from hsutil.path import Path from hsutil.testcase import TestCase @@ -18,7 +17,7 @@ from hsutil.misc import first from . import engine_test, data from .. import engine -from ..results import * +from ..results import Results class NamedObject(engine_test.NamedObject): path = property(lambda x:Path('basepath') + x.name) @@ -65,9 +64,9 @@ class TCResultsEmpty(TestCase): f = StringIO.StringIO() self.results.save_to_xml(f) f.seek(0) - doc = xml.dom.minidom.parse(f) - root = doc.documentElement - self.assertEqual('results',root.nodeName) + doc = etree.parse(f) + root = doc.getroot() + self.assertEqual('results', root.tag) class TCResultsWithSomeGroups(TestCase): @@ -321,16 +320,16 @@ class TCResultsMarkings(TestCase): f = StringIO.StringIO() self.results.save_to_xml(f) f.seek(0) - doc = xml.dom.minidom.parse(f) - root = doc.documentElement - g1,g2 = root.getElementsByTagName('group') - d1,d2,d3 = g1.getElementsByTagName('file') - self.assertEqual('n',d1.getAttributeNode('marked').nodeValue) - self.assertEqual('n',d2.getAttributeNode('marked').nodeValue) - self.assertEqual('y',d3.getAttributeNode('marked').nodeValue) - d1,d2 = g2.getElementsByTagName('file') - self.assertEqual('n',d1.getAttributeNode('marked').nodeValue) - self.assertEqual('y',d2.getAttributeNode('marked').nodeValue) + doc = etree.parse(f) + root = doc.getroot() + g1, g2 = root.iterchildren('group') + d1, d2, d3 = g1.iterchildren('file') + self.assertEqual('n', d1.get('marked')) + self.assertEqual('n', d2.get('marked')) + self.assertEqual('y', d3.get('marked')) + d1, d2 = g2.iterchildren('file') + self.assertEqual('n', d1.get('marked')) + self.assertEqual('y', d2.get('marked')) def test_LoadXML(self): def get_file(path): @@ -366,38 +365,35 @@ class TCResultsXML(TestCase): f = StringIO.StringIO() self.results.save_to_xml(f) f.seek(0) - doc = xml.dom.minidom.parse(f) - root = doc.documentElement - self.assertEqual('results',root.nodeName) - children = [c for c in root.childNodes if c.localName] - self.assertEqual(2,len(children)) - self.assertEqual(2,len([c for c in children if c.nodeName == 'group'])) - g1,g2 = children - children = [c for c in g1.childNodes if c.localName] - self.assertEqual(6,len(children)) - self.assertEqual(3,len([c for c in children if c.nodeName == 'file'])) - self.assertEqual(3,len([c for c in children if c.nodeName == 'match'])) - d1,d2,d3 = [c for c in children if c.nodeName == 'file'] - self.assertEqual(op.join('basepath','foo bar'),d1.getAttributeNode('path').nodeValue) - self.assertEqual(op.join('basepath','bar bleh'),d2.getAttributeNode('path').nodeValue) - self.assertEqual(op.join('basepath','foo bleh'),d3.getAttributeNode('path').nodeValue) - self.assertEqual('y',d1.getAttributeNode('is_ref').nodeValue) - self.assertEqual('n',d2.getAttributeNode('is_ref').nodeValue) - self.assertEqual('n',d3.getAttributeNode('is_ref').nodeValue) - self.assertEqual('foo,bar',d1.getAttributeNode('words').nodeValue) - self.assertEqual('bar,bleh',d2.getAttributeNode('words').nodeValue) - self.assertEqual('foo,bleh',d3.getAttributeNode('words').nodeValue) - children = [c for c in g2.childNodes if c.localName] - self.assertEqual(3,len(children)) - self.assertEqual(2,len([c for c in children if c.nodeName == 'file'])) - self.assertEqual(1,len([c for c in children if c.nodeName == 'match'])) - d1,d2 = [c for c in children if c.nodeName == 'file'] - self.assertEqual(op.join('basepath','ibabtu'),d1.getAttributeNode('path').nodeValue) - self.assertEqual(op.join('basepath','ibabtu'),d2.getAttributeNode('path').nodeValue) - self.assertEqual('n',d1.getAttributeNode('is_ref').nodeValue) - self.assertEqual('n',d2.getAttributeNode('is_ref').nodeValue) - self.assertEqual('ibabtu',d1.getAttributeNode('words').nodeValue) - self.assertEqual('ibabtu',d2.getAttributeNode('words').nodeValue) + doc = etree.parse(f) + root = doc.getroot() + self.assertEqual('results', root.tag) + self.assertEqual(2, len(root)) + self.assertEqual(2, len([c for c in root if c.tag == 'group'])) + g1, g2 = root + self.assertEqual(6,len(g1)) + self.assertEqual(3,len([c for c in g1 if c.tag == 'file'])) + self.assertEqual(3,len([c for c in g1 if c.tag == 'match'])) + d1, d2, d3 = [c for c in g1 if c.tag == 'file'] + self.assertEqual(op.join('basepath','foo bar'),d1.get('path')) + self.assertEqual(op.join('basepath','bar bleh'),d2.get('path')) + self.assertEqual(op.join('basepath','foo bleh'),d3.get('path')) + self.assertEqual('y',d1.get('is_ref')) + self.assertEqual('n',d2.get('is_ref')) + self.assertEqual('n',d3.get('is_ref')) + self.assertEqual('foo,bar',d1.get('words')) + self.assertEqual('bar,bleh',d2.get('words')) + self.assertEqual('foo,bleh',d3.get('words')) + self.assertEqual(3,len(g2)) + self.assertEqual(2,len([c for c in g2 if c.tag == 'file'])) + self.assertEqual(1,len([c for c in g2 if c.tag == 'match'])) + d1, d2 = [c for c in g2 if c.tag == 'file'] + self.assertEqual(op.join('basepath','ibabtu'),d1.get('path')) + self.assertEqual(op.join('basepath','ibabtu'),d2.get('path')) + self.assertEqual('n',d1.get('is_ref')) + self.assertEqual('n',d2.get('is_ref')) + self.assertEqual('ibabtu',d1.get('words')) + self.assertEqual('ibabtu',d2.get('words')) def test_LoadXML(self): def get_file(path): @@ -460,41 +456,41 @@ class TCResultsXML(TestCase): def get_file(path): return [f for f in self.objects if str(f.path) == path][0] - doc = xml.dom.minidom.Document() - root = doc.appendChild(doc.createElement('foobar')) #The root element shouldn't matter, really. - group_node = root.appendChild(doc.createElement('group')) - dupe_node = group_node.appendChild(doc.createElement('file')) #Perfectly correct file - dupe_node.setAttribute('path',op.join('basepath','foo bar')) - dupe_node.setAttribute('is_ref','y') - dupe_node.setAttribute('words','foo,bar') - dupe_node = group_node.appendChild(doc.createElement('file')) #is_ref missing, default to 'n' - dupe_node.setAttribute('path',op.join('basepath','foo bleh')) - dupe_node.setAttribute('words','foo,bleh') - dupe_node = group_node.appendChild(doc.createElement('file')) #words are missing, invalid. - dupe_node.setAttribute('path',op.join('basepath','bar bleh')) - dupe_node = group_node.appendChild(doc.createElement('file')) #path is missing, invalid. - dupe_node.setAttribute('words','foo,bleh') - dupe_node = group_node.appendChild(doc.createElement('foobar')) #Invalid element name - dupe_node.setAttribute('path',op.join('basepath','bar bleh')) - dupe_node.setAttribute('is_ref','y') - dupe_node.setAttribute('words','bar,bleh') - match_node = group_node.appendChild(doc.createElement('match')) # match pointing to a bad index - match_node.setAttribute('first', '42') - match_node.setAttribute('second', '45') - match_node = group_node.appendChild(doc.createElement('match')) # match with missing attrs - match_node = group_node.appendChild(doc.createElement('match')) # match with non-int values - match_node.setAttribute('first', 'foo') - match_node.setAttribute('second', 'bar') - match_node.setAttribute('percentage', 'baz') - group_node = root.appendChild(doc.createElement('foobar')) #invalid group - group_node = root.appendChild(doc.createElement('group')) #empty group + root = etree.Element('foobar') #The root element shouldn't matter, really. + group_node = etree.SubElement(root, 'group') + dupe_node = etree.SubElement(group_node, 'file') #Perfectly correct file + dupe_node.set('path', op.join('basepath','foo bar')) + dupe_node.set('is_ref', 'y') + dupe_node.set('words', 'foo,bar') + dupe_node = etree.SubElement(group_node, 'file') #is_ref missing, default to 'n' + dupe_node.set('path',op.join('basepath','foo bleh')) + dupe_node.set('words','foo,bleh') + dupe_node = etree.SubElement(group_node, 'file') #words are missing, valid. + dupe_node.set('path',op.join('basepath','bar bleh')) + dupe_node = etree.SubElement(group_node, 'file') #path is missing, invalid. + dupe_node.set('words','foo,bleh') + dupe_node = etree.SubElement(group_node, 'foobar') #Invalid element name + dupe_node.set('path',op.join('basepath','bar bleh')) + dupe_node.set('is_ref','y') + dupe_node.set('words','bar,bleh') + match_node = etree.SubElement(group_node, 'match') # match pointing to a bad index + match_node.set('first', '42') + match_node.set('second', '45') + match_node = etree.SubElement(group_node, 'match') # match with missing attrs + match_node = etree.SubElement(group_node, 'match') # match with non-int values + match_node.set('first', 'foo') + match_node.set('second', 'bar') + match_node.set('percentage', 'baz') + group_node = etree.SubElement(root, 'foobar') #invalid group + group_node = etree.SubElement(root, 'group') #empty group f = StringIO.StringIO() - doc.writexml(f,'\t','\t','\n',encoding='utf-8') + tree = etree.ElementTree(root) + tree.write(f, encoding='utf-8') f.seek(0) r = Results(data) - r.load_from_xml(f,get_file) + r.load_from_xml(f, get_file) self.assertEqual(1,len(r.groups)) - self.assertEqual(2,len(r.groups[0])) + self.assertEqual(3,len(r.groups[0])) def test_xml_non_ascii(self): def get_file(path):