From 47a6ceffbc7235288840e5a7a3e32a4cc9c8ad12 Mon Sep 17 00:00:00 2001
From: Virgil Dupras <hsoft@hardcoded.net>
Date: Mon, 1 Mar 2010 12:21:43 +0100
Subject: [PATCH] Use lxml everywhere for xml save/load (instead of ElementTree,
 SAX and minidom).

---
 core/directories.py        |  40 ++++----
 core/ignore.py             |  49 +++++-----
 core/results.py            | 184 +++++++++++++------------------------
 core/tests/app_test.py     |   7 +-
 core/tests/engine_test.py  |   5 +-
 core/tests/ignore_test.py  |  33 ++++---
 core/tests/results_test.py | 154 +++++++++++++++----------------
 7 files changed, 199 insertions(+), 273 deletions(-)

diff --git a/core/directories.py b/core/directories.py
index 1b46c6e9..7fa75dd1 100644
--- a/core/directories.py
+++ b/core/directories.py
@@ -6,7 +6,7 @@
 # which should be included with this package. The terms are also available at 
 # http://www.hardcoded.net/licenses/hs_license
 
-import xml.dom.minidom
+from lxml import etree
 
 from hsutil import io
 from hsutil.files import FileOrPath
@@ -126,38 +126,38 @@ class Directories(object):
     
     def load_from_file(self, infile):
         try:
-            doc = xml.dom.minidom.parse(infile)
+            root = etree.parse(infile).getroot()
         except:
             return
-        root_path_nodes = doc.getElementsByTagName('root_directory')
-        for rdn in root_path_nodes:
-            if not rdn.getAttributeNode('path'):
+        for rdn in root.iterchildren('root_directory'):
+            attrib = rdn.attrib
+            if 'path' not in attrib:
                 continue
-            path = rdn.getAttributeNode('path').nodeValue
+            path = attrib['path']
             try:
                 self.add_path(Path(path))
             except (AlreadyThereError, InvalidPathError):
                 pass
-        state_nodes = doc.getElementsByTagName('state')
-        for sn in state_nodes:
-            if not (sn.getAttributeNode('path') and sn.getAttributeNode('value')):
+        for sn in root.iterchildren('state'):
+            attrib = sn.attrib
+            if not ('path' in attrib and 'value' in attrib):
                 continue
-            path = sn.getAttributeNode('path').nodeValue
-            state = sn.getAttributeNode('value').nodeValue
+            path = attrib['path']
+            state = attrib['value']
             self.set_state(Path(path), int(state))
     
-    def save_to_file(self,outfile):
+    def save_to_file(self, outfile):
         with FileOrPath(outfile, 'wb') as fp:
-            doc = xml.dom.minidom.Document()
-            root = doc.appendChild(doc.createElement('directories'))
+            root = etree.Element('directories')
             for root_path in self:
-                root_path_node = root.appendChild(doc.createElement('root_directory'))
-                root_path_node.setAttribute('path', unicode(root_path).encode('utf-8'))
+                root_path_node = etree.SubElement(root, 'root_directory')
+                root_path_node.set('path', unicode(root_path))
             for path, state in self.states.iteritems():
-                state_node = root.appendChild(doc.createElement('state'))
-                state_node.setAttribute('path', unicode(path).encode('utf-8'))
-                state_node.setAttribute('value', str(state))
-            doc.writexml(fp, '\t', '\t', '\n', encoding='utf-8')
+                state_node = etree.SubElement(root, 'state')
+                state_node.set('path', unicode(path))
+                state_node.set('value', unicode(state))
+            tree = etree.ElementTree(root)
+            tree.write(fp, encoding='utf-8')
     
     def set_state(self, path, state):
         if self.get_state(path) == state:
diff --git a/core/ignore.py b/core/ignore.py
index 45ea8cd7..d51a579b 100644
--- a/core/ignore.py
+++ b/core/ignore.py
@@ -6,9 +6,9 @@
 # which should be included with this package. The terms are also available at 
 # http://www.hardcoded.net/licenses/hs_license
 
-from hsutil.files import FileOrPath
+from lxml import etree
 
-import xml.dom.minidom
+from hsutil.files import FileOrPath
 
 class IgnoreList(object):
     """An ignore list implementation that is iterable, filterable and exportable to XML.
@@ -71,45 +71,38 @@ class IgnoreList(object):
                 self._ignored[first] = matches
         self._count += 1
     
-    def load_from_xml(self,infile):
+    def load_from_xml(self, infile):
         """Loads the ignore list from a XML created with save_to_xml.
         
         infile can be a file object or a filename.
         """
         try:
-            doc = xml.dom.minidom.parse(infile)
+            root = etree.parse(infile).getroot()
         except Exception:
             return
-        file_nodes = doc.getElementsByTagName('file')
-        for fn in file_nodes:
-            if not fn.getAttributeNode('path'):
+        for fn in root.iterchildren('file'):
+            file_path = fn.get('path')
+            if not file_path:
                 continue
-            file_path = fn.getAttributeNode('path').nodeValue
-            subfile_nodes = fn.getElementsByTagName('file')
-            for sfn in subfile_nodes:
-                if not sfn.getAttributeNode('path'):
-                    continue
-                subfile_path = sfn.getAttributeNode('path').nodeValue
-                self.Ignore(file_path,subfile_path)
+            for sfn in fn.iterchildren('file'):
+                subfile_path = sfn.get('path')
+                if subfile_path:
+                    self.Ignore(file_path, subfile_path)
     
-    def save_to_xml(self,outfile):
+    def save_to_xml(self, outfile):
         """Create a XML file that can be used by load_from_xml.
         
         outfile can be a file object or a filename.
         """
-        doc = xml.dom.minidom.Document()
-        root = doc.appendChild(doc.createElement('ignore_list'))
-        for file,subfiles in self._ignored.items():
-            file_node = root.appendChild(doc.createElement('file'))
-            if isinstance(file,unicode):
-                file = file.encode('utf-8')
-            file_node.setAttribute('path',file)
-            for subfile in subfiles:
-                subfile_node = file_node.appendChild(doc.createElement('file'))
-                if isinstance(subfile,unicode):
-                    subfile = subfile.encode('utf-8')
-                subfile_node.setAttribute('path',subfile)
+        root = etree.Element('ignore_list')
+        for filename, subfiles in self._ignored.items():
+            file_node = etree.SubElement(root, 'file')
+            file_node.set('path', filename)
+            for subfilename in subfiles:
+                subfile_node = etree.SubElement(file_node, 'file')
+                subfile_node.set('path', subfilename)
+        tree = etree.ElementTree(root)
         with FileOrPath(outfile, 'wb') as fp:
-            doc.writexml(fp,'\t','\t','\n',encoding='utf-8')
+            tree.write(fp, encoding='utf-8')
     
 
diff --git a/core/results.py b/core/results.py
index ae19ead2..81f97335 100644
--- a/core/results.py
+++ b/core/results.py
@@ -8,16 +8,14 @@
 
 import logging
 import re
-from xml.sax import handler, make_parser, SAXException
-from xml.sax.saxutils import XMLGenerator
-from xml.sax.xmlreader import AttributesImpl
+from lxml import etree
 
 from . import engine
 from hsutil.job import nulljob
 from hsutil.markable import Markable
-from hsutil.misc import flatten, cond, nonone
+from hsutil.misc import flatten, nonone
 from hsutil.str import format_size
-from hsutil.files import open_if_filename
+from hsutil.files import FileOrPath
 
 class Results(Markable):
     #---Override
@@ -168,42 +166,54 @@ class Results(Markable):
     is_markable = _is_markable
     
     def load_from_xml(self, infile, get_file, j=nulljob):
+        def do_match(ref_file, other_files, group):
+            if not other_files:
+                return
+            for other_file in other_files:
+                group.add_match(engine.get_match(ref_file, other_file))
+            do_match(other_files[0], other_files[1:], group)
+        
         self.apply_filter(None)
-        handler = _ResultsHandler(get_file)
         try:
-            parser = make_parser()
-        except Exception as e:
-            # This special handling is to try to figure out the cause of #47
-            # We don't silently return, because we want the user to send error report.
-            logging.exception(e)
-            try:
-                import xml.parsers.expat
-                logging.warning('importing xml.parsers.expat went ok, WTF?')
-            except Exception as e:
-                # This log should give a little more details about the cause of this all
-                logging.exception(e)
-                raise
-            raise
-        parser.setContentHandler(handler)
-        try:
-            infile, must_close = open_if_filename(infile)
-        except IOError:
+            root = etree.parse(infile).getroot()
+        except Exception:
             return
-        BUFSIZE = 1024 * 1024 # 1mb buffer
-        infile.seek(0, 2)
-        j.start_job(infile.tell() // BUFSIZE)
-        infile.seek(0, 0)
-        try:
-            while True:
-                data = infile.read(BUFSIZE)
-                if not data:
-                    break
-                parser.feed(data)
-                j.add_progress()
-        except SAXException:
-            return
-        self.groups = handler.groups
-        for dupe_file in handler.marked:
+        group_elems = list(root.iterchildren('group'))
+        groups = []
+        marked = set()
+        for group_elem in j.iter_with_progress(group_elems, every=100):
+            group = engine.Group()
+            dupes = []
+            for file_elem in group_elem.iterchildren('file'):
+                path = file_elem.get('path')
+                words = file_elem.get('words', '')
+                if not path:
+                    continue
+                file = get_file(path)
+                if file is None:
+                    continue
+                file.words = words.split(',')
+                file.is_ref = file_elem.get('is_ref') == 'y'
+                dupes.append(file)
+                if file_elem.get('marked') == 'y':
+                    marked.add(file)
+            for match_elem in group_elem.iterchildren('match'):
+                try:
+                    attrs = match_elem.attrib
+                    first_file = dupes[int(attrs['first'])]
+                    second_file = dupes[int(attrs['second'])]
+                    percentage = int(attrs['percentage'])
+                    group.add_match(engine.Match(first_file, second_file, percentage))
+                except (IndexError, KeyError, ValueError): # Covers missing attr, non-int values and indexes out of bounds
+                    pass
+            if (not group.matches) and (len(dupes) >= 2):
+                do_match(dupes[0], dupes[1:], group)
+            group.prioritize(lambda x: dupes.index(x))
+            if len(group):
+                groups.append(group)    
+            j.add_progress()
+        self.groups = groups
+        for dupe_file in marked:
             self.mark(dupe_file)
     
     def make_ref(self, dupe):
@@ -256,13 +266,10 @@ class Results(Markable):
     
     def save_to_xml(self, outfile):
         self.apply_filter(None)
-        outfile, must_close = open_if_filename(outfile, 'wb')
-        writer = XMLGenerator(outfile, 'utf-8')
-        writer.startDocument()
-        empty_attrs = AttributesImpl({})
-        writer.startElement('results', empty_attrs)
+        root = etree.Element('results')
+        # Build the whole element tree in memory; it is serialized in a single write at the end.
         for g in self.groups:
-            writer.startElement('group', empty_attrs)
+            group_elem = etree.SubElement(root, 'group')
             dupe2index = {}
             for index, d in enumerate(g):
                 dupe2index[d] = index
@@ -270,27 +277,19 @@ class Results(Markable):
                     words = engine.unpack_fields(d.words)
                 except AttributeError:
                     words = ()
-                attrs = AttributesImpl({
-                    'path': unicode(d.path),
-                    'is_ref': cond(d.is_ref, 'y', 'n'),
-                    'words': ','.join(words),
-                    'marked': cond(self.is_marked(d), 'y', 'n')
-                })
-                writer.startElement('file', attrs)
-                writer.endElement('file')
+                file_elem = etree.SubElement(group_elem, 'file')
+                file_elem.set('path', unicode(d.path))
+                file_elem.set('is_ref', ('y' if d.is_ref else 'n'))
+                file_elem.set('words', ','.join(words))
+                file_elem.set('marked', ('y' if self.is_marked(d) else 'n'))
             for match in g.matches:
-                attrs = AttributesImpl({
-                    'first': str(dupe2index[match.first]),
-                    'second': str(dupe2index[match.second]),
-                    'percentage': str(int(match.percentage)),
-                })
-                writer.startElement('match', attrs)
-                writer.endElement('match')
-            writer.endElement('group')
-        writer.endElement('results')
-        writer.endDocument()
-        if must_close:
-            outfile.close()
+                match_elem = etree.SubElement(group_elem, 'match')
+                match_elem.set('first', unicode(dupe2index[match.first]))
+                match_elem.set('second', unicode(dupe2index[match.second]))
+                match_elem.set('percentage', unicode(int(match.percentage)))
+        tree = etree.ElementTree(root)
+        with FileOrPath(outfile, 'wb') as fp:
+            tree.write(fp, encoding='utf-8')
     
     def sort_dupes(self, key, asc=True, delta=False):
         if not self.__dupes:
@@ -310,60 +309,3 @@ class Results(Markable):
     dupes     = property(__get_dupe_list)
     groups    = property(__get_groups, __set_groups)
     stat_line = property(__get_stat_line)
-
-class _ResultsHandler(handler.ContentHandler):
-    def __init__(self, get_file):
-        self.group = None
-        self.dupes = None
-        self.marked = set()
-        self.groups = []
-        self.get_file = get_file
-    
-    def startElement(self, name, attrs):
-        if name == 'group':
-            self.group = engine.Group()
-            self.dupes = []
-            return
-        if (name == 'file') and (self.group is not None):
-            if not (('path' in attrs) and ('words' in attrs)):
-                return
-            path = attrs['path']
-            file = self.get_file(path)
-            if file is None:
-                return
-            file.words = attrs['words'].split(',')
-            file.is_ref = attrs.get('is_ref') == 'y'
-            self.dupes.append(file)
-            if attrs.get('marked') == 'y':
-                self.marked.add(file)
-        if (name == 'match') and (self.group is not None):
-            try:
-                first_file = self.dupes[int(attrs['first'])]
-                second_file = self.dupes[int(attrs['second'])]
-                percentage = int(attrs['percentage'])
-                self.group.add_match(engine.Match(first_file, second_file, percentage))
-            except (IndexError, KeyError, ValueError): # Covers missing attr, non-int values and indexes out of bounds
-                pass
-    
-    def endElement(self, name):
-        def do_match(ref_file, other_files, group):
-            if not other_files:
-                return
-            for other_file in other_files:
-                group.add_match(engine.get_match(ref_file, other_file))
-            do_match(other_files[0], other_files[1:], group)
-        
-        if name == 'group':
-            group = self.group
-            self.group = None
-            dupes = self.dupes
-            self.dupes = []
-            if group is None:
-                return
-            if len(dupes) < 2:
-                return
-            if not group.matches: # <match> elements not present, do it manually, without %
-                do_match(dupes[0], dupes[1:], group)
-            group.prioritize(lambda x: dupes.index(x))
-            self.groups.append(group)
-    
diff --git a/core/tests/app_test.py b/core/tests/app_test.py
index 800d0d68..70533b83 100644
--- a/core/tests/app_test.py
+++ b/core/tests/app_test.py
@@ -248,7 +248,7 @@ class TCDupeGuruWithResults(TestCase):
         self.rtree.selected_paths = paths
         self.app.remove_selected()
         # The first 2 dupes have been removed. The 3rd one is a ref. it stays there, in first pos.
-        eq_(self.rtree.selected_paths, [[0]]) # no exception
+        eq_(self.rtree.selected_paths, [[0, 0]]) # no exception
     
     def test_selectResultNodePaths(self):
         app = self.app
@@ -366,10 +366,7 @@ class TCDupeGuruWithResults(TestCase):
         app = self.app
         self.rtree.selected_paths = [[0, 0], [1, 0]]
         app.remove_selected()
-        eq_(len(app.results.dupes), 1)
-        app.remove_selected()
-        eq_(len(app.results.dupes), 1)
-        self.rtree.selected_path = [0, 0]
+        eq_(len(app.results.dupes), 1) # the first path is now selected
         app.remove_selected()
         eq_(len(app.results.dupes), 0)
     
diff --git a/core/tests/engine_test.py b/core/tests/engine_test.py
index f51c9b7e..528843e2 100644
--- a/core/tests/engine_test.py
+++ b/core/tests/engine_test.py
@@ -229,10 +229,9 @@ class TCbuild_word_dict(TestCase):
         self.log = []
         s = "foo bar"
         build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j)
+        # There are no intermediate log entries because iter_with_progress is called with every > 1
         self.assertEqual(0,self.log[0])
-        self.assertEqual(33,self.log[1])
-        self.assertEqual(66,self.log[2])
-        self.assertEqual(100,self.log[3])
+        self.assertEqual(100,self.log[1])
     
 
 class TCmerge_similar_words(TestCase):
diff --git a/core/tests/ignore_test.py b/core/tests/ignore_test.py
index 604482cd..4054a9cf 100644
--- a/core/tests/ignore_test.py
+++ b/core/tests/ignore_test.py
@@ -7,7 +7,7 @@
 # http://www.hardcoded.net/licenses/hs_license
 
 import cStringIO
-import xml.dom.minidom
+from lxml import etree
 
 from nose.tools import eq_
 
@@ -62,26 +62,25 @@ def test_save_to_xml():
     f = cStringIO.StringIO()
     il.save_to_xml(f)
     f.seek(0)
-    doc = xml.dom.minidom.parse(f)
-    root = doc.documentElement
-    eq_('ignore_list',root.nodeName)
-    children = [c for c in root.childNodes if c.localName]
-    eq_(2,len(children))
-    eq_(2,len([c for c in children if c.nodeName == 'file']))
-    f1,f2 = children
-    subchildren = [c for c in f1.childNodes if c.localName == 'file'] +\
-        [c for c in f2.childNodes if c.localName == 'file']
-    eq_(3,len(subchildren))
+    doc = etree.parse(f)
+    root = doc.getroot()
+    eq_(root.tag, 'ignore_list')
+    eq_(len(root), 2)
+    eq_(len([c for c in root if c.tag == 'file']), 2)
+    f1, f2 = root[:]
+    subchildren = [c for c in f1 if c.tag == 'file'] + [c for c in f2 if c.tag == 'file']
+    eq_(len(subchildren), 3)
 
 def test_SaveThenLoad():
     il = IgnoreList()
-    il.Ignore('foo','bar')
-    il.Ignore('foo','bleh')
-    il.Ignore('bleh','bar')
-    il.Ignore(u'\u00e9','bar')
+    il.Ignore('foo', 'bar')
+    il.Ignore('foo', 'bleh')
+    il.Ignore('bleh', 'bar')
+    il.Ignore(u'\u00e9', 'bar')
     f = cStringIO.StringIO()
     il.save_to_xml(f)
     f.seek(0)
+    f.seek(0)
     il = IgnoreList()
     il.load_from_xml(f)
     eq_(4,len(il))
@@ -129,9 +128,9 @@ def test_filter():
     assert not il.AreIgnored('foo','bar')
     assert il.AreIgnored('bar','baz')
 
-def test_save_with_non_ascii_non_unicode_items():
+def test_save_with_non_ascii_items():
     il = IgnoreList()
-    il.Ignore('\xac','\xbf')
+    il.Ignore(u'\xac', u'\xbf')
     f = cStringIO.StringIO()
     try:
         il.save_to_xml(f)
diff --git a/core/tests/results_test.py b/core/tests/results_test.py
index f7868799..2cdb43bd 100644
--- a/core/tests/results_test.py
+++ b/core/tests/results_test.py
@@ -7,10 +7,9 @@
 # which should be included with this package. The terms are also available at 
 # http://www.hardcoded.net/licenses/hs_license
 
-import unittest
 import StringIO
-import xml.dom.minidom
 import os.path as op
+from lxml import etree
 
 from hsutil.path import Path
 from hsutil.testcase import TestCase
@@ -18,7 +17,7 @@ from hsutil.misc import first
 
 from . import engine_test, data
 from .. import engine
-from ..results import *
+from ..results import Results
 
 class NamedObject(engine_test.NamedObject):
     path = property(lambda x:Path('basepath') + x.name)
@@ -65,9 +64,9 @@ class TCResultsEmpty(TestCase):
         f = StringIO.StringIO()
         self.results.save_to_xml(f)
         f.seek(0)
-        doc = xml.dom.minidom.parse(f)
-        root = doc.documentElement
-        self.assertEqual('results',root.nodeName)
+        doc = etree.parse(f)
+        root = doc.getroot()
+        self.assertEqual('results', root.tag)
     
 
 class TCResultsWithSomeGroups(TestCase):
@@ -321,16 +320,16 @@ class TCResultsMarkings(TestCase):
         f = StringIO.StringIO()
         self.results.save_to_xml(f)
         f.seek(0)
-        doc = xml.dom.minidom.parse(f)
-        root = doc.documentElement
-        g1,g2 = root.getElementsByTagName('group')
-        d1,d2,d3 = g1.getElementsByTagName('file')
-        self.assertEqual('n',d1.getAttributeNode('marked').nodeValue)
-        self.assertEqual('n',d2.getAttributeNode('marked').nodeValue)
-        self.assertEqual('y',d3.getAttributeNode('marked').nodeValue)
-        d1,d2 = g2.getElementsByTagName('file')
-        self.assertEqual('n',d1.getAttributeNode('marked').nodeValue)
-        self.assertEqual('y',d2.getAttributeNode('marked').nodeValue)
+        doc = etree.parse(f)
+        root = doc.getroot()
+        g1, g2 = root.iterchildren('group')
+        d1, d2, d3 = g1.iterchildren('file')
+        self.assertEqual('n', d1.get('marked'))
+        self.assertEqual('n', d2.get('marked'))
+        self.assertEqual('y', d3.get('marked'))
+        d1, d2 = g2.iterchildren('file')
+        self.assertEqual('n', d1.get('marked'))
+        self.assertEqual('y', d2.get('marked'))
     
     def test_LoadXML(self):
         def get_file(path):
@@ -366,38 +365,35 @@ class TCResultsXML(TestCase):
         f = StringIO.StringIO()
         self.results.save_to_xml(f)
         f.seek(0)
-        doc = xml.dom.minidom.parse(f)
-        root = doc.documentElement
-        self.assertEqual('results',root.nodeName)
-        children = [c for c in root.childNodes if c.localName]
-        self.assertEqual(2,len(children))
-        self.assertEqual(2,len([c for c in children if c.nodeName == 'group']))
-        g1,g2 = children
-        children = [c for c in g1.childNodes if c.localName]
-        self.assertEqual(6,len(children))
-        self.assertEqual(3,len([c for c in children if c.nodeName == 'file']))
-        self.assertEqual(3,len([c for c in children if c.nodeName == 'match']))
-        d1,d2,d3 = [c for c in children if c.nodeName == 'file']
-        self.assertEqual(op.join('basepath','foo bar'),d1.getAttributeNode('path').nodeValue)
-        self.assertEqual(op.join('basepath','bar bleh'),d2.getAttributeNode('path').nodeValue)
-        self.assertEqual(op.join('basepath','foo bleh'),d3.getAttributeNode('path').nodeValue)
-        self.assertEqual('y',d1.getAttributeNode('is_ref').nodeValue)
-        self.assertEqual('n',d2.getAttributeNode('is_ref').nodeValue)
-        self.assertEqual('n',d3.getAttributeNode('is_ref').nodeValue)
-        self.assertEqual('foo,bar',d1.getAttributeNode('words').nodeValue)
-        self.assertEqual('bar,bleh',d2.getAttributeNode('words').nodeValue)
-        self.assertEqual('foo,bleh',d3.getAttributeNode('words').nodeValue)
-        children = [c for c in g2.childNodes if c.localName]
-        self.assertEqual(3,len(children))
-        self.assertEqual(2,len([c for c in children if c.nodeName == 'file']))
-        self.assertEqual(1,len([c for c in children if c.nodeName == 'match']))
-        d1,d2 = [c for c in children if c.nodeName == 'file']
-        self.assertEqual(op.join('basepath','ibabtu'),d1.getAttributeNode('path').nodeValue)
-        self.assertEqual(op.join('basepath','ibabtu'),d2.getAttributeNode('path').nodeValue)
-        self.assertEqual('n',d1.getAttributeNode('is_ref').nodeValue)
-        self.assertEqual('n',d2.getAttributeNode('is_ref').nodeValue)
-        self.assertEqual('ibabtu',d1.getAttributeNode('words').nodeValue)
-        self.assertEqual('ibabtu',d2.getAttributeNode('words').nodeValue)
+        doc = etree.parse(f)
+        root = doc.getroot()
+        self.assertEqual('results', root.tag)
+        self.assertEqual(2, len(root))
+        self.assertEqual(2, len([c for c in root if c.tag == 'group']))
+        g1, g2 = root
+        self.assertEqual(6,len(g1))
+        self.assertEqual(3,len([c for c in g1 if c.tag == 'file']))
+        self.assertEqual(3,len([c for c in g1 if c.tag == 'match']))
+        d1, d2, d3 = [c for c in g1 if c.tag == 'file']
+        self.assertEqual(op.join('basepath','foo bar'),d1.get('path'))
+        self.assertEqual(op.join('basepath','bar bleh'),d2.get('path'))
+        self.assertEqual(op.join('basepath','foo bleh'),d3.get('path'))
+        self.assertEqual('y',d1.get('is_ref'))
+        self.assertEqual('n',d2.get('is_ref'))
+        self.assertEqual('n',d3.get('is_ref'))
+        self.assertEqual('foo,bar',d1.get('words'))
+        self.assertEqual('bar,bleh',d2.get('words'))
+        self.assertEqual('foo,bleh',d3.get('words'))
+        self.assertEqual(3,len(g2))
+        self.assertEqual(2,len([c for c in g2 if c.tag == 'file']))
+        self.assertEqual(1,len([c for c in g2 if c.tag == 'match']))
+        d1, d2 = [c for c in g2 if c.tag == 'file']
+        self.assertEqual(op.join('basepath','ibabtu'),d1.get('path'))
+        self.assertEqual(op.join('basepath','ibabtu'),d2.get('path'))
+        self.assertEqual('n',d1.get('is_ref'))
+        self.assertEqual('n',d2.get('is_ref'))
+        self.assertEqual('ibabtu',d1.get('words'))
+        self.assertEqual('ibabtu',d2.get('words'))
     
     def test_LoadXML(self):
         def get_file(path):
@@ -460,41 +456,41 @@ class TCResultsXML(TestCase):
         def get_file(path):
             return [f for f in self.objects if str(f.path) == path][0]
         
-        doc = xml.dom.minidom.Document()
-        root = doc.appendChild(doc.createElement('foobar')) #The root element shouldn't matter, really.
-        group_node = root.appendChild(doc.createElement('group'))
-        dupe_node = group_node.appendChild(doc.createElement('file')) #Perfectly correct file
-        dupe_node.setAttribute('path',op.join('basepath','foo bar'))
-        dupe_node.setAttribute('is_ref','y')
-        dupe_node.setAttribute('words','foo,bar')
-        dupe_node = group_node.appendChild(doc.createElement('file')) #is_ref missing, default to 'n'
-        dupe_node.setAttribute('path',op.join('basepath','foo bleh'))
-        dupe_node.setAttribute('words','foo,bleh')
-        dupe_node = group_node.appendChild(doc.createElement('file')) #words are missing, invalid.
-        dupe_node.setAttribute('path',op.join('basepath','bar bleh'))
-        dupe_node = group_node.appendChild(doc.createElement('file')) #path is missing, invalid.
-        dupe_node.setAttribute('words','foo,bleh')
-        dupe_node = group_node.appendChild(doc.createElement('foobar')) #Invalid element name
-        dupe_node.setAttribute('path',op.join('basepath','bar bleh'))
-        dupe_node.setAttribute('is_ref','y')
-        dupe_node.setAttribute('words','bar,bleh')
-        match_node = group_node.appendChild(doc.createElement('match')) # match pointing to a bad index
-        match_node.setAttribute('first', '42')
-        match_node.setAttribute('second', '45')
-        match_node = group_node.appendChild(doc.createElement('match')) # match with missing attrs
-        match_node = group_node.appendChild(doc.createElement('match')) # match with non-int values
-        match_node.setAttribute('first', 'foo')
-        match_node.setAttribute('second', 'bar')
-        match_node.setAttribute('percentage', 'baz')
-        group_node = root.appendChild(doc.createElement('foobar')) #invalid group
-        group_node = root.appendChild(doc.createElement('group')) #empty group
+        root = etree.Element('foobar') #The root element shouldn't matter, really.
+        group_node = etree.SubElement(root, 'group')
+        dupe_node = etree.SubElement(group_node, 'file') #Perfectly correct file
+        dupe_node.set('path', op.join('basepath','foo bar'))
+        dupe_node.set('is_ref', 'y')
+        dupe_node.set('words', 'foo,bar')
+        dupe_node = etree.SubElement(group_node, 'file') #is_ref missing, default to 'n'
+        dupe_node.set('path',op.join('basepath','foo bleh'))
+        dupe_node.set('words','foo,bleh')
+        dupe_node = etree.SubElement(group_node, 'file') #words are missing, valid.
+        dupe_node.set('path',op.join('basepath','bar bleh'))
+        dupe_node = etree.SubElement(group_node, 'file') #path is missing, invalid.
+        dupe_node.set('words','foo,bleh')
+        dupe_node = etree.SubElement(group_node, 'foobar') #Invalid element name
+        dupe_node.set('path',op.join('basepath','bar bleh'))
+        dupe_node.set('is_ref','y')
+        dupe_node.set('words','bar,bleh')
+        match_node = etree.SubElement(group_node, 'match') # match pointing to a bad index
+        match_node.set('first', '42')
+        match_node.set('second', '45')
+        match_node = etree.SubElement(group_node, 'match') # match with missing attrs
+        match_node = etree.SubElement(group_node, 'match') # match with non-int values
+        match_node.set('first', 'foo')
+        match_node.set('second', 'bar')
+        match_node.set('percentage', 'baz')
+        group_node = etree.SubElement(root, 'foobar') #invalid group
+        group_node = etree.SubElement(root, 'group') #empty group
         f = StringIO.StringIO()
-        doc.writexml(f,'\t','\t','\n',encoding='utf-8')
+        tree = etree.ElementTree(root)
+        tree.write(f, encoding='utf-8')
         f.seek(0)
         r = Results(data)
-        r.load_from_xml(f,get_file)
+        r.load_from_xml(f, get_file)
         self.assertEqual(1,len(r.groups))
-        self.assertEqual(2,len(r.groups[0]))
+        self.assertEqual(3,len(r.groups[0]))
     
     def test_xml_non_ascii(self):
         def get_file(path):