mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-03-10 05:34:36 +00:00
Use lxml everywhere for xml save/load (instead of ElementTree and minidom).
This commit is contained in:
parent
b17ca66f73
commit
47a6ceffbc
@ -6,7 +6,7 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.hardcoded.net/licenses/hs_license
|
# http://www.hardcoded.net/licenses/hs_license
|
||||||
|
|
||||||
import xml.dom.minidom
|
from lxml import etree
|
||||||
|
|
||||||
from hsutil import io
|
from hsutil import io
|
||||||
from hsutil.files import FileOrPath
|
from hsutil.files import FileOrPath
|
||||||
@ -126,38 +126,38 @@ class Directories(object):
|
|||||||
|
|
||||||
def load_from_file(self, infile):
|
def load_from_file(self, infile):
|
||||||
try:
|
try:
|
||||||
doc = xml.dom.minidom.parse(infile)
|
root = etree.parse(infile).getroot()
|
||||||
except:
|
except:
|
||||||
return
|
return
|
||||||
root_path_nodes = doc.getElementsByTagName('root_directory')
|
for rdn in root.iterchildren('root_directory'):
|
||||||
for rdn in root_path_nodes:
|
attrib = rdn.attrib
|
||||||
if not rdn.getAttributeNode('path'):
|
if 'path' not in attrib:
|
||||||
continue
|
continue
|
||||||
path = rdn.getAttributeNode('path').nodeValue
|
path = attrib['path']
|
||||||
try:
|
try:
|
||||||
self.add_path(Path(path))
|
self.add_path(Path(path))
|
||||||
except (AlreadyThereError, InvalidPathError):
|
except (AlreadyThereError, InvalidPathError):
|
||||||
pass
|
pass
|
||||||
state_nodes = doc.getElementsByTagName('state')
|
for sn in root.iterchildren('state'):
|
||||||
for sn in state_nodes:
|
attrib = sn.attrib
|
||||||
if not (sn.getAttributeNode('path') and sn.getAttributeNode('value')):
|
if not ('path' in attrib and 'value' in attrib):
|
||||||
continue
|
continue
|
||||||
path = sn.getAttributeNode('path').nodeValue
|
path = attrib['path']
|
||||||
state = sn.getAttributeNode('value').nodeValue
|
state = attrib['value']
|
||||||
self.set_state(Path(path), int(state))
|
self.set_state(Path(path), int(state))
|
||||||
|
|
||||||
def save_to_file(self,outfile):
|
def save_to_file(self, outfile):
|
||||||
with FileOrPath(outfile, 'wb') as fp:
|
with FileOrPath(outfile, 'wb') as fp:
|
||||||
doc = xml.dom.minidom.Document()
|
root = etree.Element('directories')
|
||||||
root = doc.appendChild(doc.createElement('directories'))
|
|
||||||
for root_path in self:
|
for root_path in self:
|
||||||
root_path_node = root.appendChild(doc.createElement('root_directory'))
|
root_path_node = etree.SubElement(root, 'root_directory')
|
||||||
root_path_node.setAttribute('path', unicode(root_path).encode('utf-8'))
|
root_path_node.set('path', unicode(root_path))
|
||||||
for path, state in self.states.iteritems():
|
for path, state in self.states.iteritems():
|
||||||
state_node = root.appendChild(doc.createElement('state'))
|
state_node = etree.SubElement(root, 'state')
|
||||||
state_node.setAttribute('path', unicode(path).encode('utf-8'))
|
state_node.set('path', unicode(path))
|
||||||
state_node.setAttribute('value', str(state))
|
state_node.set('value', unicode(state))
|
||||||
doc.writexml(fp, '\t', '\t', '\n', encoding='utf-8')
|
tree = etree.ElementTree(root)
|
||||||
|
tree.write(fp, encoding='utf-8')
|
||||||
|
|
||||||
def set_state(self, path, state):
|
def set_state(self, path, state):
|
||||||
if self.get_state(path) == state:
|
if self.get_state(path) == state:
|
||||||
|
@ -6,9 +6,9 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.hardcoded.net/licenses/hs_license
|
# http://www.hardcoded.net/licenses/hs_license
|
||||||
|
|
||||||
from hsutil.files import FileOrPath
|
from lxml import etree
|
||||||
|
|
||||||
import xml.dom.minidom
|
from hsutil.files import FileOrPath
|
||||||
|
|
||||||
class IgnoreList(object):
|
class IgnoreList(object):
|
||||||
"""An ignore list implementation that is iterable, filterable and exportable to XML.
|
"""An ignore list implementation that is iterable, filterable and exportable to XML.
|
||||||
@ -71,45 +71,38 @@ class IgnoreList(object):
|
|||||||
self._ignored[first] = matches
|
self._ignored[first] = matches
|
||||||
self._count += 1
|
self._count += 1
|
||||||
|
|
||||||
def load_from_xml(self,infile):
|
def load_from_xml(self, infile):
|
||||||
"""Loads the ignore list from a XML created with save_to_xml.
|
"""Loads the ignore list from a XML created with save_to_xml.
|
||||||
|
|
||||||
infile can be a file object or a filename.
|
infile can be a file object or a filename.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
doc = xml.dom.minidom.parse(infile)
|
root = etree.parse(infile).getroot()
|
||||||
except Exception:
|
except Exception:
|
||||||
return
|
return
|
||||||
file_nodes = doc.getElementsByTagName('file')
|
for fn in root.iterchildren('file'):
|
||||||
for fn in file_nodes:
|
file_path = fn.get('path')
|
||||||
if not fn.getAttributeNode('path'):
|
if not file_path:
|
||||||
continue
|
continue
|
||||||
file_path = fn.getAttributeNode('path').nodeValue
|
for sfn in fn.iterchildren('file'):
|
||||||
subfile_nodes = fn.getElementsByTagName('file')
|
subfile_path = sfn.get('path')
|
||||||
for sfn in subfile_nodes:
|
if subfile_path:
|
||||||
if not sfn.getAttributeNode('path'):
|
self.Ignore(file_path, subfile_path)
|
||||||
continue
|
|
||||||
subfile_path = sfn.getAttributeNode('path').nodeValue
|
|
||||||
self.Ignore(file_path,subfile_path)
|
|
||||||
|
|
||||||
def save_to_xml(self,outfile):
|
def save_to_xml(self, outfile):
|
||||||
"""Create a XML file that can be used by load_from_xml.
|
"""Create a XML file that can be used by load_from_xml.
|
||||||
|
|
||||||
outfile can be a file object or a filename.
|
outfile can be a file object or a filename.
|
||||||
"""
|
"""
|
||||||
doc = xml.dom.minidom.Document()
|
root = etree.Element('ignore_list')
|
||||||
root = doc.appendChild(doc.createElement('ignore_list'))
|
for filename, subfiles in self._ignored.items():
|
||||||
for file,subfiles in self._ignored.items():
|
file_node = etree.SubElement(root, 'file')
|
||||||
file_node = root.appendChild(doc.createElement('file'))
|
file_node.set('path', filename)
|
||||||
if isinstance(file,unicode):
|
for subfilename in subfiles:
|
||||||
file = file.encode('utf-8')
|
subfile_node = etree.SubElement(file_node, 'file')
|
||||||
file_node.setAttribute('path',file)
|
subfile_node.set('path', subfilename)
|
||||||
for subfile in subfiles:
|
tree = etree.ElementTree(root)
|
||||||
subfile_node = file_node.appendChild(doc.createElement('file'))
|
|
||||||
if isinstance(subfile,unicode):
|
|
||||||
subfile = subfile.encode('utf-8')
|
|
||||||
subfile_node.setAttribute('path',subfile)
|
|
||||||
with FileOrPath(outfile, 'wb') as fp:
|
with FileOrPath(outfile, 'wb') as fp:
|
||||||
doc.writexml(fp,'\t','\t','\n',encoding='utf-8')
|
tree.write(fp, encoding='utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
180
core/results.py
180
core/results.py
@ -8,16 +8,14 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from xml.sax import handler, make_parser, SAXException
|
from lxml import etree
|
||||||
from xml.sax.saxutils import XMLGenerator
|
|
||||||
from xml.sax.xmlreader import AttributesImpl
|
|
||||||
|
|
||||||
from . import engine
|
from . import engine
|
||||||
from hsutil.job import nulljob
|
from hsutil.job import nulljob
|
||||||
from hsutil.markable import Markable
|
from hsutil.markable import Markable
|
||||||
from hsutil.misc import flatten, cond, nonone
|
from hsutil.misc import flatten, nonone
|
||||||
from hsutil.str import format_size
|
from hsutil.str import format_size
|
||||||
from hsutil.files import open_if_filename
|
from hsutil.files import FileOrPath
|
||||||
|
|
||||||
class Results(Markable):
|
class Results(Markable):
|
||||||
#---Override
|
#---Override
|
||||||
@ -168,42 +166,54 @@ class Results(Markable):
|
|||||||
is_markable = _is_markable
|
is_markable = _is_markable
|
||||||
|
|
||||||
def load_from_xml(self, infile, get_file, j=nulljob):
|
def load_from_xml(self, infile, get_file, j=nulljob):
|
||||||
|
def do_match(ref_file, other_files, group):
|
||||||
|
if not other_files:
|
||||||
|
return
|
||||||
|
for other_file in other_files:
|
||||||
|
group.add_match(engine.get_match(ref_file, other_file))
|
||||||
|
do_match(other_files[0], other_files[1:], group)
|
||||||
|
|
||||||
self.apply_filter(None)
|
self.apply_filter(None)
|
||||||
handler = _ResultsHandler(get_file)
|
|
||||||
try:
|
try:
|
||||||
parser = make_parser()
|
root = etree.parse(infile).getroot()
|
||||||
except Exception as e:
|
except Exception:
|
||||||
# This special handling is to try to figure out the cause of #47
|
|
||||||
# We don't silently return, because we want the user to send error report.
|
|
||||||
logging.exception(e)
|
|
||||||
try:
|
|
||||||
import xml.parsers.expat
|
|
||||||
logging.warning('importing xml.parsers.expat went ok, WTF?')
|
|
||||||
except Exception as e:
|
|
||||||
# This log should give a little more details about the cause of this all
|
|
||||||
logging.exception(e)
|
|
||||||
raise
|
|
||||||
raise
|
|
||||||
parser.setContentHandler(handler)
|
|
||||||
try:
|
|
||||||
infile, must_close = open_if_filename(infile)
|
|
||||||
except IOError:
|
|
||||||
return
|
return
|
||||||
BUFSIZE = 1024 * 1024 # 1mb buffer
|
group_elems = list(root.iterchildren('group'))
|
||||||
infile.seek(0, 2)
|
groups = []
|
||||||
j.start_job(infile.tell() // BUFSIZE)
|
marked = set()
|
||||||
infile.seek(0, 0)
|
for group_elem in j.iter_with_progress(group_elems, every=100):
|
||||||
|
group = engine.Group()
|
||||||
|
dupes = []
|
||||||
|
for file_elem in group_elem.iterchildren('file'):
|
||||||
|
path = file_elem.get('path')
|
||||||
|
words = file_elem.get('words', '')
|
||||||
|
if not path:
|
||||||
|
continue
|
||||||
|
file = get_file(path)
|
||||||
|
if file is None:
|
||||||
|
continue
|
||||||
|
file.words = words.split(',')
|
||||||
|
file.is_ref = file_elem.get('is_ref') == 'y'
|
||||||
|
dupes.append(file)
|
||||||
|
if file_elem.get('marked') == 'y':
|
||||||
|
marked.add(file)
|
||||||
|
for match_elem in group_elem.iterchildren('match'):
|
||||||
try:
|
try:
|
||||||
while True:
|
attrs = match_elem.attrib
|
||||||
data = infile.read(BUFSIZE)
|
first_file = dupes[int(attrs['first'])]
|
||||||
if not data:
|
second_file = dupes[int(attrs['second'])]
|
||||||
break
|
percentage = int(attrs['percentage'])
|
||||||
parser.feed(data)
|
group.add_match(engine.Match(first_file, second_file, percentage))
|
||||||
|
except (IndexError, KeyError, ValueError): # Covers missing attr, non-int values and indexes out of bounds
|
||||||
|
pass
|
||||||
|
if (not group.matches) and (len(dupes) >= 2):
|
||||||
|
do_match(dupes[0], dupes[1:], group)
|
||||||
|
group.prioritize(lambda x: dupes.index(x))
|
||||||
|
if len(group):
|
||||||
|
groups.append(group)
|
||||||
j.add_progress()
|
j.add_progress()
|
||||||
except SAXException:
|
self.groups = groups
|
||||||
return
|
for dupe_file in marked:
|
||||||
self.groups = handler.groups
|
|
||||||
for dupe_file in handler.marked:
|
|
||||||
self.mark(dupe_file)
|
self.mark(dupe_file)
|
||||||
|
|
||||||
def make_ref(self, dupe):
|
def make_ref(self, dupe):
|
||||||
@ -256,13 +266,10 @@ class Results(Markable):
|
|||||||
|
|
||||||
def save_to_xml(self, outfile):
|
def save_to_xml(self, outfile):
|
||||||
self.apply_filter(None)
|
self.apply_filter(None)
|
||||||
outfile, must_close = open_if_filename(outfile, 'wb')
|
root = etree.Element('results')
|
||||||
writer = XMLGenerator(outfile, 'utf-8')
|
# writer = XMLGenerator(outfile, 'utf-8')
|
||||||
writer.startDocument()
|
|
||||||
empty_attrs = AttributesImpl({})
|
|
||||||
writer.startElement('results', empty_attrs)
|
|
||||||
for g in self.groups:
|
for g in self.groups:
|
||||||
writer.startElement('group', empty_attrs)
|
group_elem = etree.SubElement(root, 'group')
|
||||||
dupe2index = {}
|
dupe2index = {}
|
||||||
for index, d in enumerate(g):
|
for index, d in enumerate(g):
|
||||||
dupe2index[d] = index
|
dupe2index[d] = index
|
||||||
@ -270,27 +277,19 @@ class Results(Markable):
|
|||||||
words = engine.unpack_fields(d.words)
|
words = engine.unpack_fields(d.words)
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
words = ()
|
words = ()
|
||||||
attrs = AttributesImpl({
|
file_elem = etree.SubElement(group_elem, 'file')
|
||||||
'path': unicode(d.path),
|
file_elem.set('path', unicode(d.path))
|
||||||
'is_ref': cond(d.is_ref, 'y', 'n'),
|
file_elem.set('is_ref', ('y' if d.is_ref else 'n'))
|
||||||
'words': ','.join(words),
|
file_elem.set('words', ','.join(words))
|
||||||
'marked': cond(self.is_marked(d), 'y', 'n')
|
file_elem.set('marked', ('y' if self.is_marked(d) else 'n'))
|
||||||
})
|
|
||||||
writer.startElement('file', attrs)
|
|
||||||
writer.endElement('file')
|
|
||||||
for match in g.matches:
|
for match in g.matches:
|
||||||
attrs = AttributesImpl({
|
match_elem = etree.SubElement(group_elem, 'match')
|
||||||
'first': str(dupe2index[match.first]),
|
match_elem.set('first', unicode(dupe2index[match.first]))
|
||||||
'second': str(dupe2index[match.second]),
|
match_elem.set('second', unicode(dupe2index[match.second]))
|
||||||
'percentage': str(int(match.percentage)),
|
match_elem.set('percentage', unicode(int(match.percentage)))
|
||||||
})
|
tree = etree.ElementTree(root)
|
||||||
writer.startElement('match', attrs)
|
with FileOrPath(outfile, 'wb') as fp:
|
||||||
writer.endElement('match')
|
tree.write(fp, encoding='utf-8')
|
||||||
writer.endElement('group')
|
|
||||||
writer.endElement('results')
|
|
||||||
writer.endDocument()
|
|
||||||
if must_close:
|
|
||||||
outfile.close()
|
|
||||||
|
|
||||||
def sort_dupes(self, key, asc=True, delta=False):
|
def sort_dupes(self, key, asc=True, delta=False):
|
||||||
if not self.__dupes:
|
if not self.__dupes:
|
||||||
@ -310,60 +309,3 @@ class Results(Markable):
|
|||||||
dupes = property(__get_dupe_list)
|
dupes = property(__get_dupe_list)
|
||||||
groups = property(__get_groups, __set_groups)
|
groups = property(__get_groups, __set_groups)
|
||||||
stat_line = property(__get_stat_line)
|
stat_line = property(__get_stat_line)
|
||||||
|
|
||||||
class _ResultsHandler(handler.ContentHandler):
|
|
||||||
def __init__(self, get_file):
|
|
||||||
self.group = None
|
|
||||||
self.dupes = None
|
|
||||||
self.marked = set()
|
|
||||||
self.groups = []
|
|
||||||
self.get_file = get_file
|
|
||||||
|
|
||||||
def startElement(self, name, attrs):
|
|
||||||
if name == 'group':
|
|
||||||
self.group = engine.Group()
|
|
||||||
self.dupes = []
|
|
||||||
return
|
|
||||||
if (name == 'file') and (self.group is not None):
|
|
||||||
if not (('path' in attrs) and ('words' in attrs)):
|
|
||||||
return
|
|
||||||
path = attrs['path']
|
|
||||||
file = self.get_file(path)
|
|
||||||
if file is None:
|
|
||||||
return
|
|
||||||
file.words = attrs['words'].split(',')
|
|
||||||
file.is_ref = attrs.get('is_ref') == 'y'
|
|
||||||
self.dupes.append(file)
|
|
||||||
if attrs.get('marked') == 'y':
|
|
||||||
self.marked.add(file)
|
|
||||||
if (name == 'match') and (self.group is not None):
|
|
||||||
try:
|
|
||||||
first_file = self.dupes[int(attrs['first'])]
|
|
||||||
second_file = self.dupes[int(attrs['second'])]
|
|
||||||
percentage = int(attrs['percentage'])
|
|
||||||
self.group.add_match(engine.Match(first_file, second_file, percentage))
|
|
||||||
except (IndexError, KeyError, ValueError): # Covers missing attr, non-int values and indexes out of bounds
|
|
||||||
pass
|
|
||||||
|
|
||||||
def endElement(self, name):
|
|
||||||
def do_match(ref_file, other_files, group):
|
|
||||||
if not other_files:
|
|
||||||
return
|
|
||||||
for other_file in other_files:
|
|
||||||
group.add_match(engine.get_match(ref_file, other_file))
|
|
||||||
do_match(other_files[0], other_files[1:], group)
|
|
||||||
|
|
||||||
if name == 'group':
|
|
||||||
group = self.group
|
|
||||||
self.group = None
|
|
||||||
dupes = self.dupes
|
|
||||||
self.dupes = []
|
|
||||||
if group is None:
|
|
||||||
return
|
|
||||||
if len(dupes) < 2:
|
|
||||||
return
|
|
||||||
if not group.matches: # <match> elements not present, do it manually, without %
|
|
||||||
do_match(dupes[0], dupes[1:], group)
|
|
||||||
group.prioritize(lambda x: dupes.index(x))
|
|
||||||
self.groups.append(group)
|
|
||||||
|
|
||||||
|
@ -248,7 +248,7 @@ class TCDupeGuruWithResults(TestCase):
|
|||||||
self.rtree.selected_paths = paths
|
self.rtree.selected_paths = paths
|
||||||
self.app.remove_selected()
|
self.app.remove_selected()
|
||||||
# The first 2 dupes have been removed. The 3rd one is a ref. it stays there, in first pos.
|
# The first 2 dupes have been removed. The 3rd one is a ref. it stays there, in first pos.
|
||||||
eq_(self.rtree.selected_paths, [[0]]) # no exception
|
eq_(self.rtree.selected_paths, [[0, 0]]) # no exception
|
||||||
|
|
||||||
def test_selectResultNodePaths(self):
|
def test_selectResultNodePaths(self):
|
||||||
app = self.app
|
app = self.app
|
||||||
@ -366,10 +366,7 @@ class TCDupeGuruWithResults(TestCase):
|
|||||||
app = self.app
|
app = self.app
|
||||||
self.rtree.selected_paths = [[0, 0], [1, 0]]
|
self.rtree.selected_paths = [[0, 0], [1, 0]]
|
||||||
app.remove_selected()
|
app.remove_selected()
|
||||||
eq_(len(app.results.dupes), 1)
|
eq_(len(app.results.dupes), 1) # the first path is now selected
|
||||||
app.remove_selected()
|
|
||||||
eq_(len(app.results.dupes), 1)
|
|
||||||
self.rtree.selected_path = [0, 0]
|
|
||||||
app.remove_selected()
|
app.remove_selected()
|
||||||
eq_(len(app.results.dupes), 0)
|
eq_(len(app.results.dupes), 0)
|
||||||
|
|
||||||
|
@ -229,10 +229,9 @@ class TCbuild_word_dict(TestCase):
|
|||||||
self.log = []
|
self.log = []
|
||||||
s = "foo bar"
|
s = "foo bar"
|
||||||
build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j)
|
build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j)
|
||||||
|
# We don't have intermediate log because iter_with_progress is called with every > 1
|
||||||
self.assertEqual(0,self.log[0])
|
self.assertEqual(0,self.log[0])
|
||||||
self.assertEqual(33,self.log[1])
|
self.assertEqual(100,self.log[1])
|
||||||
self.assertEqual(66,self.log[2])
|
|
||||||
self.assertEqual(100,self.log[3])
|
|
||||||
|
|
||||||
|
|
||||||
class TCmerge_similar_words(TestCase):
|
class TCmerge_similar_words(TestCase):
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
# http://www.hardcoded.net/licenses/hs_license
|
# http://www.hardcoded.net/licenses/hs_license
|
||||||
|
|
||||||
import cStringIO
|
import cStringIO
|
||||||
import xml.dom.minidom
|
from lxml import etree
|
||||||
|
|
||||||
from nose.tools import eq_
|
from nose.tools import eq_
|
||||||
|
|
||||||
@ -62,26 +62,25 @@ def test_save_to_xml():
|
|||||||
f = cStringIO.StringIO()
|
f = cStringIO.StringIO()
|
||||||
il.save_to_xml(f)
|
il.save_to_xml(f)
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
doc = xml.dom.minidom.parse(f)
|
doc = etree.parse(f)
|
||||||
root = doc.documentElement
|
root = doc.getroot()
|
||||||
eq_('ignore_list',root.nodeName)
|
eq_(root.tag, 'ignore_list')
|
||||||
children = [c for c in root.childNodes if c.localName]
|
eq_(len(root), 2)
|
||||||
eq_(2,len(children))
|
eq_(len([c for c in root if c.tag == 'file']), 2)
|
||||||
eq_(2,len([c for c in children if c.nodeName == 'file']))
|
f1, f2 = root[:]
|
||||||
f1,f2 = children
|
subchildren = [c for c in f1 if c.tag == 'file'] + [c for c in f2 if c.tag == 'file']
|
||||||
subchildren = [c for c in f1.childNodes if c.localName == 'file'] +\
|
eq_(len(subchildren), 3)
|
||||||
[c for c in f2.childNodes if c.localName == 'file']
|
|
||||||
eq_(3,len(subchildren))
|
|
||||||
|
|
||||||
def test_SaveThenLoad():
|
def test_SaveThenLoad():
|
||||||
il = IgnoreList()
|
il = IgnoreList()
|
||||||
il.Ignore('foo','bar')
|
il.Ignore('foo', 'bar')
|
||||||
il.Ignore('foo','bleh')
|
il.Ignore('foo', 'bleh')
|
||||||
il.Ignore('bleh','bar')
|
il.Ignore('bleh', 'bar')
|
||||||
il.Ignore(u'\u00e9','bar')
|
il.Ignore(u'\u00e9', 'bar')
|
||||||
f = cStringIO.StringIO()
|
f = cStringIO.StringIO()
|
||||||
il.save_to_xml(f)
|
il.save_to_xml(f)
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
|
f.seek(0)
|
||||||
il = IgnoreList()
|
il = IgnoreList()
|
||||||
il.load_from_xml(f)
|
il.load_from_xml(f)
|
||||||
eq_(4,len(il))
|
eq_(4,len(il))
|
||||||
@ -129,9 +128,9 @@ def test_filter():
|
|||||||
assert not il.AreIgnored('foo','bar')
|
assert not il.AreIgnored('foo','bar')
|
||||||
assert il.AreIgnored('bar','baz')
|
assert il.AreIgnored('bar','baz')
|
||||||
|
|
||||||
def test_save_with_non_ascii_non_unicode_items():
|
def test_save_with_non_ascii_items():
|
||||||
il = IgnoreList()
|
il = IgnoreList()
|
||||||
il.Ignore('\xac','\xbf')
|
il.Ignore(u'\xac', u'\xbf')
|
||||||
f = cStringIO.StringIO()
|
f = cStringIO.StringIO()
|
||||||
try:
|
try:
|
||||||
il.save_to_xml(f)
|
il.save_to_xml(f)
|
||||||
|
@ -7,10 +7,9 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.hardcoded.net/licenses/hs_license
|
# http://www.hardcoded.net/licenses/hs_license
|
||||||
|
|
||||||
import unittest
|
|
||||||
import StringIO
|
import StringIO
|
||||||
import xml.dom.minidom
|
|
||||||
import os.path as op
|
import os.path as op
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
from hsutil.path import Path
|
from hsutil.path import Path
|
||||||
from hsutil.testcase import TestCase
|
from hsutil.testcase import TestCase
|
||||||
@ -18,7 +17,7 @@ from hsutil.misc import first
|
|||||||
|
|
||||||
from . import engine_test, data
|
from . import engine_test, data
|
||||||
from .. import engine
|
from .. import engine
|
||||||
from ..results import *
|
from ..results import Results
|
||||||
|
|
||||||
class NamedObject(engine_test.NamedObject):
|
class NamedObject(engine_test.NamedObject):
|
||||||
path = property(lambda x:Path('basepath') + x.name)
|
path = property(lambda x:Path('basepath') + x.name)
|
||||||
@ -65,9 +64,9 @@ class TCResultsEmpty(TestCase):
|
|||||||
f = StringIO.StringIO()
|
f = StringIO.StringIO()
|
||||||
self.results.save_to_xml(f)
|
self.results.save_to_xml(f)
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
doc = xml.dom.minidom.parse(f)
|
doc = etree.parse(f)
|
||||||
root = doc.documentElement
|
root = doc.getroot()
|
||||||
self.assertEqual('results',root.nodeName)
|
self.assertEqual('results', root.tag)
|
||||||
|
|
||||||
|
|
||||||
class TCResultsWithSomeGroups(TestCase):
|
class TCResultsWithSomeGroups(TestCase):
|
||||||
@ -321,16 +320,16 @@ class TCResultsMarkings(TestCase):
|
|||||||
f = StringIO.StringIO()
|
f = StringIO.StringIO()
|
||||||
self.results.save_to_xml(f)
|
self.results.save_to_xml(f)
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
doc = xml.dom.minidom.parse(f)
|
doc = etree.parse(f)
|
||||||
root = doc.documentElement
|
root = doc.getroot()
|
||||||
g1,g2 = root.getElementsByTagName('group')
|
g1, g2 = root.iterchildren('group')
|
||||||
d1,d2,d3 = g1.getElementsByTagName('file')
|
d1, d2, d3 = g1.iterchildren('file')
|
||||||
self.assertEqual('n',d1.getAttributeNode('marked').nodeValue)
|
self.assertEqual('n', d1.get('marked'))
|
||||||
self.assertEqual('n',d2.getAttributeNode('marked').nodeValue)
|
self.assertEqual('n', d2.get('marked'))
|
||||||
self.assertEqual('y',d3.getAttributeNode('marked').nodeValue)
|
self.assertEqual('y', d3.get('marked'))
|
||||||
d1,d2 = g2.getElementsByTagName('file')
|
d1, d2 = g2.iterchildren('file')
|
||||||
self.assertEqual('n',d1.getAttributeNode('marked').nodeValue)
|
self.assertEqual('n', d1.get('marked'))
|
||||||
self.assertEqual('y',d2.getAttributeNode('marked').nodeValue)
|
self.assertEqual('y', d2.get('marked'))
|
||||||
|
|
||||||
def test_LoadXML(self):
|
def test_LoadXML(self):
|
||||||
def get_file(path):
|
def get_file(path):
|
||||||
@ -366,38 +365,35 @@ class TCResultsXML(TestCase):
|
|||||||
f = StringIO.StringIO()
|
f = StringIO.StringIO()
|
||||||
self.results.save_to_xml(f)
|
self.results.save_to_xml(f)
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
doc = xml.dom.minidom.parse(f)
|
doc = etree.parse(f)
|
||||||
root = doc.documentElement
|
root = doc.getroot()
|
||||||
self.assertEqual('results',root.nodeName)
|
self.assertEqual('results', root.tag)
|
||||||
children = [c for c in root.childNodes if c.localName]
|
self.assertEqual(2, len(root))
|
||||||
self.assertEqual(2,len(children))
|
self.assertEqual(2, len([c for c in root if c.tag == 'group']))
|
||||||
self.assertEqual(2,len([c for c in children if c.nodeName == 'group']))
|
g1, g2 = root
|
||||||
g1,g2 = children
|
self.assertEqual(6,len(g1))
|
||||||
children = [c for c in g1.childNodes if c.localName]
|
self.assertEqual(3,len([c for c in g1 if c.tag == 'file']))
|
||||||
self.assertEqual(6,len(children))
|
self.assertEqual(3,len([c for c in g1 if c.tag == 'match']))
|
||||||
self.assertEqual(3,len([c for c in children if c.nodeName == 'file']))
|
d1, d2, d3 = [c for c in g1 if c.tag == 'file']
|
||||||
self.assertEqual(3,len([c for c in children if c.nodeName == 'match']))
|
self.assertEqual(op.join('basepath','foo bar'),d1.get('path'))
|
||||||
d1,d2,d3 = [c for c in children if c.nodeName == 'file']
|
self.assertEqual(op.join('basepath','bar bleh'),d2.get('path'))
|
||||||
self.assertEqual(op.join('basepath','foo bar'),d1.getAttributeNode('path').nodeValue)
|
self.assertEqual(op.join('basepath','foo bleh'),d3.get('path'))
|
||||||
self.assertEqual(op.join('basepath','bar bleh'),d2.getAttributeNode('path').nodeValue)
|
self.assertEqual('y',d1.get('is_ref'))
|
||||||
self.assertEqual(op.join('basepath','foo bleh'),d3.getAttributeNode('path').nodeValue)
|
self.assertEqual('n',d2.get('is_ref'))
|
||||||
self.assertEqual('y',d1.getAttributeNode('is_ref').nodeValue)
|
self.assertEqual('n',d3.get('is_ref'))
|
||||||
self.assertEqual('n',d2.getAttributeNode('is_ref').nodeValue)
|
self.assertEqual('foo,bar',d1.get('words'))
|
||||||
self.assertEqual('n',d3.getAttributeNode('is_ref').nodeValue)
|
self.assertEqual('bar,bleh',d2.get('words'))
|
||||||
self.assertEqual('foo,bar',d1.getAttributeNode('words').nodeValue)
|
self.assertEqual('foo,bleh',d3.get('words'))
|
||||||
self.assertEqual('bar,bleh',d2.getAttributeNode('words').nodeValue)
|
self.assertEqual(3,len(g2))
|
||||||
self.assertEqual('foo,bleh',d3.getAttributeNode('words').nodeValue)
|
self.assertEqual(2,len([c for c in g2 if c.tag == 'file']))
|
||||||
children = [c for c in g2.childNodes if c.localName]
|
self.assertEqual(1,len([c for c in g2 if c.tag == 'match']))
|
||||||
self.assertEqual(3,len(children))
|
d1, d2 = [c for c in g2 if c.tag == 'file']
|
||||||
self.assertEqual(2,len([c for c in children if c.nodeName == 'file']))
|
self.assertEqual(op.join('basepath','ibabtu'),d1.get('path'))
|
||||||
self.assertEqual(1,len([c for c in children if c.nodeName == 'match']))
|
self.assertEqual(op.join('basepath','ibabtu'),d2.get('path'))
|
||||||
d1,d2 = [c for c in children if c.nodeName == 'file']
|
self.assertEqual('n',d1.get('is_ref'))
|
||||||
self.assertEqual(op.join('basepath','ibabtu'),d1.getAttributeNode('path').nodeValue)
|
self.assertEqual('n',d2.get('is_ref'))
|
||||||
self.assertEqual(op.join('basepath','ibabtu'),d2.getAttributeNode('path').nodeValue)
|
self.assertEqual('ibabtu',d1.get('words'))
|
||||||
self.assertEqual('n',d1.getAttributeNode('is_ref').nodeValue)
|
self.assertEqual('ibabtu',d2.get('words'))
|
||||||
self.assertEqual('n',d2.getAttributeNode('is_ref').nodeValue)
|
|
||||||
self.assertEqual('ibabtu',d1.getAttributeNode('words').nodeValue)
|
|
||||||
self.assertEqual('ibabtu',d2.getAttributeNode('words').nodeValue)
|
|
||||||
|
|
||||||
def test_LoadXML(self):
|
def test_LoadXML(self):
|
||||||
def get_file(path):
|
def get_file(path):
|
||||||
@ -460,41 +456,41 @@ class TCResultsXML(TestCase):
|
|||||||
def get_file(path):
|
def get_file(path):
|
||||||
return [f for f in self.objects if str(f.path) == path][0]
|
return [f for f in self.objects if str(f.path) == path][0]
|
||||||
|
|
||||||
doc = xml.dom.minidom.Document()
|
root = etree.Element('foobar') #The root element shouldn't matter, really.
|
||||||
root = doc.appendChild(doc.createElement('foobar')) #The root element shouldn't matter, really.
|
group_node = etree.SubElement(root, 'group')
|
||||||
group_node = root.appendChild(doc.createElement('group'))
|
dupe_node = etree.SubElement(group_node, 'file') #Perfectly correct file
|
||||||
dupe_node = group_node.appendChild(doc.createElement('file')) #Perfectly correct file
|
dupe_node.set('path', op.join('basepath','foo bar'))
|
||||||
dupe_node.setAttribute('path',op.join('basepath','foo bar'))
|
dupe_node.set('is_ref', 'y')
|
||||||
dupe_node.setAttribute('is_ref','y')
|
dupe_node.set('words', 'foo,bar')
|
||||||
dupe_node.setAttribute('words','foo,bar')
|
dupe_node = etree.SubElement(group_node, 'file') #is_ref missing, default to 'n'
|
||||||
dupe_node = group_node.appendChild(doc.createElement('file')) #is_ref missing, default to 'n'
|
dupe_node.set('path',op.join('basepath','foo bleh'))
|
||||||
dupe_node.setAttribute('path',op.join('basepath','foo bleh'))
|
dupe_node.set('words','foo,bleh')
|
||||||
dupe_node.setAttribute('words','foo,bleh')
|
dupe_node = etree.SubElement(group_node, 'file') #words are missing, valid.
|
||||||
dupe_node = group_node.appendChild(doc.createElement('file')) #words are missing, invalid.
|
dupe_node.set('path',op.join('basepath','bar bleh'))
|
||||||
dupe_node.setAttribute('path',op.join('basepath','bar bleh'))
|
dupe_node = etree.SubElement(group_node, 'file') #path is missing, invalid.
|
||||||
dupe_node = group_node.appendChild(doc.createElement('file')) #path is missing, invalid.
|
dupe_node.set('words','foo,bleh')
|
||||||
dupe_node.setAttribute('words','foo,bleh')
|
dupe_node = etree.SubElement(group_node, 'foobar') #Invalid element name
|
||||||
dupe_node = group_node.appendChild(doc.createElement('foobar')) #Invalid element name
|
dupe_node.set('path',op.join('basepath','bar bleh'))
|
||||||
dupe_node.setAttribute('path',op.join('basepath','bar bleh'))
|
dupe_node.set('is_ref','y')
|
||||||
dupe_node.setAttribute('is_ref','y')
|
dupe_node.set('words','bar,bleh')
|
||||||
dupe_node.setAttribute('words','bar,bleh')
|
match_node = etree.SubElement(group_node, 'match') # match pointing to a bad index
|
||||||
match_node = group_node.appendChild(doc.createElement('match')) # match pointing to a bad index
|
match_node.set('first', '42')
|
||||||
match_node.setAttribute('first', '42')
|
match_node.set('second', '45')
|
||||||
match_node.setAttribute('second', '45')
|
match_node = etree.SubElement(group_node, 'match') # match with missing attrs
|
||||||
match_node = group_node.appendChild(doc.createElement('match')) # match with missing attrs
|
match_node = etree.SubElement(group_node, 'match') # match with non-int values
|
||||||
match_node = group_node.appendChild(doc.createElement('match')) # match with non-int values
|
match_node.set('first', 'foo')
|
||||||
match_node.setAttribute('first', 'foo')
|
match_node.set('second', 'bar')
|
||||||
match_node.setAttribute('second', 'bar')
|
match_node.set('percentage', 'baz')
|
||||||
match_node.setAttribute('percentage', 'baz')
|
group_node = etree.SubElement(root, 'foobar') #invalid group
|
||||||
group_node = root.appendChild(doc.createElement('foobar')) #invalid group
|
group_node = etree.SubElement(root, 'group') #empty group
|
||||||
group_node = root.appendChild(doc.createElement('group')) #empty group
|
|
||||||
f = StringIO.StringIO()
|
f = StringIO.StringIO()
|
||||||
doc.writexml(f,'\t','\t','\n',encoding='utf-8')
|
tree = etree.ElementTree(root)
|
||||||
|
tree.write(f, encoding='utf-8')
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
r = Results(data)
|
r = Results(data)
|
||||||
r.load_from_xml(f,get_file)
|
r.load_from_xml(f, get_file)
|
||||||
self.assertEqual(1,len(r.groups))
|
self.assertEqual(1,len(r.groups))
|
||||||
self.assertEqual(2,len(r.groups[0]))
|
self.assertEqual(3,len(r.groups[0]))
|
||||||
|
|
||||||
def test_xml_non_ascii(self):
|
def test_xml_non_ascii(self):
|
||||||
def get_file(path):
|
def get_file(path):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user