tools/PrettyXML.py (77 lines of code) (raw):
import glob
import inspect
import logging
from xml.etree import ElementTree as ET
import argparse
import sys
# Oldest interpreter this script supports, as a (major, minor) pair.
# The attribute reordering in prettyPrint() relies on dict keeping insertion order (Python 3.7+).
MIN_PYTHON = (3, 7)
if not (sys.version_info >= MIN_PYTHON):
    # abort right away with a readable message instead of misbehaving later on
    sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON)
def empty(s: str) -> bool:
    """
    Tells whether the string carries no visible content
    :param s: the string to check; None is accepted and is treated as empty
    :return: True if s is None, a zero-length string or consists of whitespace only, False otherwise
    """
    if not s:
        # None or a zero-length string
        return True
    # whitespace-only strings collapse to '' once stripped
    return not s.strip()
def prettyPrint(elem: ET.Element, newLine: str = '\n', sort: str = None, singleIndent: str = ' ', level: int = 0):
    """
    Sorts and indents the provided ElementTree; the tree is modified in place, nothing is returned
    :param elem: the ElementTree to beautify
    :param newLine: the string to create a new line (this is NOT a LF or CRLF, it is a platform-agnostic new line symbol)
    :param sort: the name of the attribute which is used to sort sibling XML elements; the same attribute
                 is listed first among the attributes of every element, the remaining attributes follow in
                 alphabetical order; if None or empty, nothing is reordered and the tree is only indented
    :param singleIndent: the string of whitespaces used to create indentation
    :param level: the current depth of recursive prettyPrint() call
    """
    # Based on the code snippet published at http://effbot.org/zone/element-lib.htm#prettyprint
    indent = newLine + level * singleIndent

    if sort:
        # since Python 3.7, dict preserves the order of the inserted elements yet it won't allow to reorder them
        # since ElementTree uses a dict for element attributes' storage
        # so we are going to empty that dict and then repopulate it in desired order:
        # first is the 'sort' attribute (if the element has one) then the rest in alphabet order.
        # Elements lacking the 'sort' attribute get their attributes sorted alphabetically, too.
        ordered = {}
        if sort in elem.attrib:
            ordered[sort] = elem.attrib.pop(sort)   # the sort order defining attribute goes first
        for key in sorted(elem.attrib):             # then the remaining attributes in alphabetic order
            ordered[key] = elem.attrib[key]
        elem.attrib.clear()                         # now clear...
        elem.attrib.update(ordered)                 # ... and repopulate the dict in the desired order

    if len(elem) == 0:
        # if has no child elements: only the whitespace following the element needs fixing;
        # the tail of the root element (level 0) is left untouched
        if level > 0 and empty(elem.tail):
            elem.tail = indent
        return

    # if has child elements
    if sort:
        if level == 0:
            # sort top level elements by tag name in reverse order
            # for XML comment nodes child.tag is a function, not a string
            # chr(0x10FFFF) is the last possible char, so in reverse order such nodes come first
            elem[:] = sorted(elem, key=lambda child:
                             child.tag if child.tag and not inspect.isfunction(child.tag)
                             else chr(0x10FFFF), reverse=True)
        else:
            # sort all other levels' elements by the specified attribute's value;
            # elements without that attribute are ordered as if its value were ''
            elem[:] = sorted(elem, key=lambda child: child.get(sort) or '')

    if empty(elem.text):
        # whitespace between the opening tag and the first child: one level deeper
        elem.text = indent + singleIndent
    if empty(elem.tail):
        elem.tail = indent

    # recursively pretty print each child element
    for child in elem:
        prettyPrint(child, newLine, sort, singleIndent, level + 1)

    # the last child is followed by the closing tag of this element,
    # hence its tail uses this element's indentation rather than the children's one
    lastChild = elem[-1]
    if empty(lastChild.tail):
        lastChild.tail = indent
if __name__ == '__main__':
    # Command line entry point: expands the given file names/masks, then sorts, indents and
    # rewrites every matched XML file (or writes to --output when exactly one file is matched).
    parser = argparse.ArgumentParser(description=
                                     "Sorts and beautifies an XML file which defines an IDEA editor color scheme. "
                                     "Top-level XML elements are ordered in the reverse alphabetical order. "
                                     "All other XML elements are sorted in the right alphabetical order. "
                                     "Sibling XML elements are sorted by the value of their \"name=\" attribute. "
                                     "For each XML element its attributes are listed in alphabetical order, except "
                                     "the \"name=\" attribute which is listed first.")
    parser.add_argument('input', type=str, nargs='+', help='XML files to beautify; file name masks are accepted')
    parser.add_argument('-o', '--output', type=str, help='resulting XML file if only a single input file is specified; '
                                                         'if not specified, or if there are many input files, '
                                                         'the input files are overwritten')
    args = parser.parse_args()

    # Expand all the masks up front: whether --output may be used depends on the total
    # number of matched files, which is unknown until every mask has been expanded.
    files = []
    for path in args.input:
        logging.info("Handling " + path)
        # each path can be a single file or a mask
        files.extend(glob.iglob(path))

    # --output is honoured only for a single input file; otherwise ALL the input files are overwritten
    writeToOutput = bool(args.output) and len(files) == 1

    for file in files:
        logging.info(" opening " + file)
        try:
            tree = ET.parse(file)
        except ET.ParseError:
            logging.warning(file + " is not an XML file, skipping")
            continue

        root = tree.getroot()
        root.insert(0, ET.Comment(" This document is auto-generated, do not edit manually."))
        prettyPrint(root, sort='name')
        result = ET.tostring(root, encoding='unicode')

        target = args.output if writeToOutput else file
        logging.info(" writing to " + target)
        # Scheme XML files should use Unix LF as line separators.
        # The serialized text has no XML declaration, so it has to be stored as UTF-8 (the XML default)
        # rather than in the platform-dependent default encoding.
        with open(target, "w", encoding="utf-8", newline="\n") as f:
            f.write(result)