vs-metadata/validate.py (111 lines of code) (raw):

#!/usr/bin/python import xml.etree.cElementTree as ET import sys import re import json node_splitter = re.compile(r'^\{(.*)\}(.*)$') xmlns = "{http://xml.vidispine.com/schema/vidispine}" def split_namespace(string): """ splits out the namespace and node name parts of an xml tag, if present :param string: the tag name to extract :return: a 2-tuple, first element being the tag and second being the namespace if present or None. """ matches = node_splitter.search(string) if matches: return matches.group(2), matches.group(1) else: return string, None def validate_root(root_node): """ checks that we have the expected root node and namespace :param root_node: :return: """ passing = True tagname, tagnamespace = split_namespace(root_node.tag) if tagname != "MetadataFieldGroupDocument": print("Root node was '{0}', not MetadataFieldGroupDocument".format(root_node.tag)) passing = False if tagnamespace != "http://xml.vidispine.com/schema/vidispine": print("XML namespace was incorrect") passing = False return passing def validate_node_single_nonempty(parent_node, node_name): """ checks that the name exists and is not an empty string :param root_node: :return: """ name_nodes = [x for x in parent_node.findall("{0}{1}".format(xmlns,node_name))] if len(name_nodes)==0: print("No <{0}> node under root".format(node_name)) return False elif len(name_nodes)>1: print("Multiple <{0}> nodes under root!".format(node_name)) return False if name_nodes[0].text=="": print("<{0}}> node existed but was empty".format(node_name)) return False return True def extract_extradata(field_node): """ extracts the extradata from the field, if it is present :param field_node: :return: either the extradata text or None """ for data_node in field_node.findall("{0}data".format(xmlns)): key_node = data_node.find("{0}key".format(xmlns)) if key_node is not None and key_node.text=="extradata": value_node = data_node.find("{0}value".format(xmlns)) if value_node is not None: return value_node.text return None def validate_field_block(counter, field_node): """ validates the contents of a single field block :param field_node: :return: """ passing = True if not validate_node_single_nonempty(field_node, "name"): print("Field {0} in the doc has no name".format(counter)) return False field_name = field_node.find("{0}name".format(xmlns)).text prefix = "Field {0} ({1})".format(counter, field_name) if not validate_node_single_nonempty(field_node,"schema"): print("{0} has no schema node".format(prefix)) passing = False else: schema_node = field_node.find("{0}schema".format(xmlns)) if not(schema_node.attrib.get("min")) or not(schema_node.attrib.get("max")) or not(schema_node.attrib.get("name")): print("{0} schema node is missing required arguments".format(prefix)) passing = False if schema_node.attrib.get("name") != field_name: print("{0} schema node name is not the same as field name. Expected {1}, got {2}".format(prefix, field_name, schema_node.attrib.get("name"))) if not validate_node_single_nonempty(field_node,"type"): print("{0} has no type node".format(prefix)) passing = False originNodes = [x for x in field_node.findall("{0}origin".format(xmlns))] if len(originNodes)>0: print("{0} should not have an origin node".format(prefix)) passing = False extradata = extract_extradata(field_node) if extradata: try: extradata_parsed = json.loads(extradata) readonly = extradata_parsed.get("readonly","") if not isinstance(readonly, bool): print("{0} Readonly should be a boolean".format(prefix)) passing = False # if readonly!="true" and readonly!="false": # print("{0} Readonly value was '{1}', not true or false".format(prefix, readonly)) # passing = False values = extradata_parsed.get("values",[]) if not isinstance(values, list): print("{0} values key was a {1}, not a list".format(prefix, str(values.__class__))) passing = False except Exception as e: print("{0} extradata was not valid json: {1}".format(prefix, e)) passing = False return passing else: print("{0} has no extradata present".format(prefix, counter, field_name)) return False def validate_fields(root_node): passing = True i = 0 for field_node in root_node.findall("{0}field".format(xmlns)): i+=1 if not validate_field_block(i, field_node): print("Field block for field {0} is not correct".format(i)) passing = False return passing ###START MAIN passing = True if sys.argv[1]=="": print("Validates the given vidispine xml metadata description. Usage: validate.py {xmlfile}") exit(1) with open(sys.argv[1],"r") as f: content = ET.fromstring(f.read()) if not validate_root(content): passing = False if not validate_node_single_nonempty(content, "name"): passing = False if not validate_fields(content): passing = False if passing: print("{0} passed validation".format(sys.argv[1])) else: print("{0} failed validation, see above".format(sys.argv[1])) exit(2)