def processURI()

in assets/lambda_helper_neptune/python/rdflib/plugins/parsers/pyRdfa/__init__.py [0:0]
150 lines of code
27 McCabe index (conditional complexity)

def processURI(uri, outputFormat, form={}) :
	"""The standard processing of an RDFa uri options in a form; used as an entry point from a CGI call.

	The call accepts extra form options (i.e., HTTP GET options) as follows:

	 - C{graph=[output|processor|output,processor|processor,output]} specifying which graphs are returned. Default: C{output}
	 - C{space_preserve=[true|false]} means that plain literals are normalized in terms of white spaces. Default: C{false}
	 - C{rfa_version} provides the RDFa version that should be used for distilling. The string should be of the form "1.0" or "1.1". Default is the highest version the current package implements, currently "1.1"
	 - C{host_language=[xhtml,html,xml]} : the host language. Used when files are uploaded or text is added verbatim, otherwise the HTTP return header should be used. Default C{xml}
	 - C{embedded_rdf=[true|false]} : whether embedded turtle or RDF/XML content should be added to the output graph. Default: C{false}
	 - C{vocab_expansion=[true|false]} : whether the vocabularies should be expanded through the restricted RDFS entailment. Default: C{false}
	 - C{vocab_cache=[true|false]} : whether vocab caching should be performed or whether it should be ignored and vocabulary files should be picked up every time. Default: C{false}
	 - C{vocab_cache_report=[true|false]} : whether vocab caching details should be reported. Default: C{false}
	 - C{vocab_cache_bypass=[true|false]} : whether vocab caches have to be regenerated every time. Default: C{false}
	 - C{rdfa_lite=[true|false]} : whether warnings should be generated for non RDFa Lite attribute usage. Default: C{false}

	@param uri: URI to access. Note that the C{text:} and C{uploaded:} fake URI values are treated separately; the former is for textual intput (in which case a StringIO is used to get the data) and the latter is for uploaded file, where the form gives access to the file directly.
	@param outputFormat: serialization format, as defined by the package. Currently "xml", "turtle", "nt", or "json". Default is "turtle", also used if any other string is given.
	@param form: extra call options (from the CGI call) to set up the local options
	@type form: cgi FieldStorage instance
	@return: serialized graph
	@rtype: string
	"""
	def _get_option(param, compare_value, default) :
		param_old = param.replace('_','-')
		if param in list(form.keys()) :
			val = form.getfirst(param).lower()
			return val == compare_value
		elif param_old in list(form.keys()) :
			# this is to ensure the old style parameters are still valid...
			# in the old days I used '-' in the parameters, the standard favours '_'
			val = form.getfirst(param_old).lower()
			return val == compare_value
		else :
			return default

	if uri == "uploaded:" :
		input	= form["uploaded"].file
		base	= ""
	elif uri == "text:" :
		input	= StringIO(form.getfirst("text"))
		base	= ""
	else :
		input	= uri
		base	= uri

	if "rdfa_version" in list(form.keys()) :
		rdfa_version = form.getfirst("rdfa_version")
	else :
		rdfa_version = None

	# working through the possible options
	# Host language: HTML, XHTML, or XML
	# Note that these options should be used for the upload and inline version only in case of a form
	# for real uris the returned content type should be used
	if "host_language" in list(form.keys()) :
		if form.getfirst("host_language").lower() == "xhtml" :
			media_type = MediaTypes.xhtml
		elif form.getfirst("host_language").lower() == "html" :
			media_type = MediaTypes.html
		elif form.getfirst("host_language").lower() == "svg" :
			media_type = MediaTypes.svg
		elif form.getfirst("host_language").lower() == "atom" :
			media_type = MediaTypes.atom
		else :
			media_type = MediaTypes.xml
	else :
		media_type = ""

	transformers = []

	check_lite = "rdfa_lite" in list(form.keys()) and form.getfirst("rdfa_lite").lower() == "true"

	# The code below is left for backward compatibility only. In fact, these options are not exposed any more,
	# they are not really in use
	if "extras" in list(form.keys()) and form.getfirst("extras").lower() == "true" :
		from .transform.metaname              	import meta_transform
		from .transform.OpenID                	import OpenID_transform
		from .transform.DublinCore            	import DC_transform
		for t in [OpenID_transform, DC_transform, meta_transform] :
			transformers.append(t)
	else :
		if "extra-meta" in list(form.keys()) and form.getfirst("extra-meta").lower() == "true" :
			from .transform.metaname import meta_transform
			transformers.append(meta_transform)
		if "extra-openid" in list(form.keys()) and form.getfirst("extra-openid").lower() == "true" :
			from .transform.OpenID import OpenID_transform
			transformers.append(OpenID_transform)
		if "extra-dc" in list(form.keys()) and form.getfirst("extra-dc").lower() == "true" :
			from .transform.DublinCore import DC_transform
			transformers.append(DC_transform)

	output_default_graph 	= True
	output_processor_graph 	= False
	# Note that I use the 'graph' and the 'rdfagraph' form keys here. Reason is that
	# I used 'graph' in the previous versions, including the RDFa 1.0 processor,
	# so if I removed that altogether that would create backward incompatibilities
	# On the other hand, the RDFa 1.1 doc clearly refers to 'rdfagraph' as the standard
	# key.
	a = None
	if "graph" in list(form.keys()) :
		a = form.getfirst("graph").lower()
	elif "rdfagraph" in list(form.keys()) :
		a = form.getfirst("rdfagraph").lower()
	if a != None :
		if a == "processor" :
			output_default_graph 	= False
			output_processor_graph 	= True
		elif a == "processor,output" or a == "output,processor" :
			output_processor_graph 	= True

	embedded_rdf        = _get_option( "embedded_rdf", "true", False)
	space_preserve      = _get_option( "space_preserve", "true", True)
	vocab_cache         = _get_option( "vocab_cache", "true", True)
	vocab_cache_report  = _get_option( "vocab_cache_report", "true", False)
	refresh_vocab_cache = _get_option( "vocab_cache_refresh", "true", False)
	vocab_expansion     = _get_option( "vocab_expansion", "true", False)
	if vocab_cache_report : output_processor_graph = True

	options = Options(output_default_graph   = output_default_graph,
					  output_processor_graph = output_processor_graph,
					  space_preserve         = space_preserve,
					  transformers           = transformers,
					  vocab_cache            = vocab_cache,
					  vocab_cache_report     = vocab_cache_report,
					  refresh_vocab_cache    = refresh_vocab_cache,
					  vocab_expansion        = vocab_expansion,
					  embedded_rdf           = embedded_rdf,
					  check_lite             = check_lite
					  )
	processor = pyRdfa(options = options, base = base, media_type = media_type, rdfa_version = rdfa_version)

	# Decide the output format; the issue is what should happen in case of a top level error like an inaccessibility of
	# the html source: should a graph be returned or an HTML page with an error message?

	# decide whether HTML or RDF should be sent.
	htmlOutput = False
	#if 'HTTP_ACCEPT' in os.environ :
	#	acc = os.environ['HTTP_ACCEPT']
	#	possibilities = ['text/html',
	#					 'application/rdf+xml',
	#					 'text/turtle; charset=utf-8',
	#					 'application/json',
	#					 'application/ld+json',
	#					 'text/rdf+n3']
	#
	#	# this nice module does content negotiation and returns the preferred format
	#	sg = acceptable_content_type(acc, possibilities)
	#	htmlOutput = (sg != None and sg[0] == content_type('text/html'))
	#	os.environ['rdfaerror'] = 'true'

	# This is really for testing purposes only, it is an unpublished flag to force RDF output no
	# matter what
	try :
		graph = processor.rdf_from_source(input, outputFormat, rdfOutput = ("forceRDFOutput" in list(form.keys())) or not htmlOutput)
		if outputFormat == "n3" :
			retval = 'Content-Type: text/rdf+n3; charset=utf-8\n'
		elif outputFormat == "nt" or outputFormat == "turtle" :
			retval = 'Content-Type: text/turtle; charset=utf-8\n'
		elif outputFormat == "json-ld" or outputFormat == "json" :
			retval = 'Content-Type: application/ld+json; charset=utf-8\n'
		else :
			retval = 'Content-Type: application/rdf+xml; charset=utf-8\n'
		retval += '\n'
		retval += graph
		return retval
	except HTTPError :
		(type,h,traceback) = sys.exc_info()
		import cgi

		retval = 'Content-type: text/html; charset=utf-8\nStatus: %s \n\n' % h.http_code
		retval += "<html>\n"
		retval += "<head>\n"
		retval += "<title>HTTP Error in distilling RDFa content</title>\n"
		retval += "</head><body>\n"
		retval += "<h1>HTTP Error in distilling RDFa content</h1>\n"
		retval += "<p>HTTP Error: %s (%s)</p>\n" % (h.http_code,h.msg)
		retval += "<p>On URI: <code>'%s'</code></p>\n" % cgi.escape(uri)
		retval +="</body>\n"
		retval +="</html>\n"
		return retval
	except :
		# This branch should occur only if an exception is really raised, ie, if it is not turned
		# into a graph value.
		(type,value,traceback) = sys.exc_info()

		import traceback, cgi

		retval = 'Content-type: text/html; charset=utf-8\nStatus: %s\n\n' % processor.http_status
		retval += "<html>\n"
		retval += "<head>\n"
		retval += "<title>Exception in RDFa processing</title>\n"
		retval += "</head><body>\n"
		retval += "<h1>Exception in distilling RDFa</h1>\n"
		retval += "<pre>\n"
		strio  = StringIO()
		traceback.print_exc(file=strio)
		retval += strio.getvalue()
		retval +="</pre>\n"
		retval +="<pre>%s</pre>\n" % value
		retval +="<h1>Distiller request details</h1>\n"
		retval +="<dl>\n"
		if uri == "text:" and "text" in form and form["text"].value != None and len(form["text"].value.strip()) != 0 :
			retval +="<dt>Text input:</dt><dd>%s</dd>\n" % cgi.escape(form["text"].value).replace('\n','<br/>')
		elif uri == "uploaded:" :
			retval +="<dt>Uploaded file</dt>\n"
		else :
			retval +="<dt>URI received:</dt><dd><code>'%s'</code></dd>\n" % cgi.escape(uri)
		if "host_language" in list(form.keys()) :
			retval +="<dt>Media Type:</dt><dd>%s</dd>\n" % media_type
		if "graph" in list(form.keys()) :
			retval +="<dt>Requested graphs:</dt><dd>%s</dd>\n" % form.getfirst("graph").lower()
		else :
			retval +="<dt>Requested graphs:</dt><dd>default</dd>\n"
		retval +="<dt>Output serialization format:</dt><dd> %s</dd>\n" % outputFormat
		if "space_preserve" in form : retval +="<dt>Space preserve:</dt><dd> %s</dd>\n" % form["space_preserve"].value
		retval +="</dl>\n"
		retval +="</body>\n"
		retval +="</html>\n"
		return retval