src/graph_notebook/magics/graph_magic.py
def sparql(self, line='', cell='', local_ns: dict = None):
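"""Cell magic: run the cell body as a SPARQL query (or explain request) against the
configured endpoint and render the results in a tabbed widget view. Flags are parsed
from the magic line by the argparse definitions below.

Illustrative invocation (hypothetical query; flags are defined below):

    %%sparql --expand-all -g rdfs:label
    SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10
"""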
parser = argparse.ArgumentParser()
parser.add_argument('query_mode', nargs='?', default='query',
help='query mode (default=query) [query|explain]')
parser.add_argument('--path', '-p', default='',
help='prefix path to sparql endpoint. For example, if "foo/bar" were specified, '
'the endpoint called would be host:port/foo/bar')
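# Consumed by SPARQLNetwork below; appears to include every result binding in the
# visualization rather than only bindings that form subject-predicate-object triples.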
parser.add_argument('--expand-all', action='store_true')
parser.add_argument('--explain-type', type=str.lower, default='dynamic',
help=f'Explain mode to use when using the explain query mode. '
f'Expected values: {SPARQL_EXPLAIN_MODES}')
parser.add_argument('--explain-format', default='text/html', help='response format for explain query mode',
choices=['text/csv', 'text/html'])
parser.add_argument('-m', '--media-type', type=str, default='',
help='Response format for SELECT/CONSTRUCT/DESCRIBE queries. See '
'https://docs.aws.amazon.com/neptune/latest/userguide/sparql-media-type-support.html '
'for valid RDF media types supported by Neptune for each query type. Default for '
'Neptune and SELECT queries is application/sparql-results+json, otherwise no format '
'will be specified in the request.')
parser.add_argument('-g', '--group-by', type=str, default='',
help='Property used to group nodes.')
parser.add_argument('-gr', '--group-by-raw', action='store_true', default=False,
help="Group nodes by the raw binding")
parser.add_argument('-d', '--display-property', type=str, default='',
help='Property to display the value of on each node.')
parser.add_argument('-de', '--edge-display-property', type=str, default='',
help='Property to display the value of on each edge.')
parser.add_argument('-t', '--tooltip-property', type=str, default='',
help='Property to display the value of on each node tooltip.')
parser.add_argument('-te', '--edge-tooltip-property', type=str, default='',
help='Property to display the value of on each edge tooltip.')
parser.add_argument('-l', '--label-max-length', type=int, default=10,
help='Specifies max length of vertex labels, in characters. Default is 10')
parser.add_argument('-le', '--edge-label-max-length', type=int, default=10,
help='Specifies max length of edge labels, in characters. Default is 10')
parser.add_argument('--store-to', type=str, default='', help='store query result to this variable')
parser.add_argument('--store-format', type=str.lower, default='json',
help=f'Configures export type when using --store-to with base query mode. '
f'Valid inputs: {QUERY_STORE_TO_FORMATS}. Default is JSON')
parser.add_argument('--export-to', type=str, default='',
help='Export the base query mode CSV result to the provided file path.')
parser.add_argument('--ignore-groups', action='store_true', default=False, help="Ignore all grouping options")
parser.add_argument('-sp', '--stop-physics', action='store_true', default=False,
help="Disable visualization physics after the initial simulation stabilizes.")
parser.add_argument('-sd', '--simulation-duration', type=int, default=1500,
help='Specifies maximum duration of visualization physics simulation. Default is 1500ms')
parser.add_argument('--silent', action='store_true', default=False, help="Display no query output.")
parser.add_argument('-ct', '--connected-table', action='store_true', default=False,
help='Dynamically load jQuery and DataTables resources for iTables. For more information, see: '
'https://mwouts.github.io/itables/quick_start.html#offline-mode-versus-connected-mode')
parser.add_argument('-r', '--results-per-page', type=int, default=10,
help='Specifies how many query results to display per page in the output. Default is 10')
parser.add_argument('--no-scroll', action='store_true', default=False,
help="Display the entire output without a scroll bar.")
parser.add_argument('--hide-index', action='store_true', default=False,
help="Hide the index column numbers when displaying the results.")
args = parser.parse_args(line.split())
mode = str_to_query_mode(args.query_mode)
if args.no_scroll:
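# --no-scroll: unrestricted layout that shows the full result set, with paging disabled.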
sparql_layout = UNRESTRICTED_LAYOUT
sparql_scrollY = True
sparql_scrollCollapse = False
sparql_paging = False
else:
sparql_layout = DEFAULT_LAYOUT
sparql_scrollY = "475px"
sparql_scrollCollapse = True
sparql_paging = True
if not args.silent:
tab = widgets.Tab()
titles = []
children = []
first_tab_output = widgets.Output(layout=sparql_layout)
children.append(first_tab_output)
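# An explicit --path overrides the endpoint path from the notebook configuration.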
path = args.path if args.path != '' else self.graph_notebook_config.sparql.path
logger.debug(f'using mode={mode}')
results_df = None
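# Explain mode returns the query plan from the server instead of executing the query.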
if mode == QueryMode.EXPLAIN:
res = self.client.sparql_explain(cell, args.explain_type, args.explain_format, path=path)
res.raise_for_status()
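# The explain response can contain bytes (0xcc, 0xb6) that are not valid UTF-8;
# replace or strip them before decoding.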
explain_bytes = res.content.replace(b'\xcc', b'-')
explain_bytes = explain_bytes.replace(b'\xb6', b'')
explain = explain_bytes.decode('utf-8')
if not args.silent:
sparql_metadata = build_sparql_metadata_from_query(query_type='explain', res=res)
titles.append('Explain')
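# Re-encode as ASCII for the base64 link embedded in the rendered explain template.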
explain_bytes = explain.encode('ascii', 'ignore')
base64_str = base64.b64encode(explain_bytes).decode('ascii')
first_tab_html = sparql_explain_template.render(table=explain,
link=f"data:text/html;base64,{base64_str}")
else:
query_type = get_query_type(cell)
result_type = str(args.media_type).lower()
headers = {}
# Different graph DB services support different sets of result formats, some possibly custom, for each
# query type. We only verify that the media type is valid for Neptune
# (https://docs.aws.amazon.com/neptune/latest/userguide/sparql-media-type-support.html). For other
# databases, we rely on the HTTP query response to tell us if there is an issue with the format.
if is_allowed_neptune_host(self.graph_notebook_config.host, NEPTUNE_CONFIG_HOST_IDENTIFIERS):
if (query_type == 'SELECT' and result_type not in NEPTUNE_RDF_SELECT_FORMATS) \
or (query_type == 'ASK' and result_type not in NEPTUNE_RDF_ASK_FORMATS) \
or (query_type in ['CONSTRUCT', 'DESCRIBE']
and result_type not in NEPTUNE_RDF_CONSTRUCT_DESCRIBE_FORMATS) \
or result_type == '':
if result_type != '':
print(f"Invalid media type: {result_type} specified for Neptune {query_type} query. "
f"Defaulting to: {MEDIA_TYPE_SPARQL_JSON}.")
result_type = MEDIA_TYPE_SPARQL_JSON
headers = {'Accept': result_type}
elif result_type == '':
if query_type == 'SELECT':
result_type = MEDIA_TYPE_SPARQL_JSON
headers = {'Accept': MEDIA_TYPE_SPARQL_JSON}
else:
headers = {'Accept': result_type}
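# Execute the query; an empty headers dict lets the server choose its default format.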
query_res = self.client.sparql(cell, path=path, headers=headers)
try:
query_res.raise_for_status()
except HTTPError:
# Catch HTTP error responses here (typically a 400 caused by a media type this database does
# not support in the request headers) and retry the query once with the RDF spec default media type.
result_type = MEDIA_TYPE_SPARQL_JSON if query_type == 'SELECT' else MEDIA_TYPE_NTRIPLES
query_res = self.client.sparql(cell, path=path, headers={'Accept': result_type})
query_res.raise_for_status()
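# Not every media type returns JSON; fall back to the raw decoded response body.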
try:
results = query_res.json()
except Exception:
results = query_res.content.decode('utf-8')
if not args.silent:
# Assign an empty value so we can always display the table output.
# We will only add it as a tab if the type of query allows it.
# Because of this, the table output will only be attached to the DOM if the query was of type SELECT.
first_tab_html = ""
query_type = get_query_type(cell)
if result_type != MEDIA_TYPE_SPARQL_JSON:
raw_output = widgets.Output(layout=sparql_layout)
with raw_output:
print(results)
children.append(raw_output)
titles.append('Raw')
else:
if query_type in ['SELECT', 'CONSTRUCT', 'DESCRIBE']:
# TODO: Serialize other result types to SPARQL JSON so we can create table and visualization
logger.debug('creating sparql network...')
titles.append('Table')
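# Build the graph visualization from the bindings, honoring the display and grouping flags.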
sn = SPARQLNetwork(group_by_property=args.group_by,
display_property=args.display_property,
edge_display_property=args.edge_display_property,
tooltip_property=args.tooltip_property,
edge_tooltip_property=args.edge_tooltip_property,
label_max_length=args.label_max_length,
edge_label_max_length=args.edge_label_max_length,
ignore_groups=args.ignore_groups,
expand_all=args.expand_all,
group_by_raw=args.group_by_raw)
sn.extract_prefix_declarations_from_query(cell)
try:
sn.add_results(results)
except ValueError as value_error:
logger.debug(value_error)
logger.debug(f'number of nodes is {len(sn.graph.nodes)}')
if len(sn.graph.nodes) > 0:
self.graph_notebook_vis_options['physics']['disablePhysicsAfterInitialSimulation'] \
= args.stop_physics
self.graph_notebook_vis_options['physics']['simulationDuration'] = args.simulation_duration
f = Force(network=sn, options=self.graph_notebook_vis_options)
titles.append('Graph')
children.append(f)
logger.debug('added sparql network to tabs')
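# Flatten the SPARQL JSON bindings into rows and columns for the table view.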
rows_and_columns = sparql_get_rows_and_columns(results)
if rows_and_columns is not None:
results_df = pd.DataFrame(rows_and_columns['rows']).convert_dtypes()
results_df = results_df.astype(str)
results_df = results_df.map(lambda x: encode_html_chars(x))
results_df.insert(0, "#", range(1, len(results_df) + 1))
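# Rename the data columns, offset by one for the "#" index column inserted above;
# if a column has no corresponding data, insert it as an empty column instead.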
for col_index, col_name in enumerate(rows_and_columns['columns']):
try:
results_df.rename({results_df.columns[col_index + 1]: col_name},
axis='columns',
inplace=True)
except IndexError:
results_df.insert(col_index + 1, col_name, [])
# Handle CONSTRUCT and DESCRIBE on their own because we want to keep the previous result
# pattern of showing a TSV with one result binding per line, in addition to the newer outputs.
if query_type == 'CONSTRUCT' or query_type == 'DESCRIBE':
lines = []
for b in results['results']['bindings']:
lines.append(
f'{b["subject"]["value"]}\t{b["predicate"]["value"]}\t{b["object"]["value"]}')
raw_output = widgets.Output(layout=sparql_layout)
with raw_output:
html = sparql_construct_template.render(lines=lines)
display(HTML(html))
children.append(raw_output)
titles.append('Raw')
json_output = widgets.Output(layout=sparql_layout)
with json_output:
print(json.dumps(results, indent=2))
children.append(json_output)
titles.append('JSON')
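# Build the metadata rendered in the Query Metadata tab.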
sparql_metadata = build_sparql_metadata_from_query(query_type='query', res=query_res, results=results)
if not args.silent:
metadata_output = widgets.Output(layout=sparql_layout)
children.append(metadata_output)
titles.append('Query Metadata')
if first_tab_html == "" and results_df is None:
tab.children = children[1:] # the first tab is empty, remove it and proceed
else:
tab.children = children
for i in range(len(titles)):
tab.set_title(i, titles[i])
display(tab)
with metadata_output:
display(HTML(sparql_metadata.to_html()))
if results_df is not None:
with first_tab_output:
visible_results, final_pagination_options, final_pagination_menu = generate_pagination_vars(
args.results_per_page)
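# DataTables column definitions: treat all values as strings, pin a narrow index
# column at position 0, and style cells through the JavaScript callbacks.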
sparql_columndefs = [
{"type": "string", "targets": "_all"},
{"width": "5%", "targets": 0},
{"visible": True, "targets": 0},
{"searchable": False, "targets": 0},
{"className": "nowrap dt-left", "targets": "_all"},
{"createdCell": JavascriptFunction(index_col_js), "targets": 0},
{"createdCell": JavascriptFunction(cell_style_js), "targets": "_all"}
]
if args.hide_index:
# Flip the columnDef above that declares "visible" for the index column (targets: 0).
sparql_columndefs[2]["visible"] = False
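# Render the DataFrame with itables; connected mode loads DataTables assets from a CDN.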
init_notebook_mode(connected=args.connected_table)
show(results_df,
connected=args.connected_table,
scrollX=True,
scrollY=sparql_scrollY,
columnDefs=sparql_columndefs,
paging=sparql_paging,
scrollCollapse=sparql_scrollCollapse,
lengthMenu=[final_pagination_options, final_pagination_menu],
pageLength=visible_results,
buttons=[
"pageLength",
{
"extend": "copyHtml5",
"text": "Copy",
"exportOptions": RESULTS_EXPORT_OPTIONS
},
{
"extend": "csvHtml5",
"title": SPARQL_RESULTS_FILENAME,
"text": "Download CSV",
"exportOptions": RESULTS_EXPORT_OPTIONS
},
{
"extend": "excelHtml5",
"filename": SPARQL_RESULTS_FILENAME,
"title": None,
"text": "Download XLSX",
"exportOptions": RESULTS_EXPORT_OPTIONS
}
]
)
elif first_tab_html != "":
with first_tab_output:
display(HTML(first_tab_html))
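# Resolve what --store-to should capture: the explain plan, a DataFrame processed for
# storage/CSV export, or the raw query results.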
if mode == QueryMode.EXPLAIN:
stored_results = explain
elif results_df is not None:
json_results = results
res_store_type = args.store_format
res_export_path = args.export_to
if res_store_type in PANDAS_FORMATS or res_export_path != '':
results_df = process_df_for_store(language='sparql',
results_df=results_df)
stored_results = get_results_for_store(store_type=res_store_type,
pandas_results=results_df,
json_results=json_results)
export_csv_results(export_path=res_export_path,
results_df=results_df)
else:
stored_results = results
store_to_ns(args.store_to, stored_results, local_ns)