src/graph_notebook/magics/graph_magic.py
def sparql(self, line='', cell='', local_ns: dict = None):
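"""Cell magic: run the cell body as a SPARQL query (or explain request) against the
configured endpoint and render the results in a tabbed widget view. Flags are parsed
from the magic line by the argparse definitions below.

Illustrative invocation (hypothetical query; flags are defined below):

    %%sparql --expand-all -g rdfs:label
    SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10
"""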
parser = argparse.ArgumentParser()
parser.add_argument('query_mode', nargs='?', default='query',
help='query mode (default=query) [query|explain]')
parser.add_argument('--path', '-p', default='',
help='prefix path to sparql endpoint. For example, if "foo/bar" were specified, '
'the endpoint called would be host:port/foo/bar')
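# Consumed by SPARQLNetwork below; appears to include every result binding in the
# visualization rather than only bindings that form subject-predicate-object triples.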
parser.add_argument('--expand-all', action='store_true')
parser.add_argument('--explain-type', type=str.lower, default='dynamic',
help=f'Explain mode to use when using the explain query mode. '
f'Expected values: {SPARQL_EXPLAIN_MODES}')
parser.add_argument('--explain-format', default='text/html', help='response format for explain query mode',
choices=['text/csv', 'text/html'])
parser.add_argument('-m', '--media-type', type=str, default='',
help='Response format for SELECT/CONSTRUCT/DESCRIBE queries. See '
'https://docs.aws.amazon.com/neptune/latest/userguide/sparql-media-type-support.html '
'for valid RDF media types supported by Neptune for each query type. Default for '
'Neptune and SELECT queries is application/sparql-results+json, otherwise no format '
'will be specified in the request.')
parser.add_argument('-g', '--group-by', type=str, default='',
help='Property used to group nodes.')
parser.add_argument('-gr', '--group-by-raw', action='store_true', default=False,
help="Group nodes by the raw binding")
parser.add_argument('-d', '--display-property', type=str, default='',
help='Property to display the value of on each node.')
parser.add_argument('-de', '--edge-display-property', type=str, default='',
help='Property to display the value of on each edge.')
parser.add_argument('-t', '--tooltip-property', type=str, default='',
help='Property to display the value of on each node tooltip.')
parser.add_argument('-te', '--edge-tooltip-property', type=str, default='',
help='Property to display the value of on each edge tooltip.')
parser.add_argument('-l', '--label-max-length', type=int, default=10,
help='Specifies max length of vertex labels, in characters. Default is 10')
parser.add_argument('-le', '--edge-label-max-length', type=int, default=10,
help='Specifies max length of edge labels, in characters. Default is 10')
parser.add_argument('--store-to', type=str, default='', help='store query result to this variable')
parser.add_argument('--store-format', type=str.lower, default='json',
help=f'Configures export type when using --store-to with base query mode. '
f'Valid inputs: {QUERY_STORE_TO_FORMATS}. Default is JSON')
parser.add_argument('--export-to', type=str, default='',
help='Export the base query mode CSV result to the provided file path.')
parser.add_argument('--ignore-groups', action='store_true', default=False, help="Ignore all grouping options")
parser.add_argument('-sp', '--stop-physics', action='store_true', default=False,
help="Disable visualization physics after the initial simulation stabilizes.")
parser.add_argument('-sd', '--simulation-duration', type=int, default=1500,
help='Specifies maximum duration of visualization physics simulation. Default is 1500ms')
parser.add_argument('--silent', action='store_true', default=False, help="Display no query output.")
parser.add_argument('-ct', '--connected-table', action='store_true', default=False,
help='Dynamically load jQuery and DataTables resources for iTables. For more information, see: '
'https://mwouts.github.io/itables/quick_start.html#offline-mode-versus-connected-mode')
parser.add_argument('-r', '--results-per-page', type=int, default=10,
help='Specifies how many query results to display per page in the output. Default is 10')
parser.add_argument('--no-scroll', action='store_true', default=False,
help="Display the entire output without a scroll bar.")
parser.add_argument('--hide-index', action='store_true', default=False,
help="Hide the index column numbers when displaying the results.")
args = parser.parse_args(line.split())
mode = str_to_query_mode(args.query_mode)
if args.no_scroll:
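# --no-scroll: unrestricted layout that shows the full result set, with paging disabled.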
sparql_layout = UNRESTRICTED_LAYOUT
sparql_scrollY = True
sparql_scrollCollapse = False
sparql_paging = False
else:
sparql_layout = DEFAULT_LAYOUT
sparql_scrollY = "475px"
sparql_scrollCollapse = True
sparql_paging = True
if not args.silent:
tab = widgets.Tab()
titles = []
children = []
first_tab_output = widgets.Output(layout=sparql_layout)
children.append(first_tab_output)
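# An explicit --path overrides the endpoint path from the notebook configuration.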
path = args.path if args.path != '' else self.graph_notebook_config.sparql.path
logger.debug(f'using mode={mode}')
results_df = None
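# Explain mode returns the query plan from the server instead of executing the query.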
if mode == QueryMode.EXPLAIN:
res = self.client.sparql_explain(cell, args.explain_type, args.explain_format, path=path)
res.raise_for_status()
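# The explain response can contain bytes (0xcc, 0xb6) that are not valid UTF-8;
# replace or strip them before decoding.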
explain_bytes = res.content.replace(b'\xcc', b'-')
explain_bytes = explain_bytes.replace(b'\xb6', b'')
explain = explain_bytes.decode('utf-8')
if not args.silent:
sparql_metadata = build_sparql_metadata_from_query(query_type='explain', res=res)
titles.append('Explain')
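# Re-encode as ASCII for the base64 link embedded in the rendered explain template.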
explain_bytes = explain.encode('ascii', 'ignore')
base64_str = base64.b64encode(explain_bytes).decode('ascii')
first_tab_html = sparql_explain_template.render(table=explain,
link=f"data:text/html;base64,{base64_str}")
else:
query_type = get_query_type(cell)
result_type = str(args.media_type).lower()
headers = {}
# Different graph DB services support different sets of result formats, some possibly custom, for each
# query type. We only verify that the media type is valid for Neptune
# (https://docs.aws.amazon.com/neptune/latest/userguide/sparql-media-type-support.html). For other
# databases, we rely on the HTTP query response to tell us if there is an issue with the format.
if is_allowed_neptune_host(self.graph_notebook_config.host, NEPTUNE_CONFIG_HOST_IDENTIFIERS):
if (query_type == 'SELECT' and result_type not in NEPTUNE_RDF_SELECT_FORMATS) \
or (query_type == 'ASK' and result_type not in NEPTUNE_RDF_ASK_FORMATS) \
or (query_type in ['CONSTRUCT', 'DESCRIBE']
and result_type not in NEPTUNE_RDF_CONSTRUCT_DESCRIBE_FORMATS) \
or result_type == '':
if result_type != '':
print(f"Invalid media type: {result_type} specified for Neptune {query_type} query. "
f"Defaulting to: {MEDIA_TYPE_SPARQL_JSON}.")
result_type = MEDIA_TYPE_SPARQL_JSON
headers = {'Accept': result_type}
elif result_type == '':
if query_type == 'SELECT':
result_type = MEDIA_TYPE_SPARQL_JSON
headers = {'Accept': MEDIA_TYPE_SPARQL_JSON}
else:
headers = {'Accept': result_type}
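# Execute the query; an empty headers dict lets the server choose its default format.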
query_res = self.client.sparql(cell, path=path, headers=headers)
try:
query_res.raise_for_status()
except HTTPError:
# Catch HTTP error responses here (typically a 400 caused by a media type this database does
# not support in the request headers) and retry the query once with the RDF spec default media type.
result_type = MEDIA_TYPE_SPARQL_JSON if query_type == 'SELECT' else MEDIA_TYPE_NTRIPLES
query_res = self.client.sparql(cell, path=path, headers={'Accept': result_type})
query_res.raise_for_status()
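# Not every media type returns JSON; fall back to the raw decoded response body.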
try:
results = query_res.json()
except Exception:
results = query_res.content.decode('utf-8')
if not args.silent:
# Assign an empty value so we can always display the table output.
# We will only add it as a tab if the type of query allows it.
# Because of this, the table output will only be attached to the DOM if the query was of type SELECT.
first_tab_html = ""
query_type = get_query_type(cell)
if result_type != MEDIA_TYPE_SPARQL_JSON:
raw_output = widgets.Output(layout=sparql_layout)
with raw_output:
print(results)
children.append(raw_output)
titles.append('Raw')
else:
if query_type in ['SELECT', 'CONSTRUCT', 'DESCRIBE']:
# TODO: Serialize other result types to SPARQL JSON so we can create table and visualization
logger.debug('creating sparql network...')
titles.append('Table')
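# Build the graph visualization from the bindings, honoring the display and grouping flags.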
sn = SPARQLNetwork(group_by_property=args.group_by,
display_property=args.display_property,
edge_display_property=args.edge_display_property,
tooltip_property=args.tooltip_property,
edge_tooltip_property=args.edge_tooltip_property,
label_max_length=args.label_max_length,
edge_label_max_length=args.edge_label_max_length,
ignore_groups=args.ignore_groups,
expand_all=args.expand_all,
group_by_raw=args.group_by_raw)
sn.extract_prefix_declarations_from_query(cell)
try:
sn.add_results(results)
except ValueError as value_error:
logger.debug(value_error)
logger.debug(f'number of nodes is {len(sn.graph.nodes)}')
if len(sn.graph.nodes) > 0:
self.graph_notebook_vis_options['physics']['disablePhysicsAfterInitialSimulation'] \
= args.stop_physics
self.graph_notebook_vis_options['physics']['simulationDuration'] = args.simulation_duration
f = Force(network=sn, options=self.graph_notebook_vis_options)
titles.append('Graph')
children.append(f)
logger.debug('added sparql network to tabs')
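# Flatten the SPARQL JSON bindings into rows and columns for the table view.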
rows_and_columns = sparql_get_rows_and_columns(results)
if rows_and_columns is not None:
results_df = pd.DataFrame(rows_and_columns['rows']).convert_dtypes()
results_df = results_df.astype(str)
results_df = results_df.map(lambda x: encode_html_chars(x))
results_df.insert(0, "#", range(1, len(results_df) + 1))
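# Rename the data columns, offset by one for the "#" index column inserted above;
# if a column has no corresponding data, insert it as an empty column instead.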
for col_index, col_name in enumerate(rows_and_columns['columns']):
try:
results_df.rename({results_df.columns[col_index + 1]: col_name},
axis='columns',
inplace=True)
except IndexError:
results_df.insert(col_index + 1, col_name, [])
# Handle CONSTRUCT and DESCRIBE on their own because we want to keep the previous result
# pattern of showing a TSV with one result binding per line, in addition to the newer outputs.
if query_type == 'CONSTRUCT' or query_type == 'DESCRIBE':
lines = []
for b in results['results']['bindings']:
lines.append(
f'{b["subject"]["value"]}\t{b["predicate"]["value"]}\t{b["object"]["value"]}')
raw_output = widgets.Output(layout=sparql_layout)
with raw_output:
html = sparql_construct_template.render(lines=lines)
display(HTML(html))
children.append(raw_output)
titles.append('Raw')
json_output = widgets.Output(layout=sparql_layout)
with json_output:
print(json.dumps(results, indent=2))
children.append(json_output)
titles.append('JSON')
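# Build the metadata rendered in the Query Metadata tab.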
sparql_metadata = build_sparql_metadata_from_query(query_type='query', res=query_res, results=results)
if not args.silent:
metadata_output = widgets.Output(layout=sparql_layout)
children.append(metadata_output)
titles.append('Query Metadata')
if first_tab_html == "" and results_df is None:
tab.children = children[1:] # the first tab is empty, remove it and proceed
else:
tab.children = children
for i in range(len(titles)):
tab.set_title(i, titles[i])
display(tab)
with metadata_output:
display(HTML(sparql_metadata.to_html()))
if results_df is not None:
with first_tab_output:
visible_results, final_pagination_options, final_pagination_menu = generate_pagination_vars(
args.results_per_page)
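# DataTables column definitions: treat all values as strings, pin a narrow index
# column at position 0, and style cells through the JavaScript callbacks.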
sparql_columndefs = [
{"type": "string", "targets": "_all"},
{"width": "5%", "targets": 0},
{"visible": True, "targets": 0},
{"searchable": False, "targets": 0},
{"className": "nowrap dt-left", "targets": "_all"},
{"createdCell": JavascriptFunction(index_col_js), "targets": 0},
{"createdCell": JavascriptFunction(cell_style_js), "targets": "_all"}
]
if args.hide_index:
# Flip the columnDef above that declares "visible" for the index column (targets: 0).
sparql_columndefs[2]["visible"] = False
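# Render the DataFrame with itables; connected mode loads DataTables assets from a CDN.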
init_notebook_mode(connected=args.connected_table)
show(results_df,
connected=args.connected_table,
scrollX=True,
scrollY=sparql_scrollY,
columnDefs=sparql_columndefs,
paging=sparql_paging,
scrollCollapse=sparql_scrollCollapse,
lengthMenu=[final_pagination_options, final_pagination_menu],
pageLength=visible_results,
buttons=[
"pageLength",
{
"extend": "copyHtml5",
"text": "Copy",
"exportOptions": RESULTS_EXPORT_OPTIONS
},
{
"extend": "csvHtml5",
"title": SPARQL_RESULTS_FILENAME,
"text": "Download CSV",
"exportOptions": RESULTS_EXPORT_OPTIONS
},
{
"extend": "excelHtml5",
"filename": SPARQL_RESULTS_FILENAME,
"title": None,
"text": "Download XLSX",
"exportOptions": RESULTS_EXPORT_OPTIONS
}
]
)
elif first_tab_html != "":
with first_tab_output:
display(HTML(first_tab_html))
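# Resolve what --store-to should capture: the explain plan, a DataFrame processed for
# storage/CSV export, or the raw query results.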
if mode == QueryMode.EXPLAIN:
stored_results = explain
elif results_df is not None:
json_results = results
res_store_type = args.store_format
res_export_path = args.export_to
if res_store_type in PANDAS_FORMATS or res_export_path != '':
results_df = process_df_for_store(language='sparql',
results_df=results_df)
stored_results = get_results_for_store(store_type=res_store_type,
pandas_results=results_df,
json_results=json_results)
export_csv_results(export_path=res_export_path,
results_df=results_df)
else:
stored_results = results
store_to_ns(args.store_to, stored_results, local_ns)