def seed()

in src/graph_notebook/magics/graph_magic.py [0:0]


    def seed(self, line, local_ns: dict = None):
        """
        Bulk-load data into your endpoint by running Gremlin, openCypher, or SPARQL insert queries. Running %seed
        with no arguments renders a form that supports two workflows:

        a) choose a data model (property-graph or RDF) and then one of the sample data sets that Neptune provides.

        b) choose a query language to load with and then supply the path to a file of insert queries, or to a
        directory holding several such files.
        """
        parser = argparse.ArgumentParser()
        # Declarative (flags, keyword options) table; arguments are registered in the order listed.
        # TODO: Gremlin api paths are not yet supported.
        argument_specs = [
            (('--model',),
             dict(type=str.lower, default='',
                  help='Specifies what data model you would like to load for. '
                       'Accepted values: property_graph, rdf')),
            (('--language',),
             dict(type=str.lower, default='',
                  help='Specifies what language you would like to load for. '
                       'Accepted values: gremlin, sparql, opencypher')),
            (('--dataset',),
             dict(type=str, default='',
                  help='Specifies what sample dataset you would like to load.')),
            (('--source',),
             dict(type=str, default='',
                  help='Specifies the full path to a local file or directory that you would like to '
                       'load from.')),
            (('-f', '--full-file-query'),
             dict(action='store_true', default=False,
                  help='Read all content of a file as a single query, instead of per line')),
            (('--path', '-p'),
             dict(default=SPARQL_ACTION,
                  help='prefix path to query endpoint. For example, "foo/bar". '
                       'The queried path would then be host:port/foo/bar for sparql seed commands')),
            (('--run',),
             dict(action='store_true')),
            (('--ignore-errors',),
             dict(action='store_true', default=False,
                  help='Continue loading from the seed file on failure of any individual query.')),
        ]
        for arg_flags, arg_options in argument_specs:
            parser.add_argument(*arg_flags, **arg_options)
        args = parser.parse_args(line.split())

        def make_hidden_dropdown(**traits):
            # Secondary dropdowns are created enabled but collapsed (display='none') until a handler reveals them.
            return widgets.Dropdown(disabled=False, layout=widgets.Layout(display='none'),
                                    style=SEED_WIDGET_STYLE, **traits)

        output = widgets.Output()
        progress_output = widgets.Output()
        # The source selector is the only control that is shown from the start.
        source_dropdown = widgets.Dropdown(options=SEED_SOURCE_OPTIONS, description='Source type:',
                                           disabled=False, style=SEED_WIDGET_STYLE)

        # Analytics domains get the PG-only model list and the OC-only language lists.
        analytics_domain = self.client.is_analytics_domain()
        model_options = SEED_MODEL_OPTIONS_PG if analytics_domain else SEED_MODEL_OPTIONS
        custom_language_options = SEED_LANGUAGE_OPTIONS_OC if analytics_domain else SEED_LANGUAGE_OPTIONS
        samples_pg_language_options = SEED_LANGUAGE_OPTIONS_OC if analytics_domain else SEED_LANGUAGE_OPTIONS_PG

        model_dropdown = make_hidden_dropdown(options=model_options, description='Data model:')
        custom_language_dropdown = make_hidden_dropdown(options=custom_language_options, description='Language:')
        samples_pg_language_dropdown = make_hidden_dropdown(options=samples_pg_language_options,
                                                            description='Language:')
        data_set_drop_down = make_hidden_dropdown(description='Data set:')
        # Pre-selected from the -f/--full-file-query flag.
        fullfile_option_dropdown = make_hidden_dropdown(description='Full File Query:', options=[True, False],
                                                        value=args.full_file_query)
        location_option_dropdown = make_hidden_dropdown(description='Location:', options=['Local', 'S3'],
                                                        value='Local')

        seed_file_location_text = widgets.Text(description='Source:', placeholder='path/to/seedfiles/directory',
                                               disabled=False, style=SEED_WIDGET_STYLE)

        seed_file_location = FileChooser()
        seed_file_location.layout.display = 'none'

        # The text box lives in an HBox so that a validation label can be appended next to it later.
        seed_file_location_text_hbox = widgets.HBox([seed_file_location_text])

        submit_button = widgets.Button(description="Submit")

        # Everything except the source selector also starts with visibility 'hidden'.
        for initially_hidden_widget in (model_dropdown, custom_language_dropdown, samples_pg_language_dropdown,
                                        data_set_drop_down, fullfile_option_dropdown, location_option_dropdown,
                                        seed_file_location_text_hbox, seed_file_location, submit_button):
            initially_hidden_widget.layout.visibility = 'hidden'
        def hide_all_widgets():
            """Collapse every secondary form control and hide the submit button; the source selector is untouched."""
            collapsible_widgets = (
                location_option_dropdown,
                seed_file_location_text_hbox,
                custom_language_dropdown,
                samples_pg_language_dropdown,
                fullfile_option_dropdown,
                seed_file_location,
                model_dropdown,
                data_set_drop_down,
            )
            for collapsible in collapsible_widgets:
                collapsible.layout.visibility = 'hidden'
                collapsible.layout.display = 'none'
            # The submit button only loses its visibility; its display mode is left as it is.
            submit_button.layout.visibility = 'hidden'

        def on_source_value_change(change):
            """
            Re-render the form after the "Source type" dropdown changes.

            :param change: change notification from the dropdown; only change['new'] (the selected source) is read.

            'custom' reveals the language and location selectors plus whichever path input applies, 'samples'
            reveals the data-model selector and, depending on its state, the language and dataset selectors.
            Any other selection leaves every secondary control hidden.
            """
            # Start from a fully collapsed form so each branch only has to reveal what it needs.
            hide_all_widgets()
            selected_source = change['new']
            if selected_source == 'custom':
                custom_language_dropdown.layout.visibility = 'visible'
                custom_language_dropdown.layout.display = 'flex'
                location_option_dropdown.layout.visibility = 'visible'
                location_option_dropdown.layout.display = 'flex'
                # The full-file option is offered for every selected language except sparql.
                if custom_language_dropdown.value and custom_language_dropdown.value != 'sparql':
                    fullfile_option_dropdown.layout.visibility = 'visible'
                    fullfile_option_dropdown.layout.display = 'flex'
                # If textbox has a value, OR we are loading from S3, display textbox instead of the filepicker
                if seed_file_location_text.value or location_option_dropdown.value == 'S3':
                    seed_file_location_text_hbox.layout.visibility = 'visible'
                    seed_file_location_text_hbox.layout.display = 'flex'
                elif seed_file_location.value or location_option_dropdown.value == 'Local':
                    seed_file_location.layout.visibility = 'visible'
                    seed_file_location.layout.display = 'flex'
                # Submit is only offered once both a language and a source location are known.
                if custom_language_dropdown.value \
                        and (seed_file_location_text.value or
                             (seed_file_location.value and location_option_dropdown.value == 'Local')):
                    submit_button.layout.visibility = 'visible'
            elif selected_source == 'samples':
                model_dropdown.layout.visibility = 'visible'
                model_dropdown.layout.display = 'flex'
                if model_dropdown.value:
                    if model_dropdown.value == 'propertygraph':
                        # Property-graph samples need a language choice before datasets can be listed.
                        samples_pg_language_dropdown.layout.visibility = 'visible'
                        samples_pg_language_dropdown.layout.display = 'flex'
                        show_dataset = samples_pg_language_dropdown.value != ''
                    else:
                        show_dataset = True
                    if show_dataset:
                        data_set_drop_down.layout.visibility = 'visible'
                        data_set_drop_down.layout.display = 'flex'
                        if data_set_drop_down.value and data_set_drop_down.value != SEED_NO_DATASETS_FOUND_MSG:
                            submit_button.layout.visibility = 'visible'
            # Any other selection needs no extra work: hide_all_widgets() has already collapsed the form.
            # In particular the inner text box's own layout must not be hidden here, because the other
            # handlers only ever toggle its surrounding HBox and would never bring the text box back.
            return

        def change_datasets_widget(samples_lang):
            """
            Refresh the dataset dropdown for the given sample language.

            :param samples_lang: language passed to get_data_sets(); a falsy value means no language is selected.

            When datasets are available the dropdown is populated (sorted) and shown together with the submit
            button. Otherwise the submit button is hidden and the dropdown either shows the
            SEED_NO_DATASETS_FOUND_MSG placeholder (a language was given) or is hidden (no language).
            """
            # Filter before testing for emptiness, so a listing that only contains '__pycache__' is treated as
            # "no datasets" instead of producing an empty dropdown with an active submit button.
            # sorted() is used so the list returned by get_data_sets() is not reordered in place.
            data_sets = sorted(ds for ds in (get_data_sets(samples_lang) or []) if ds != '__pycache__')
            if data_sets:
                data_set_drop_down.options = data_sets
                data_set_drop_down.layout.visibility = 'visible'
                data_set_drop_down.layout.display = 'flex'
                submit_button.layout.visibility = 'visible'
            else:
                if samples_lang:
                    data_set_drop_down.options = [SEED_NO_DATASETS_FOUND_MSG]
                    data_set_drop_down.layout.visibility = 'visible'
                    data_set_drop_down.layout.display = 'flex'
                else:
                    data_set_drop_down.layout.visibility = 'hidden'
                    data_set_drop_down.layout.display = 'none'
                submit_button.layout.visibility = 'hidden'
            return

        def on_model_value_change(change):
            """
            React to a new data-model selection: toggle the property-graph language selector and refresh the
            dataset dropdown for the language implied by the model.
            """
            is_property_graph = change['new'] == 'propertygraph'
            pg_language_layout = samples_pg_language_dropdown.layout
            pg_language_layout.visibility = 'visible' if is_property_graph else 'hidden'
            pg_language_layout.display = 'flex' if is_property_graph else 'none'

            if is_property_graph:
                # Use whatever language is currently selected (an empty selection stays empty).
                language_for_samples = samples_pg_language_dropdown.value
            elif change['new'] == 'rdf':
                language_for_samples = 'sparql'
            else:
                language_for_samples = ''
            change_datasets_widget(language_for_samples)

        def on_dataset_value_change(change):
            """Hide the submit button when the dataset selection becomes empty; otherwise do nothing."""
            if change['new']:
                return
            submit_button.layout.visibility = 'hidden'

        def on_samples_pg_language_value_change(change):
            """Refresh the dataset dropdown for the newly selected property-graph sample language."""
            change_datasets_widget(change['new'])

        def on_custom_language_value_change(change):
            """
            React to a new custom-source language: show the full-file option for anything but sparql, and fall
            back to the file browser (plus submit button) when the text input is neither filled in nor shown.
            """
            offer_fullfile_option = change['new'] != 'sparql'
            fullfile_option_dropdown.layout.visibility = 'visible' if offer_fullfile_option else 'hidden'
            fullfile_option_dropdown.layout.display = 'flex' if offer_fullfile_option else 'none'

            # Preserve the text input's state if it already holds a value or is already rendered.
            text_input_in_use = seed_file_location_text.value \
                or seed_file_location_text_hbox.layout.visibility != 'hidden'
            if text_input_in_use:
                return
            # Otherwise display the default selector widget (file browser).
            seed_file_location.layout.visibility = 'visible'
            seed_file_location.layout.display = 'flex'
            submit_button.layout.visibility = 'visible'

        def on_location_value_change(change):
            """
            Swap between the file browser and the text input when the location changes: the browser is used
            only for 'Local' with an empty text box, the text input in every other case.
            """
            use_file_browser = change['new'] == 'Local' and not seed_file_location_text.value
            if use_file_browser:
                concealed, revealed = seed_file_location_text_hbox, seed_file_location
            else:
                concealed, revealed = seed_file_location, seed_file_location_text_hbox
            concealed.layout.visibility = 'hidden'
            concealed.layout.display = 'none'
            revealed.layout.visibility = 'visible'
            revealed.layout.display = 'flex'

        def on_seedfile_text_value_change(change):
            """Show the submit button while the source text box holds a value, hide it otherwise."""
            submit_button.layout.visibility = 'visible' if seed_file_location_text.value else 'hidden'

        def on_seedfile_select_value_change(change):
            """Show the submit button while the file browser holds a selection, hide it otherwise."""
            submit_button.layout.visibility = 'visible' if seed_file_location.value else 'hidden'

        def disable_seed_widgets():
            """Lock every input of the form and close the submit button."""
            form_inputs = (
                source_dropdown,
                model_dropdown,
                custom_language_dropdown,
                samples_pg_language_dropdown,
                data_set_drop_down,
                fullfile_option_dropdown,
                location_option_dropdown,
                seed_file_location_text,
                seed_file_location,
            )
            for form_input in form_inputs:
                form_input.disabled = True
            submit_button.close()

        def process_gremlin_query_line(query_line, line_index, q):
            """
            Run one Gremlin seed query and report a status code for the caller to act on.

            :param query_line: query text to submit; a blank value is skipped.
            :param line_index: zero-based position of the query in the seed file (reported one-based).
            :param q: seed file entry; q['name'] is used in log and error messages.
            :return: 0 = continue;
                     1 = continue, and the caller should record an error (--ignore-errors is set);
                     2 = the error was printed to the output widget, and the caller should record it and stop.
            """
            if not query_line:
                logger.debug(f"Skipped blank query at line {line_index + 1} in seed file {q['name']}")
                return 0
            try:
                self.client.gremlin_query(query_line)
            except GremlinServerError as server_error:
                try:
                    # remove the leading error code.
                    parsed_error = json.loads(server_error.args[0][5:])
                    content = json.dumps(parsed_error, indent=2)
                except Exception:
                    content = {
                        'error': server_error
                    }
                logger.debug(f"GremlinServerError at line {line_index + 1} in seed file {q['name']}")
            except Exception as unexpected_error:
                content = {
                    'error': unexpected_error
                }
                logger.debug(f"Exception at line {line_index + 1} in seed file {q['name']}")
            else:
                return 0

            # Shared tail for both failure kinds.
            logger.debug(content)
            if args.ignore_errors:
                return 1
            with output:
                generate_seed_error_msg(content, q['name'], line_index + 1)
            return 2

        def process_cypher_query_line(query_line, line_index, q):
            """
            Run one openCypher seed query over HTTP and report a status code for the caller to act on.

            :param query_line: query text to submit; a blank value is skipped.
            :param line_index: zero-based position of the query in the seed file (reported one-based).
            :param q: seed file entry; q['name'] is used in log and error messages.
            :return: 0 = continue;
                     1 = continue, and the caller should record an error (--ignore-errors is set);
                     2 = the error was printed to the output widget, and the caller should record it and stop.
            """
            line_number = line_index + 1
            if not query_line:
                logger.debug(f"Skipped blank query at line {line_number} in seed file {q['name']}")
                return 0
            try:
                cypher_res = self.client.opencypher_http(query_line)
                # Turn an HTTP error status into an HTTPError so it is handled below.
                cypher_res.raise_for_status()
                return 0
            except HTTPError as httpEx:
                # Prefer the JSON error body; fall back to the exception itself if it cannot be decoded.
                try:
                    error = json.loads(httpEx.response.content.decode('utf-8'))
                    content = json.dumps(error, indent=2)
                except Exception:
                    content = {
                        'error': httpEx
                    }
                logger.debug(f"HTTPError at line {line_number} in seed file {q['name']}")
                logger.debug(content)
            except Exception as ex:
                content = {
                    'error': str(ex)
                }
                logger.error(f"Exception at line {line_number} in seed file {q['name']}")
                logger.error(content)

            if args.ignore_errors:
                return 1
            # Report the failing line as well, matching what the Gremlin handler does.
            with output:
                generate_seed_error_msg(content, q['name'], line_number)
            return 2

        def on_button_clicked(b=None):
            """
            Validate the form, fetch the seed queries and run them against the endpoint.

            Used as the submit button's click handler, and also called directly with no argument when seeding
            is started from the command line (--run).

            :param b: the clicked button as passed by the widget framework; not used.

            Progress is shown in progress_output and messages in output. With --ignore-errors a failing query is
            counted and skipped; without it the first failure is reported and loading stops.
            """
            # Reset the text row so a validation label from an earlier click does not linger.
            seed_file_location_text_hbox.children = (seed_file_location_text,)
            filename = None
            if source_dropdown.value == 'samples':
                if not data_set_drop_down.value:
                    # No dataset selected, so there is nothing to load.
                    return
                data_set = data_set_drop_down.value.lower()
                fullfile_query = False
            else:
                if seed_file_location_text.value:
                    source_text = seed_file_location_text.value
                    warning_html = None
                    if location_option_dropdown.value == 'S3' \
                            and not (source_text.startswith('s3://') and len(source_text) > 7):
                        warning_html = '<p style="color:red;">S3 source URI must start with s3://</p>'
                    elif location_option_dropdown.value == 'Local' and not source_text.startswith('/'):
                        warning_html = '<p style="color:red;">Local source URI must be a valid file path</p>'
                    if warning_html is not None:
                        # Show the problem next to the text box and wait for the user to correct it.
                        seed_file_location_text_validation_label = widgets.HTML(warning_html)
                        seed_file_location_text_validation_label.style = DescriptionStyle(color='red')
                        seed_file_location_text_hbox.children += (seed_file_location_text_validation_label,)
                        return
                    filename = source_text
                elif seed_file_location.value:
                    filename = seed_file_location.value
                else:
                    return
                data_set = filename
                fullfile_query = fullfile_option_dropdown.value
            disable_seed_widgets()
            if custom_language_dropdown.value and filename:
                model = normalize_model_name(custom_language_dropdown.value)
                seeding_language = normalize_language_name(custom_language_dropdown.value)
            else:
                model = normalize_model_name(model_dropdown.value)
                seeding_language = 'sparql' if model == 'rdf' else samples_pg_language_dropdown.value
            with output:
                print(f'Loading data set {data_set} for {seeding_language}')
            queries = get_queries(seeding_language, data_set, source_dropdown.value)
            # Distinguish "nothing came back" from "an empty collection came back"; testing truthiness first
            # made the empty-collection message unreachable.
            # NOTE(review): assumes get_queries() signals a failed retrieval with None - confirm against helper.
            if queries is None:
                with output:
                    print('Query retrieval from files terminated with errors.')
                return
            if len(queries) < 1:
                with output:
                    print('Did not find any queries for the given dataset')
                return

            load_index = 1  # start at 1 to have a non-empty progress bar
            progress = widgets.IntProgress(
                value=load_index,
                min=0,
                max=len(queries) + 1,  # len + 1 so we can start at index 1
                orientation='horizontal',
                bar_style='info',
                description='Loading:'
            )

            with progress_output:
                display(progress)

            error_count = 0
            for q in queries:
                with output:
                    print(f'{progress.value}/{len(queries)}:\t{q["name"]}')
                if model == 'rdf':
                    # NOTE(review): unlike the openCypher path, the response is not checked with
                    # raise_for_status() here; whether client.sparql() raises HTTPError itself cannot be
                    # determined from this function - confirm.
                    try:
                        self.client.sparql(q['content'], path=args.path)
                    except HTTPError as httpEx:
                        # Count every failed query, whether or not its error body can be decoded.
                        error_count += 1
                        # attempt to turn response into json
                        try:
                            error = json.loads(httpEx.response.content.decode('utf-8'))
                            content = json.dumps(error, indent=2)
                        except Exception:
                            content = {
                                'error': httpEx
                            }
                        logger.debug(content)
                        if not args.ignore_errors:
                            with output:
                                generate_seed_error_msg(content, q['name'])
                            progress.close()
                            return
                    except Exception as ex:
                        error_count += 1
                        content = {
                            'error': str(ex)
                        }
                        logger.error(content)
                        if not args.ignore_errors:
                            with output:
                                generate_seed_error_msg(content, q['name'])
                            progress.close()
                            return
                else:  # gremlin and cypher
                    if seeding_language == 'opencypher':
                        process_query_line = process_cypher_query_line
                    else:
                        process_query_line = process_gremlin_query_line
                    if fullfile_query or (source_dropdown.value == 'samples' and 'full' in q['name']):
                        # treat entire file content as one query
                        statements = [q['content']]
                    else:
                        # treat each line as its own query
                        statements = q['content'].splitlines()
                    for line_index, query_line in enumerate(statements):
                        query_status = process_query_line(query_line, line_index, q)
                        if query_status == 2:
                            # The handler already printed the error; stop loading.
                            progress.close()
                            return
                        if query_status == 1:
                            error_count += 1

                progress.value += 1
            # Sleep for two seconds so the user sees the progress bar complete
            time.sleep(2)
            progress.close()
            with output:
                print('Done.')
                if error_count:
                    print(f'\n{error_count} individual queries were skipped due to errors. For more '
                          f'information, please rerun the query with debug logs enabled (%enable_debug).')
            return

        # Wire every control to its handler; all observers listen to the widget's 'value' only.
        submit_button.on_click(on_button_clicked)
        source_dropdown.observe(on_source_value_change, names='value')
        model_dropdown.observe(on_model_value_change, names='value')
        data_set_drop_down.observe(on_dataset_value_change, names='value')
        custom_language_dropdown.observe(on_custom_language_value_change, names='value')
        samples_pg_language_dropdown.observe(on_samples_pg_language_value_change, names='value')
        location_option_dropdown.observe(on_location_value_change, names='value')
        seed_file_location_text.observe(on_seedfile_text_value_change, names='value')
        seed_file_location.observe(on_seedfile_select_value_change, names='value')

        display(source_dropdown, model_dropdown, custom_language_dropdown, samples_pg_language_dropdown,
                data_set_drop_down, fullfile_option_dropdown, location_option_dropdown, seed_file_location,
                seed_file_location_text_hbox, submit_button, progress_output, output)

        # Pre-fill the form from the command-line arguments. Setting a widget value fires the observers
        # registered above, which reveal the matching controls.
        if (args.model != '' or args.language != '') and args.source == '':
            # A model and/or language without a source selects the bundled sample data.
            source_dropdown.value = 'samples'
            normed_model = normalize_model_name(args.model)
            normed_language = normalize_language_name(args.language)
            selected_model = None
            selected_language = None
            if normed_model != '' and normed_model in SEED_MODEL_OPTIONS:
                if normed_model == 'propertygraph':
                    selected_model = 'propertygraph'
                    if normed_language in ['gremlin', 'opencypher']:
                        selected_language = normed_language
                    elif normed_language == '':
                        selected_language = 'gremlin'
                else:
                    selected_model = 'rdf'
                    selected_language = 'sparql'
            elif normed_language != '' and normed_language in SEED_LANGUAGE_OPTIONS:
                # Only a language was given: derive the model from it.
                if normed_language == 'sparql':
                    selected_model = 'rdf'
                    selected_language = 'sparql'
                else:
                    selected_model = 'propertygraph'
                    selected_language = normed_language
            # The dropdowns may carry a reduced option list (analytics domain), so check against the options
            # they were actually built with before assigning a value.
            if selected_model and selected_model in model_dropdown.options:
                model_dropdown.value = selected_model
                language_selectable = selected_language == 'sparql' \
                    or selected_language in samples_pg_language_dropdown.options
                if selected_language and language_selectable:
                    if selected_language != 'sparql':
                        samples_pg_language_dropdown.value = selected_language
                    # Compare the same lower-cased name that is assigned, so a mixed-case --dataset matches.
                    requested_dataset = args.dataset.lower()
                    if requested_dataset != '' and requested_dataset in data_set_drop_down.options:
                        data_set_drop_down.value = requested_dataset
                        if args.run:
                            on_button_clicked()
        elif args.source != '' or args.language != '':
            # A source was given: load custom seed files.
            source_dropdown.value = 'custom'
            valid_language_value = False
            language = normalize_language_name(args.language)
            if language != '' and language in SEED_LANGUAGE_OPTIONS \
                    and language in custom_language_dropdown.options:
                custom_language_dropdown.value = language
                valid_language_value = True
            if args.source != '':
                # A source passed on the command line always goes through the text box, not the file browser.
                seed_file_location_text.value = args.source
                seed_file_location_text_hbox.layout.visibility = 'visible'
                seed_file_location_text_hbox.layout.display = 'flex'
                if seed_file_location_text.value.startswith('s3://'):
                    location_option_dropdown.value = 'S3'
                location_option_dropdown.layout.visibility = 'visible'
                location_option_dropdown.layout.display = 'flex'
                seed_file_location.layout.visibility = 'hidden'
                seed_file_location.layout.display = 'none'
            if seed_file_location_text.value and valid_language_value and args.run:
                on_button_clicked()