in src/graph_notebook/magics/graph_magic.py [0:0]
def seed(self, line, local_ns: dict = None):
    """
    Provides a way to bulk insert data to your endpoint via Gremlin, openCypher, or SPARQL queries. Via the form
    generated by running %seed with no arguments, you can do either of the following:

    a) select a data model (property-graph or RDF), then choose from among a number of different sample data sets
    that Neptune provides.

    b) select a query language to load with, then provide a path to a local file with insert queries,
    or a directory containing multiple of these files.

    The form can be pre-filled through the arguments in ``line`` (``--model``, ``--language``, ``--dataset``,
    ``--source``, ``--full-file-query``); ``--run`` submits the pre-filled form immediately and
    ``--ignore-errors`` keeps loading after an individual query fails.

    :param line: the argument string passed to the magic; it is split on whitespace and parsed with argparse.
    :param local_ns: accepted for interface compatibility; not read by this method.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str.lower, default='',
                        help='Specifies what data model you would like to load for. '
                             'Accepted values: property_graph, rdf')
    parser.add_argument('--language', type=str.lower, default='',
                        help='Specifies what language you would like to load for. '
                             'Accepted values: gremlin, sparql, opencypher')
    parser.add_argument('--dataset', type=str, default='',
                        help='Specifies what sample dataset you would like to load.')
    parser.add_argument('--source', type=str, default='',
                        help='Specifies the full path to a local file or directory that you would like to '
                             'load from.')
    parser.add_argument('-f', '--full-file-query', action='store_true', default=False,
                        help='Read all content of a file as a single query, instead of per line')
    # TODO: Gremlin api paths are not yet supported.
    parser.add_argument('--path', '-p', default=SPARQL_ACTION,
                        help='prefix path to query endpoint. For example, "foo/bar". '
                             'The queried path would then be host:port/foo/bar for sparql seed commands')
    parser.add_argument('--run', action='store_true')
    parser.add_argument('--ignore-errors', action='store_true', default=False,
                        help='Continue loading from the seed file on failure of any individual query.')
    args = parser.parse_args(line.split())

    output = widgets.Output()
    progress_output = widgets.Output()

    # ------------------------------------------------------------------
    # Form widgets. Everything except the source selector starts hidden.
    # ------------------------------------------------------------------
    source_dropdown = widgets.Dropdown(
        options=SEED_SOURCE_OPTIONS,
        description='Source type:',
        disabled=False,
        style=SEED_WIDGET_STYLE
    )

    if self.client.is_analytics_domain():
        model_options = SEED_MODEL_OPTIONS_PG
        custom_language_options = SEED_LANGUAGE_OPTIONS_OC
        samples_pg_language_options = SEED_LANGUAGE_OPTIONS_OC
    else:
        model_options = SEED_MODEL_OPTIONS
        custom_language_options = SEED_LANGUAGE_OPTIONS
        samples_pg_language_options = SEED_LANGUAGE_OPTIONS_PG

    model_dropdown = widgets.Dropdown(
        options=model_options,
        description='Data model:',
        disabled=False,
        layout=widgets.Layout(display='none'),
        style=SEED_WIDGET_STYLE
    )
    custom_language_dropdown = widgets.Dropdown(
        options=custom_language_options,
        description='Language:',
        disabled=False,
        layout=widgets.Layout(display='none'),
        style=SEED_WIDGET_STYLE
    )
    samples_pg_language_dropdown = widgets.Dropdown(
        options=samples_pg_language_options,
        description='Language:',
        disabled=False,
        layout=widgets.Layout(display='none'),
        style=SEED_WIDGET_STYLE
    )
    data_set_drop_down = widgets.Dropdown(
        description='Data set:',
        disabled=False,
        layout=widgets.Layout(display='none'),
        style=SEED_WIDGET_STYLE
    )
    fullfile_option_dropdown = widgets.Dropdown(
        description='Full File Query:',
        options=[True, False],
        value=args.full_file_query,
        disabled=False,
        layout=widgets.Layout(display='none'),
        style=SEED_WIDGET_STYLE
    )
    location_option_dropdown = widgets.Dropdown(
        description='Location:',
        options=['Local', 'S3'],
        value='Local',
        disabled=False,
        layout=widgets.Layout(display='none'),
        style=SEED_WIDGET_STYLE
    )
    seed_file_location_text = widgets.Text(
        description='Source:',
        placeholder='path/to/seedfiles/directory',
        disabled=False,
        style=SEED_WIDGET_STYLE
    )
    seed_file_location = FileChooser()
    seed_file_location.layout.display = 'none'
    # The text box lives in an HBox so a validation label can be appended next to it.
    seed_file_location_text_hbox = widgets.HBox([seed_file_location_text])
    submit_button = widgets.Button(description="Submit")

    for initially_hidden in (model_dropdown, custom_language_dropdown, samples_pg_language_dropdown,
                             data_set_drop_down, fullfile_option_dropdown, location_option_dropdown,
                             seed_file_location_text_hbox, seed_file_location, submit_button):
        initially_hidden.layout.visibility = 'hidden'

    # ------------------------------------------------------------------
    # Visibility helpers and widget observers.
    # ------------------------------------------------------------------
    def set_widget_shown(widget, shown):
        # Toggle both CSS properties together so a hidden widget takes up no room in the form.
        widget.layout.visibility = 'visible' if shown else 'hidden'
        widget.layout.display = 'flex' if shown else 'none'

    def hide_all_widgets():
        # Reset the form to "only the source selector is visible".
        for widget in (location_option_dropdown, seed_file_location_text_hbox, custom_language_dropdown,
                       samples_pg_language_dropdown, fullfile_option_dropdown, seed_file_location,
                       model_dropdown, data_set_drop_down):
            set_widget_shown(widget, False)
        # The submit button only ever has its visibility toggled, never its display.
        submit_button.layout.visibility = 'hidden'

    def on_source_value_change(change):
        # Start from a blank form, then reveal whatever the chosen source type needs.
        hide_all_widgets()
        selected_source = change['new']
        if selected_source == 'custom':
            set_widget_shown(custom_language_dropdown, True)
            set_widget_shown(location_option_dropdown, True)
            if custom_language_dropdown.value and custom_language_dropdown.value != 'sparql':
                set_widget_shown(fullfile_option_dropdown, True)
            # If textbox has a value, OR we are loading from S3, display textbox instead of the filepicker
            if seed_file_location_text.value or location_option_dropdown.value == 'S3':
                set_widget_shown(seed_file_location_text_hbox, True)
            elif seed_file_location.value or location_option_dropdown.value == 'Local':
                set_widget_shown(seed_file_location, True)
            if custom_language_dropdown.value \
                    and (seed_file_location_text.value or
                         (seed_file_location.value and location_option_dropdown.value == 'Local')):
                submit_button.layout.visibility = 'visible'
        elif selected_source == 'samples':
            set_widget_shown(model_dropdown, True)
            if model_dropdown.value:
                is_property_graph = model_dropdown.value == 'propertygraph'
                set_widget_shown(samples_pg_language_dropdown, is_property_graph)
                # Property-graph samples need a language choice before datasets can be listed.
                show_dataset = samples_pg_language_dropdown.value != '' if is_property_graph else True
                if show_dataset:
                    set_widget_shown(data_set_drop_down, True)
                    if data_set_drop_down.value and data_set_drop_down.value != SEED_NO_DATASETS_FOUND_MSG:
                        submit_button.layout.visibility = 'visible'
        # Any other source value: hide_all_widgets() above already left the form blank. The inner
        # text widget is deliberately not hidden here; only its HBox is ever toggled, so hiding the
        # text widget itself would leave it invisible for the rest of the session.

    def change_datasets_widget(samples_lang):
        # Refresh the dataset dropdown for the given sample language.
        data_sets = get_data_sets(samples_lang)
        if data_sets:
            data_sets.sort()
            data_set_drop_down.options = [ds for ds in data_sets if
                                          ds != '__pycache__']  # being extra sure that we aren't passing __pycache__.
            set_widget_shown(data_set_drop_down, True)
            submit_button.layout.visibility = 'visible'
        else:
            if samples_lang:
                data_set_drop_down.options = [SEED_NO_DATASETS_FOUND_MSG]
            set_widget_shown(data_set_drop_down, bool(samples_lang))
            submit_button.layout.visibility = 'hidden'

    def on_model_value_change(change):
        selected_model = change['new']
        samples_language = ''
        if selected_model == 'propertygraph':
            set_widget_shown(samples_pg_language_dropdown, True)
            if samples_pg_language_dropdown.value != '':
                samples_language = samples_pg_language_dropdown.value
        else:
            set_widget_shown(samples_pg_language_dropdown, False)
            if selected_model == 'rdf':
                samples_language = 'sparql'
        change_datasets_widget(samples_language)

    def on_dataset_value_change(change):
        if not change['new']:
            submit_button.layout.visibility = 'hidden'

    def on_samples_pg_language_value_change(change):
        change_datasets_widget(change['new'])

    def on_custom_language_value_change(change):
        # Preserve the value/state of the text/selector widget if it's already rendered
        # Otherwise, display the default selector widget (file browser)
        selected_language = change['new']
        set_widget_shown(fullfile_option_dropdown, selected_language != 'sparql')
        if not seed_file_location_text.value and seed_file_location_text_hbox.layout.visibility == 'hidden':
            set_widget_shown(seed_file_location, True)
        submit_button.layout.visibility = 'visible'

    def on_location_value_change(change):
        # Local sources default to the file chooser unless a path was already typed in.
        use_file_chooser = change['new'] == 'Local' and not seed_file_location_text.value
        set_widget_shown(seed_file_location_text_hbox, not use_file_chooser)
        set_widget_shown(seed_file_location, use_file_chooser)

    def on_seedfile_text_value_change(change):
        submit_button.layout.visibility = 'visible' if seed_file_location_text.value else 'hidden'

    def on_seedfile_select_value_change(change):
        submit_button.layout.visibility = 'visible' if seed_file_location.value else 'hidden'

    def disable_seed_widgets():
        # Freeze the form once a load starts; the submit button is removed entirely.
        for widget in (source_dropdown, model_dropdown, custom_language_dropdown,
                       samples_pg_language_dropdown, data_set_drop_down, fullfile_option_dropdown,
                       location_option_dropdown, seed_file_location_text, seed_file_location):
            widget.disabled = True
        submit_button.close()

    # ------------------------------------------------------------------
    # Per-query execution helpers.
    # Both return a status code:
    #   0 = continue
    #   1 = continue, but count the query as failed (errors are being ignored)
    #   2 = abort the load (the error has already been rendered into `output`)
    # ------------------------------------------------------------------
    def process_gremlin_query_line(query_line, line_index, q):
        if not query_line:
            logger.debug(f"Skipped blank query at line {line_index + 1} in seed file {q['name']}")
            return 0
        try:
            self.client.gremlin_query(query_line)
            return 0
        except GremlinServerError as gremlinEx:
            try:
                error = json.loads(gremlinEx.args[0][5:])  # remove the leading error code.
                content = json.dumps(error, indent=2)
            except Exception:
                content = {
                    'error': gremlinEx
                }
            logger.debug(f"GremlinServerError at line {line_index + 1} in seed file {q['name']}")
            logger.debug(content)
        except Exception as e:
            content = {
                'error': e
            }
            logger.debug(f"Exception at line {line_index + 1} in seed file {q['name']}")
            logger.debug(content)
        if args.ignore_errors:
            return 1
        with output:
            generate_seed_error_msg(content, q['name'], line_index + 1)
        return 2

    def process_cypher_query_line(query_line, line_index, q):
        if not query_line:
            logger.debug(f"Skipped blank query at line {line_index + 1} in seed file {q['name']}")
            return 0
        try:
            cypher_res = self.client.opencypher_http(query_line)
            cypher_res.raise_for_status()
            return 0
        except HTTPError as httpEx:
            try:
                error = json.loads(httpEx.response.content.decode('utf-8'))
                content = json.dumps(error, indent=2)
            except Exception:
                content = {
                    'error': httpEx
                }
            logger.debug(content)
        except Exception as ex:
            content = {
                'error': str(ex)
            }
            logger.error(content)
        if args.ignore_errors:
            return 1
        with output:
            generate_seed_error_msg(content, q['name'])
        return 2

    # ------------------------------------------------------------------
    # Submit handler: validates the form, fetches the queries and runs them.
    # ------------------------------------------------------------------
    def on_button_clicked(b=None):
        # Drop any validation label left over from a previous click.
        seed_file_location_text_hbox.children = (seed_file_location_text,)
        filename = None
        if source_dropdown.value == 'samples':
            data_set = data_set_drop_down.value.lower()
            fullfile_query = False
        else:
            if seed_file_location_text.value:
                stall_with_warning = False
                if location_option_dropdown.value == 'S3' \
                        and not (seed_file_location_text.value.startswith('s3://')
                                 and len(seed_file_location_text.value) > 7):
                    seed_file_location_text_validation_label = widgets.HTML(
                        '<p style="color:red;">S3 source URI must start with s3://</p>')
                    stall_with_warning = True
                elif location_option_dropdown.value == 'Local' \
                        and not seed_file_location_text.value.startswith('/'):
                    seed_file_location_text_validation_label = widgets.HTML(
                        '<p style="color:red;">Local source URI must be a valid file path</p>')
                    stall_with_warning = True
                if stall_with_warning:
                    seed_file_location_text_validation_label.style = DescriptionStyle(color='red')
                    seed_file_location_text_hbox.children += (seed_file_location_text_validation_label,)
                    return
                filename = seed_file_location_text.value
            elif seed_file_location.value:
                filename = seed_file_location.value
            else:
                return
            data_set = filename
            fullfile_query = fullfile_option_dropdown.value

        disable_seed_widgets()
        if custom_language_dropdown.value and filename:
            model = normalize_model_name(custom_language_dropdown.value)
            seeding_language = normalize_language_name(custom_language_dropdown.value)
        else:
            model = normalize_model_name(model_dropdown.value)
            seeding_language = 'sparql' if model == 'rdf' else samples_pg_language_dropdown.value

        with output:
            print(f'Loading data set {data_set} for {seeding_language}')
        queries = get_queries(seeding_language, data_set, source_dropdown.value)
        if not queries:
            # NOTE(review): assumes get_queries signals a retrieval failure with None and "nothing
            # found" with an empty collection - confirm against its implementation.
            with output:
                if queries is None:
                    print('Query retrieval from files terminated with errors.')
                else:
                    print('Did not find any queries for the given dataset')
            return

        load_index = 1  # start at 1 to have a non-empty progress bar
        progress = widgets.IntProgress(
            value=load_index,
            min=0,
            max=len(queries) + 1,  # len + 1 so we can start at index 1
            orientation='horizontal',
            bar_style='info',
            description='Loading:'
        )
        with progress_output:
            display(progress)

        # The language is fixed for the whole load, so pick the executor once.
        if seeding_language == 'opencypher':
            run_query_line = process_cypher_query_line
        else:
            run_query_line = process_gremlin_query_line

        error_count = 0
        for q in queries:
            with output:
                print(f'{progress.value}/{len(queries)}:\t{q["name"]}')
            if model == 'rdf':
                failure_content = None
                try:
                    # NOTE(review): unlike the openCypher path, the response is not checked with
                    # raise_for_status() here - confirm that client.sparql raises on HTTP errors.
                    self.client.sparql(q['content'], path=args.path)
                except HTTPError as httpEx:
                    # attempt to turn response into json
                    try:
                        error = json.loads(httpEx.response.content.decode('utf-8'))
                        failure_content = json.dumps(error, indent=2)
                    except Exception:
                        failure_content = {
                            'error': httpEx
                        }
                    logger.debug(failure_content)
                except Exception as ex:
                    failure_content = {
                        'error': str(ex)
                    }
                    logger.error(failure_content)
                if failure_content is not None:
                    # Count every failed query, whether or not its error body parsed as JSON.
                    error_count += 1
                    if not args.ignore_errors:
                        with output:
                            generate_seed_error_msg(failure_content, q['name'])
                        progress.close()
                        return
            else:  # gremlin and cypher
                if fullfile_query or (source_dropdown.value == 'samples' and 'full' in q['name']):
                    # treat entire file content as one query
                    numbered_queries = [(0, q['content'])]
                else:
                    # treat each line as its own query
                    numbered_queries = enumerate(q['content'].splitlines())
                for line_index, query_line in numbered_queries:
                    query_status = run_query_line(query_line, line_index, q)
                    if query_status == 2:
                        progress.close()
                        return
                    if query_status == 1:
                        error_count += 1
            progress.value += 1

        # Sleep for two seconds so the user sees the progress bar complete
        time.sleep(2)
        progress.close()
        with output:
            print('Done.')
            if error_count:
                print(f'\n{error_count} individual queries were skipped due to errors. For more '
                      f'information, please rerun the query with debug logs enabled (%enable_debug).')

    # ------------------------------------------------------------------
    # Wire up the observers and render the form.
    # ------------------------------------------------------------------
    submit_button.on_click(on_button_clicked)
    source_dropdown.observe(on_source_value_change, names='value')
    model_dropdown.observe(on_model_value_change, names='value')
    data_set_drop_down.observe(on_dataset_value_change, names='value')
    custom_language_dropdown.observe(on_custom_language_value_change, names='value')
    samples_pg_language_dropdown.observe(on_samples_pg_language_value_change, names='value')
    location_option_dropdown.observe(on_location_value_change, names='value')
    seed_file_location_text.observe(on_seedfile_text_value_change, names='value')
    seed_file_location.observe(on_seedfile_select_value_change, names='value')

    display(source_dropdown, model_dropdown, custom_language_dropdown, samples_pg_language_dropdown,
            data_set_drop_down, fullfile_option_dropdown, location_option_dropdown, seed_file_location,
            seed_file_location_text_hbox, submit_button, progress_output, output)

    # ------------------------------------------------------------------
    # Pre-fill the form from the command-line style arguments. Assigning a widget value fires
    # the observers above, so the order of the assignments below matters.
    # ------------------------------------------------------------------
    if (args.model != '' or args.language != '') and args.source == '':
        source_dropdown.value = 'samples'
        normed_model = normalize_model_name(args.model)
        normed_language = normalize_language_name(args.language)
        selected_model = None
        selected_language = None
        if normed_model != '' and normed_model in SEED_MODEL_OPTIONS:
            if normed_model == 'propertygraph':
                selected_model = 'propertygraph'
                if normed_language in ['gremlin', 'opencypher']:
                    selected_language = normed_language
                elif normed_language == '':
                    selected_language = 'gremlin'
            else:
                selected_model = 'rdf'
                selected_language = 'sparql'
        elif normed_language != '' and normed_language in SEED_LANGUAGE_OPTIONS:
            if normed_language == 'sparql':
                selected_model = 'rdf'
                selected_language = 'sparql'
            else:
                selected_model = 'propertygraph'
                selected_language = normed_language
        if selected_model:
            model_dropdown.value = selected_model
            if selected_language:
                if selected_language != 'sparql':
                    samples_pg_language_dropdown.value = selected_language
                # Match the requested dataset case-insensitively and assign the option exactly as
                # the dropdown lists it, so the assignment can never name a value it does not offer.
                requested_dataset = args.dataset.lower()
                matching_options = [option for option in data_set_drop_down.options
                                    if str(option).lower() == requested_dataset]
                if requested_dataset and matching_options:
                    data_set_drop_down.value = matching_options[0]
                    if args.run:
                        on_button_clicked()
    elif args.source != '' or args.language != '':
        source_dropdown.value = 'custom'
        valid_language_value = False
        language = normalize_language_name(args.language)
        if language != '' and language in SEED_LANGUAGE_OPTIONS:
            custom_language_dropdown.value = language
            valid_language_value = True
        if args.source != '':
            seed_file_location_text.value = args.source
            set_widget_shown(seed_file_location_text_hbox, True)
            if seed_file_location_text.value.startswith('s3://'):
                location_option_dropdown.value = 'S3'
            set_widget_shown(location_option_dropdown, True)
            set_widget_shown(seed_file_location, False)
        if seed_file_location_text.value and valid_language_value and args.run:
            on_button_clicked()