in generate_commands_from_synopsis.py [0:0]
def main(args):
    """Generate synthetic command examples and write them to a CSV file.

    Pipeline, in order:
      1. Load the nl2bash JSON (transposed after reading) and feed every
         value of its ``cmd`` column to ``update_graph`` and
         ``add_utilities`` to fill a utilities graph and a utility counter.
      2. Add every key of the bashlint grammar to the counter and turn the
         counter into a ``commands`` frame sorted by ascending count.
      3. Attach the manpage synopsis of each command (empty string when no
         manpage entry matches) and the options returned by ``get_options``.
      4. Call ``generate_commands`` ``args.size`` times and save the
         results under the columns ``cmd`` and ``query``.

    Args:
        args: Object exposing ``nl2bash`` (JSON input), ``manpage``
            (JSON-lines input), ``size`` (number of examples to generate)
            and ``output`` (destination of the CSV).
    """
    nl2bash = pd.read_json(args.nl2bash).T

    utilities_graph = defaultdict(set)
    utility_counts = Counter()

    # Both passes go through tqdm's pandas integration; FutureWarning is
    # silenced only for the duration of these passes.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)

        tqdm.pandas(desc="Extracting utilities graph")
        nl2bash['cmd'].progress_apply(partial(update_graph, graph=utilities_graph))

        tqdm.pandas(desc="Extracting utilities from examples")
        nl2bash['cmd'].progress_apply(partial(add_utilities, counter=utility_counts))

    # Every grammar key is counted once more, so utilities that never occur
    # in the examples still end up in the table.
    grammar_commands = list(bashlint.grammar.bg.grammar.keys())
    utility_counts.update(grammar_commands)

    commands = pd.DataFrame.from_dict(utility_counts, orient='index', columns=["count"])
    commands = commands.reset_index().rename(columns={'index': 'cmd'})
    commands = commands.sort_values('count').reset_index(drop=True)
    commands['required'] = commands['cmd'].apply(number_of_required_arguments)
    print(f"Found {len(commands)} total utilities")

    manpage = pd.read_json(args.manpage, lines=True)
    commands = commands.merge(
        manpage[['name', 'synopsis']],
        left_on='cmd',
        right_on='name',
        how='left',
    )
    # The left join leaves NaN where no manpage row matched; use '' instead.
    without_synopsis = commands['synopsis'].isna()
    commands.loc[without_synopsis, 'synopsis'] = ''

    # Maps the first element of each alias entry to the index labels of the
    # manpage rows that list it.
    alias_to_idx = defaultdict(list)

    def register_aliases(row):
        row_label = row.name
        for alias_entry in row['aliases']:
            alias_to_idx[alias_entry[0]].append(row_label)

    manpage.apply(register_aliases, axis=1)

    commands.drop_duplicates(inplace=True)
    print(f"Now {len(commands)} utilities")

    options_for = partial(get_options, manpage=manpage, alias_to_idx=alias_to_idx)
    commands['options'] = commands['cmd'].apply(options_for)
    del manpage  # not used past this point

    examples = [
        list(generate_commands(commands, utilities_graph))
        for _ in tqdm(range(args.size), desc="Generating examples")
    ]
    examples_frame = pd.DataFrame(examples, columns=["cmd", "query"])
    examples_frame.to_csv(args.output, index=False)