in find_examples_manpage_data.py [0:0]
def _count_lines(path):
    """Return the number of physical lines in the file at *path*."""
    # Binary mode: only line breaks are counted, so no decoding is required.
    with open(path, "rb") as handle:
        return sum(1 for _ in handle)


def main(args):
    """Collect examples from a JSON-lines file and write them to a CSV file.

    The input is read in chunks of ``args.chunk_size`` rows. Every row is
    passed to ``get_examples`` and all returned examples are accumulated,
    then written as a CSV with the columns ``name``, ``command`` and
    ``context``.

    Args:
        args: Object providing the attributes ``input`` (path of the
            JSON-lines file), ``chunk_size`` (rows per chunk) and ``output``
            (path of the CSV file to write).

    Raises:
        ValueError: If ``args.input`` is not an existing file.
    """
    if not os.path.isfile(args.input):
        raise ValueError(f"Can't find file '{args.input}'")

    chunk_size = args.chunk_size
    data = pd.read_json(args.input, lines=True, chunksize=chunk_size)

    # Derive the progress-bar length from the actual input instead of a
    # hard-coded line count, so the bar is accurate for any input file.
    total_lines = _count_lines(args.input)
    # Ceiling division: a trailing partial chunk still counts as an iteration.
    total_iterations = (total_lines + chunk_size - 1) // chunk_size

    examples = []
    examples_with_query = 0
    with tqdm(data, total=total_iterations) as progress_bar:
        for chunk in progress_bar:
            for _, command in chunk.iterrows():
                new_examples = get_examples(command)
                # Index 2 of an example ends up in the "context" column; only
                # examples where it is truthy are counted here.
                examples_with_query += sum(1 for ex in new_examples if ex[2])
                examples.extend(new_examples)
            progress_bar.set_postfix(
                {"examples": len(examples), "examples with query": examples_with_query}
            )

    frame = pd.DataFrame(examples, columns=["name", "command", "context"])
    frame.to_csv(args.output, index=False)