def main()

in find_examples_manpage_data.py [0:0]


def _count_lines(path):
    """Return the number of lines in the file at *path*."""
    # Binary mode: only line boundaries matter here, so skip text decoding.
    with open(path, "rb") as handle:
        return sum(1 for _ in handle)


def main(args):
    """Collect examples from a JSON-lines file and write them to a CSV file.

    The input is read in chunks of ``args.chunk_size`` rows; every row is
    passed to ``get_examples`` and the returned examples are accumulated.
    The result is written to ``args.output`` with the columns ``name``,
    ``command`` and ``context``.

    Args:
        args: Object exposing ``input`` (path of the JSON-lines file),
            ``chunk_size`` (rows per chunk) and ``output`` (path of the CSV
            file to write).

    Raises:
        ValueError: If ``args.input`` is not an existing file.
    """
    if not os.path.isfile(args.input):
        raise ValueError(f"Can't find file '{args.input}'")
    chunk_size = args.chunk_size
    data = pd.read_json(args.input, lines=True, chunksize=chunk_size)

    # Derive the progress-bar length from the actual input instead of a
    # hard-coded line count, so the bar stays accurate for any file.
    total_lines = _count_lines(args.input)
    total_iterations = (total_lines + chunk_size - 1) // chunk_size  # ceil

    examples = []
    examples_with_query = 0
    try:
        with tqdm(data, total=total_iterations) as progress_bar:
            for chunk in progress_bar:
                for _, command in chunk.iterrows():
                    new_examples = get_examples(command)

                    # The third field of an example ends up in the "context"
                    # column; count the examples where it is non-empty.
                    examples_with_query += sum(1 for ex in new_examples if ex[2])

                    examples.extend(new_examples)
                    progress_bar.set_postfix({"examples": len(examples), "examples with query": examples_with_query})
    finally:
        # Release the underlying file handle even if processing fails midway.
        data.close()

    examples = pd.DataFrame(examples, columns=["name", "command", "context"])
    examples.to_csv(args.output, index=False)