in pdf-splitter-python/main.py [0:0]
def main(args: argparse.Namespace) -> int:
    """Split a PDF into sub-documents at the points identified by Document AI.

    Validates the input, resolves the processor to use, sends the PDF for
    online processing, writes the resulting document as JSON, and finally
    splits the PDF according to the entities found in the processed document.

    Args:
        args: Parsed command-line arguments. The attributes read here are
            ``project_id``, ``input``, ``output_dir``,
            ``multi_region_location`` and ``split_processor_type``.
            ``project_id`` and ``output_dir`` are filled in on ``args`` when
            they are empty.

    Returns:
        ``0`` on success; ``1`` when no project ID can be determined, the
        input file does not exist, or the input is not a PDF.
    """
    if not args.project_id:
        # Fall back to the project tied to the default credentials.
        _, project_id = google.auth.default()
        if not project_id:
            # The default credentials may carry no project; stop here rather
            # than passing an empty project ID on to the API calls below.
            print(
                "Could not determine a project ID from the default "
                "credentials; please provide one explicitly"
            )
            return 1
        args.project_id = project_id

    file_path = os.path.abspath(args.input)
    if not os.path.isfile(file_path):
        print(f"Could not find file at {file_path}")
        return 1

    # Require the extension as a suffix. A substring test would also accept
    # names such as "report.pdf.txt" or paths with the extension in a
    # directory name.
    if not args.input.endswith(PDF_EXTENSION):
        print(f"Input file {args.input} is not a PDF")
        return 1

    if not args.output_dir:
        # Default to writing the results next to the input file.
        args.output_dir = os.path.dirname(file_path)

    # The API endpoint is specific to the chosen multi-region location.
    client = DocumentProcessorServiceClient(
        client_options=ClientOptions(
            api_endpoint=f"{args.multi_region_location}-documentai.googleapis.com"
        )
    )
    processor_name = get_or_create_processor(
        client, args.project_id, args.multi_region_location, args.split_processor_type
    )

    print(
        "Using:\n"
        f'* Project ID: "{args.project_id}"\n'
        f'* Location: "{args.multi_region_location}"\n'
        f'* Processor Name "{processor_name}"\n'
        f'* Input PDF "{os.path.basename(file_path)}"\n'
        f'* Output directory: "{args.output_dir}"\n'
    )

    document = online_process(client, processor_name, file_path)

    document_json = write_document_json(document, file_path, output_dir=args.output_dir)
    print(f"Document AI Output: {document_json}")

    split_pdf(document.entities, file_path, output_dir=args.output_dir)

    print("Done.")
    return 0