bq-connector/docai_bq_connector/helper/pdf_util.py (7 lines of code) (raw):
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from io import BytesIO
from pypdf import PdfReader
def get_pdf_page_cnt(pdf_doc: bytes) -> int:
    """
    Count the pages of a PDF document held in memory.

    Args:
        pdf_doc: The PDF content as bytes. It is wrapped in an in-memory
            stream and handed to pypdf's ``PdfReader``.

    Returns:
        The length of the reader's ``pages`` collection.
    """
    # PdfReader is given a file-like object, so expose the bytes through
    # BytesIO and measure the page collection directly.
    return len(PdfReader(BytesIO(pdf_doc)).pages)