pyiceberg/utils/truncate.py (25 lines of code) (raw):

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Optional

# Highest code point accepted by ``chr``.
_MAX_CODE_POINT = 0x10FFFF
# Inclusive range of UTF-16 surrogate code points. ``chr`` accepts them, but
# they are not Unicode scalar values and cannot be encoded as UTF-8.
_FIRST_SURROGATE = 0xD800
_LAST_SURROGATE = 0xDFFF
_MAX_BYTE = 0xFF


def truncate_upper_bound_text_string(value: str, trunc_length: Optional[int]) -> Optional[str]:
    """Truncate *value* and round the result up so it is still an upper bound.

    The string is cut with ``value[:trunc_length]``. If nothing was cut off,
    the string is returned as is. Otherwise the right-most character that can
    be incremented is replaced by its successor code point, which makes the
    shortened string compare greater than ``value``.

    Args:
        value: The string to truncate.
        trunc_length: Slice end passed to ``value[:trunc_length]``; ``None``
            disables truncation.

    Returns:
        The (possibly incremented) truncated string, or ``None`` when the
        string was truncated but no character of the prefix can be
        incremented (every character is U+10FFFF, or the prefix is empty).
    """
    result = value[:trunc_length]
    if result == value:
        # Nothing was cut off, so the value is already its own upper bound.
        return result

    chars = list(result)
    # Walk from the last character towards the first, looking for one that
    # still has a successor.
    for i in range(len(chars) - 1, -1, -1):
        code_point = ord(chars[i])
        if code_point >= _MAX_CODE_POINT:
            # Highest code point reached; try the character to the left.
            continue
        successor = code_point + 1
        if _FIRST_SURROGATE <= successor <= _LAST_SURROGATE:
            # Jump over the surrogate block: a lone surrogate would make the
            # bound impossible to encode as UTF-8.
            successor = _LAST_SURROGATE + 1
        chars[i] = chr(successor)
        return "".join(chars)
    return None  # didn't find a valid upper bound


def truncate_upper_bound_binary_string(value: bytes, trunc_length: Optional[int]) -> Optional[bytes]:
    """Truncate *value* and round the result up so it is still an upper bound.

    The bytes are cut with ``value[:trunc_length]``. If nothing was cut off,
    they are returned as is. Otherwise the right-most byte below 0xFF is
    incremented by one, which makes the shortened value compare greater than
    ``value``.

    Args:
        value: The byte string to truncate.
        trunc_length: Slice end passed to ``value[:trunc_length]``; ``None``
            disables truncation.

    Returns:
        The (possibly incremented) truncated bytes, or ``None`` when the
        value was truncated but every byte of the prefix is 0xFF (or the
        prefix is empty).
    """
    result = value[:trunc_length]
    if result == value:
        # Nothing was cut off, so the value is already its own upper bound.
        return result

    buffer = bytearray(result)
    # Walk from the last byte towards the first, looking for one that can be
    # incremented without overflowing.
    for i in range(len(buffer) - 1, -1, -1):
        if buffer[i] < _MAX_BYTE:
            buffer[i] += 1
            return bytes(buffer)
    return None  # didn't find a valid upper bound