in chunking/chunkers/nl2sql_chunker.py
def get_chunks(self):
    # Requires module-level imports: json, logging.
    chunks = []
    logging.info(f"[nl2sql_chunker][{self.filename}] Running get_chunks.")
    blob_data = self.document_bytes
    # Decode the raw bytes into text (assumed to be UTF-8 encoded).
    text = self.decode_to_utf8(blob_data)
    # Parse the JSON payload; a malformed file yields an empty chunk list.
    try:
        json_data = json.loads(text)
        logging.debug(f"[nl2sql_chunker][{self.filename}] Successfully parsed JSON data.")
    except json.JSONDecodeError as e:
        logging.error(f"[nl2sql_chunker][{self.filename}] Failed to parse JSON data: {e}")
        return chunks
    # Each top-level entry in the JSON object becomes one chunk.
    chunk_id = 0
    for query_id, data in json_data.items():
        chunk_id += 1
        content = json.dumps(data, indent=4, ensure_ascii=False)
        chunk_size = self.token_estimator.estimate_tokens(content)
        if chunk_size > self.max_chunk_size:
            logging.warning(
                f"[nl2sql_chunker][{self.filename}] Chunk {chunk_id} size {chunk_size} "
                f"exceeds max_chunk_size {self.max_chunk_size}."
            )
            # Each chunk corresponds to a single query, so truncation would
            # lose data. Proceed with the oversized chunk as is.
        # The natural-language question is what gets embedded; the full
        # entry (question, SQL, metadata) is stored as the chunk content.
        embedding_text = data.get("question", "")
        chunk_dict = self._create_chunk(
            chunk_id=chunk_id,
            content=content,
            embedding_text=embedding_text,
            summary=None
        )
        chunks.append(chunk_dict)
    return chunks