def run_pipeline()

in src/data_prep_ner.py [0:0]


    def run_pipeline(self):
        """Execute every data preparation stage, end to end.

        Stages run strictly in this order:

        1. ``get_data`` supplies the input data.
        2. ``prepare_train_test_datasets`` turns that data into the
           dataset splits, which are then logged.
        3. ``upload_to_hf`` publishes the splits to the Hugging Face Hub
           under ``NER_DATASET_REPO_ID``.

        Nothing is returned; progress is reported through ``logger``.
        Exceptions raised by any stage propagate to the caller, in which
        case the final "completed" message is not logged.
        """
        logger.info("Starting the data preparation pipeline...")

        # Fetch the data and immediately derive the dataset splits from it.
        splits = self.prepare_train_test_datasets(self.get_data())

        # Log the resulting splits object so the run leaves a record of it.
        logger.info(splits)

        # Publish the prepared splits to the configured Hub repository.
        self.upload_to_hf(splits, NER_DATASET_REPO_ID)

        logger.info("Pipeline completed successfully.")