def load_dataset()

in obelics/visualization/web_document_and_filtering_visualization.py [0:0]


    def load_dataset(self):
        """Load the web documents dataset and truncate it to a user-chosen size.

        Renders a header and a select box offering a fixed list of sizes, loads
        the dataset stored at ``self.path_web_documents_dataset`` and replaces
        ``self.full_dataset`` with its first ``N`` rows, where ``N`` is the
        selected size capped at the number of rows actually available.

        Also makes sure the ``retained_web_document_dataset`` and
        ``discarded_web_document_dataset`` entries exist in
        ``st.session_state``; missing entries are created with value ``None``,
        existing ones are left untouched.
        """
        st.header("Select the size of the dataset")

        self.full_dataset = load_from_disk(self.path_web_documents_dataset)

        # Useful the first time we load the full dataset to add a column
        # indicating the original IDs of the documents
        # self.full_dataset = self.full_dataset.add_column("original_idx", [i for i in range(len(self.full_dataset))])
        # self.full_dataset.save_to_disk(self.path_web_documents_dataset)

        opt_sizes = ["100", "300", "1000", "3000", "10000"]
        size_dataset = st.selectbox(
            "Select the size of the dataset",
            options=opt_sizes,
        )

        # The select box hands back one of ``opt_sizes``. Anything else means
        # there is no size to apply, so leave the dataset and session state
        # as they are.
        if size_dataset not in opt_sizes:
            return

        # Never ask for more rows than the dataset holds.
        num_rows = min(int(size_dataset), self.full_dataset.num_rows)
        self.full_dataset = self.full_dataset.select(range(num_rows))

        # Create the session-state slots once; values already present must
        # survive Streamlit reruns, so only missing keys are initialized.
        for state_key in (
            "retained_web_document_dataset",
            "discarded_web_document_dataset",
        ):
            if state_key not in st.session_state:
                st.session_state[state_key] = None