def _generate_examples()

in src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py [0:0]


    def _generate_examples(self, files, metadata_files, add_metadata, add_labels):
        """Yield ``(sample_idx, sample)`` pairs, numbered consecutively from 0.

        The generator runs in one of two modes, selected by ``add_metadata``.

        Plain-files mode (``add_metadata`` is falsy):
            Each kept file becomes ``{self.BASE_COLUMN_NAME: downloaded_file}``.
            Files whose lower-cased extension is not in ``self.EXTENSIONS`` are
            skipped. When ``self.config.filters`` is set, a sample is also
            skipped if it does not survive the filter expression.

        Metadata mode (``add_metadata`` is truthy):
            Every row of every table returned by ``self._read_metadata`` becomes
            one sample. ``files`` and ``add_labels`` are not consulted.

        Args:
            files: Iterable of ``(original_file, downloaded_file_or_dir)`` pairs.
                When ``original_file`` is falsy, the second element is iterated
                as a collection of downloaded files.
            metadata_files: Iterable of
                ``(original_metadata_file, downloaded_metadata_file)`` pairs.
            add_metadata: Selects metadata mode when truthy.
            add_labels: In plain-files mode, adds a ``"label"`` entry holding the
                name of the directory that contains the file.

        Yields:
            Tuples ``(sample_idx, sample)`` where ``sample`` is a dict.
        """
        sample_idx = 0

        if not add_metadata:
            filters = self.config.filters
            if filters is not None:
                # A list is converted to an expression; anything else is used as given.
                filter_expr = pq.filters_to_expression(filters) if isinstance(filters, list) else filters

            for original_file, downloaded_file_or_dir in files:
                if original_file:
                    downloaded_files = [downloaded_file_or_dir]
                else:
                    downloaded_files = downloaded_file_or_dir

                for downloaded_file in downloaded_files:
                    # Extension and label come from the original path when there is one.
                    reference_path = original_file or downloaded_file
                    extension = os.path.splitext(reference_path)[-1]
                    if extension.lower() not in self.EXTENSIONS:
                        continue

                    sample = {self.BASE_COLUMN_NAME: downloaded_file}
                    if add_labels:
                        sample["label"] = os.path.basename(os.path.dirname(reference_path))

                    if filters is not None:
                        # Run the filter on a one-row table; an empty result rejects the sample.
                        surviving_rows = pa.Table.from_pylist([sample]).filter(filter_expr)
                        if len(surviving_rows) == 0:
                            continue

                    yield sample_idx, sample
                    sample_idx += 1
            return

        # Non-empty paths at which _visit_with_path reports a self.BASE_FEATURE instance.
        base_feature_paths = []

        def record_base_feature_path(feature, feature_path):
            if feature_path and isinstance(feature, self.BASE_FEATURE):
                base_feature_paths.append(feature_path)

        _visit_with_path(self.info.features, record_base_feature_path)

        for original_metadata_file, downloaded_metadata_file in metadata_files:
            metadata_ext = os.path.splitext(original_metadata_file or downloaded_metadata_file)[-1]
            downloaded_metadata_dir = os.path.dirname(downloaded_metadata_file)

            def relocate_feature(item, feature_path: _VisitPath):
                # NOTE(review): presumably _nested_apply calls this once per
                # remaining path suffix while descending - confirm in the helper.
                depth = len(feature_path)
                if depth == 0:
                    # The value itself is a path relative to the metadata file:
                    # normalise it, turn backslashes into "/", anchor it to that directory.
                    relative_path = os.path.normpath(item).replace("\\", "/")
                    return os.path.join(downloaded_metadata_dir, relative_path)

                column = feature_path[0]
                if not isinstance(column, str):
                    return item

                if depth == 2 and feature_path[1] == 0:
                    generic_key, prefixed_key = "file_names", column + "_file_names"
                elif depth == 1:
                    generic_key, prefixed_key = "file_name", column + "_file_name"
                else:
                    return item

                # The generic key wins when truthy; the prefixed key is popped only as fallback.
                item[column] = item.pop(generic_key, None) or item.pop(prefixed_key, None)
                return item

            metadata_tables = self._read_metadata(downloaded_metadata_file, metadata_ext=metadata_ext)
            for pa_metadata_table in metadata_tables:
                for sample in pa_metadata_table.to_pylist():
                    for base_feature_path in base_feature_paths:
                        _nested_apply(sample, base_feature_path, relocate_feature)
                    yield sample_idx, sample
                    sample_idx += 1