utils/create_notebook_table.py (54 lines of code) (raw):
import pandas as pd
GITHUB_PATH_PREFIX = "huggingface/setfit/blob/main/notebooks/"
CHAPTER_TO_NB = {
"Text Classification (Multiclass)": "text-classification",
"Text Classification (Multilabel)": "text-classification_multilabel",
"Zero-Shot Text Classification": "zero-shot-classification",
"Hyperparameter search": "text-classification_hyperparameter-search",
}
def _find_text_in_file(filename, start_prompt, end_prompt):
"""
Find the text in `filename` between a line beginning with `start_prompt` and before `end_prompt`, removing empty
lines.
Copied from: https://github.com/huggingface/transformers/blob/16f0b7d72c6d4e122957392c342b074aa2c5c519/utils/check_table.py#L30
"""
with open(filename, "r", encoding="utf-8", newline="\n") as f:
lines = f.readlines()
# Find the start prompt.
start_index = 0
while not lines[start_index].startswith(start_prompt):
start_index += 1
start_index += 1
end_index = start_index
while not lines[end_index].startswith(end_prompt):
end_index += 1
end_index -= 1
while len(lines[start_index]) <= 1:
start_index += 1
while len(lines[end_index]) <= 1:
end_index -= 1
end_index += 1
return "".join(lines[start_index:end_index]), start_index, end_index, lines
def create_table():
data = {"Notebook": [], "Colab": [], "Kaggle": [], "Gradient": [], "Studio Lab": []}
for title, nb in CHAPTER_TO_NB.items():
nb_path = f"{GITHUB_PATH_PREFIX}{nb}.ipynb"
data["Notebook"].append(title)
data["Colab"].append(
f"[](https://colab.research.google.com/github/{nb_path})"
)
data["Kaggle"].append(
f"[](https://kaggle.com/kernels/welcome?src=https://github.com/{nb_path})"
)
data["Gradient"].append(
f"[](https://console.paperspace.com/github/{nb_path})"
)
data["Studio Lab"].append(
f"[](https://studiolab.sagemaker.aws/import/github/{nb_path})"
)
return pd.DataFrame(data).to_markdown(index=False) + "\n"
def main():
table = create_table()
_, start_index, end_index, lines = _find_text_in_file(
filename="notebooks/README.md",
start_prompt="<!--This table is automatically generated, do not fill manually!-->",
end_prompt="<!--End of table-->",
)
with open("notebooks/README.md", "w", encoding="utf-8", newline="\n") as f:
f.writelines(lines[:start_index] + [table] + lines[end_index:])
if __name__ == "__main__":
main()