in utils/generate_notebooks.py [0:0]
def build_notebook(fname, title, output_dir="."):
"""
Build the notebook for fname with a given title in output_dir.
"""
sections = read_and_split_frameworks(fname)
sections_with_accelerate = [
"chapter3/3", # "Fine-tuning a model with the Trainer API or Keras",
"chapter3/4", # "A full training",
"chapter7/2_pt", # "Token classification (PyTorch)",
"chapter7/3_pt", # "Fine-tuning a masked language model (PyTorch)"
"chapter7/4_pt", # "Translation (PyTorch)"
"chapter7/5_pt", # "Summarization (PyTorch)",
"chapter7/6_pt", # "Training a causal language model from scratch (PyTorch)"
"chapter7/7_pt", # "Question answering (PyTorch)"
]
sections_with_hf_hub = [
"chapter4/3_pt", # "Sharing pretrained models (PyTorch)"
"chapter4/3_tf", # "Sharing pretrained models (TensorFlow)"
"chapter5/5", # "Creating your own dataset"
"chapter7/2_pt", # "Token classification (PyTorch)"
"chapter7/2_tf", # "Token classification (TensorFlow)"
"chapter6/2", # "Training a new tokenizer from an old one"
"chapter7/3_pt", # "Fine-tuning a masked language model (PyTorch)"
"chapter7/3_tf", # "Fine-tuning a masked language model (TensorFlow)"
"chapter7/4_pt", # "Translation (PyTorch)"
"chapter7/4_tf", # "Translation (TensorFlow)"
"chapter7/5_pt", # "Summarization (PyTorch)"
"chapter7/5_tf", # "Summarization (TensorFlow)"
"chapter7/6_pt", # "Training a causal language model from scratch (PyTorch)"
"chapter7/6_tf", # "Training a causal language model from scratch (TensorFlow)"
"chapter7/7_pt", # "Question answering (PyTorch)"
"chapter7/7_tf", # "Question answering (TensorFlow)"
"chapter8/2", # "What to do when you get an error"
]
sections_with_faiss = [
"chapter5/6_pt", # "Semantic search with FAISS (PyTorch)"
"chapter5/6_tf", # "Semantic search with FAISS (TensorFlow)"
]
sections_with_gradio = [
"chapter9/2", # "Building your first demo"
"chapter9/3", # "Understanding the Interface class"
"chapter9/4", # "Sharing demos with others"
"chapter9/5", # "Integrations with the Hugging Face Hub"
"chapter9/6", # "Advanced Interface features"
"chapter9/7", # "Introduction to Blocks"
]
stem = Path(fname).stem
if not isinstance(sections, dict):
contents = [sections]
titles = [title]
fnames = [f"section{stem}.ipynb"]
section_names = [f"{Path(fname).parent.stem}/{stem}"]
else:
contents = []
titles = []
fnames = []
section_names = []
for key, section in sections.items():
contents.append(section)
titles.append(f"{title} ({frameworks[key]})")
fnames.append(f"section{stem}_{key}.ipynb")
section_names.append(f"{Path(fname).parent.stem}/{stem}_{key}")
for title, content, fname, section_name in zip(titles, contents, fnames, section_names):
cells = extract_cells(content)
if len(cells) == 0:
continue
nb_cells = [
nb_cell(f"# {title}", code=False),
nb_cell("Install the Transformers, Datasets, and Evaluate libraries to run this notebook.", code=False),
]
# Install cell
installs = ["!pip install datasets evaluate transformers[sentencepiece]"]
if section_name in sections_with_accelerate:
installs.append("!pip install accelerate")
installs.append("# To run the training on TPU, you will need to uncomment the following line:")
installs.append(
"# !pip install cloud-tpu-client==0.10 torch==1.9.0 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.9-cp37-cp37m-linux_x86_64.whl"
)
if section_name in sections_with_hf_hub:
installs.append("!apt install git-lfs")
if section_name in sections_with_faiss:
installs.append("!pip install faiss-gpu")
if section_name in sections_with_gradio:
installs.append("!pip install gradio")
nb_cells.append(nb_cell("\n".join(installs)))
if section_name in sections_with_hf_hub:
nb_cells.extend(
[
nb_cell(
"You will need to setup git, adapt your email and name in the following cell.", code=False
),
nb_cell(
'!git config --global user.email "you@example.com"\n!git config --global user.name "Your Name"'
),
nb_cell(
"You will also need to be logged in to the Hugging Face Hub. Execute the following and enter your credentials.",
code=False,
),
nb_cell("from huggingface_hub import notebook_login\n\nnotebook_login()"),
]
)
nb_cells += [convert_to_nb_cell(cell) for cell in cells]
metadata = {"colab": {"name": title, "provenance": []}}
nb_dict = {"cells": nb_cells, "metadata": metadata, "nbformat": 4, "nbformat_minor": 4}
notebook = nbformat.notebooknode.NotebookNode(nb_dict)
os.makedirs(output_dir, exist_ok=True)
nbformat.write(notebook, os.path.join(output_dir, fname), version=4)