### <font color='#4285f4'>Overview</font>

Generates synthetic customer records (name, year of birth, email, inception date and country code) and loads them into the BigQuery `customer` table.

Process Flow:

1. Use Gemini to create customers for randomly chosen countries (mostly France and nearby countries, plus the United States)
    * a. Customer id
    * b. Customer name
    * c. Customer year of birth
    * d. Customer email
    * e. Customer inception date
    * f. Customer country code
2. Verify that there are no gaps
3. Use Gemini to create Python Faker code
    * a. Faker will generate the same customer fields. It does not do as good a job as Gemini, but it is much faster
    * b. Do a BigQuery bulk insert of the data

Cost:
* Low: Gemini, BigQuery
* Medium: Remember to stop your Colab Enterprise Notebook Runtime

Author: 
* Adam Paternostro

### <font color='#4285f4'>License</font>

In [None]:
##################################################################################
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###################################################################################

### <font color='#4285f4'>Pip Installs</font>

In [None]:
# PIP Installs
# Faker is used by the bulk customer generation cell later in this notebook.
import sys

# https://PLACEHOLDER.com/index.html
# Invoke pip through sys.executable so the package is installed for the same
# interpreter that is running this notebook.
!{sys.executable} -m pip install faker

### <font color='#4285f4'>Initialize</font>

In [None]:
from PIL import Image
import IPython.display
import google.auth
import requests
import json
import uuid
import base64
import os
import cv2
import random
import time
import datetime
import base64
import random

import logging
from tenacity import retry, wait_exponential, stop_after_attempt, before_sleep_log, retry_if_exception

In [None]:
# Set these (run this cell to verify the output)
# NOTE(review): the "${...}" values look like template placeholders that are
# substituted when the notebook is deployed - confirm they are filled in before running.

bigquery_location = "${bigquery_location}"
region = "${region}"
# Used to build the Vertex AI endpoint URLs in the Gemini / Imagen helpers.
location = "${location}"
storage_account = "${chocolate_ai_bucket}"
public_storage_storage_account = "data-analytics-golden-demo"
# Table and dataset that the customer rows are read from and written to.
table_name = "customer"
dataset_name = "${bigquery_chocolate_ai_dataset}"

# Get the current date and time
now = datetime.datetime.now()

# Format the date and time as desired
formatted_date = now.strftime("%Y-%m-%d-%H-%M")

# Get some values using gcloud
# Each shell capture yields a list of output lines; exactly one line is expected.
project_id = !(gcloud config get-value project)
user = !(gcloud auth list --filter=status:ACTIVE --format="value(account)")

# Fail fast if gcloud did not return exactly one project, then unwrap it to a string.
if len(project_id) != 1:
  raise RuntimeError(f"project_id is not set: {project_id}")
project_id = project_id[0]

# Same check for the active account.
if len(user) != 1:
  raise RuntimeError(f"user is not set: {user}")
user = user[0]

print(f"project_id = {project_id}")
print(f"user = {user}")

### <font color='#4285f4'>Helper Methods</font>

#### restAPIHelper
Calls the Google Cloud REST API using the current user's credentials.

In [None]:
def restAPIHelper(url: str, http_verb: str, request_body: str) -> dict:
  """Call a Google Cloud REST API with the current user's credentials.

  Args:
      url: Full URL of the REST endpoint.
      http_verb: One of "GET", "POST", "PUT", "PATCH" or "DELETE".
      request_body: Payload for POST/PUT/PATCH; ignored for GET/DELETE.
          NOTE(review): annotated as str, but it is handed to the `json=`
          argument of requests, which serializes it - a dict is presumably
          what callers pass; confirm.

  Returns:
      The response body parsed from JSON.

  Raises:
      RuntimeError: If the verb is unknown or the status code is not 200.
  """

  import json

  import google.auth
  # Imported explicitly: the code below uses this submodule, and it is not
  # guaranteed to be loaded by `import google.auth` alone.
  import google.auth.transport.requests
  import requests

  # Get an access token based upon the current user
  creds, _ = google.auth.default()
  creds.refresh(google.auth.transport.requests.Request())

  headers = {
    "Content-Type" : "application/json",
    "Authorization" : "Bearer " + creds.token
  }

  # Verbs are split by whether a JSON body is sent with the request.
  verbs_without_body = {"GET": requests.get, "DELETE": requests.delete}
  verbs_with_body = {"POST": requests.post, "PUT": requests.put, "PATCH": requests.patch}

  if http_verb in verbs_without_body:
    response = verbs_without_body[http_verb](url, headers=headers)
  elif http_verb in verbs_with_body:
    response = verbs_with_body[http_verb](url, json=request_body, headers=headers)
  else:
    raise RuntimeError(f"Unknown HTTP verb: {http_verb}")

  if response.status_code != 200:
    raise RuntimeError(f"Error restAPIHelper -> Status: '{response.status_code}' Text: '{response.text}'")

  return json.loads(response.content)

#### RetryCondition (for retrying LLM calls)

In [None]:
def RetryCondition(error):
  """Tell the retry decorator whether a failed LLM call should be retried.

  The error is converted to text and printed; it is considered retryable when
  that text contains one of the known transient-failure markers.

  Args:
      error: The exception (or any object) raised by the LLM call.

  Returns:
      True when the call should be retried, otherwise False.
  """
  message = str(error)
  print(message)

  # Substrings that mark an error as transient; add more here as needed.
  retryable_markers = (
      "RESOURCE_EXHAUSTED",
      "No content in candidate",
  )

  if any(marker in message for marker in retryable_markers):
    print("Retrying...")
    return True

  return False

#### Gemini LLM (default model: gemini-2.0-flash)

In [None]:
@retry(wait=wait_exponential(multiplier=1, min=1, max=60), stop=stop_after_attempt(10), retry=retry_if_exception(RetryCondition), before_sleep=before_sleep_log(logging.getLogger(), logging.INFO))
def GeminiLLM(prompt, model = "gemini-2.0-flash", response_schema = None,
                 temperature = 1, topP = 1, topK = 32):
  """Send a text prompt to Gemini on Vertex AI and return the JSON text reply.

  The call is retried (up to 10 attempts, exponential backoff between 1 and
  60 seconds) whenever RetryCondition accepts the raised error.

  Args:
      prompt: The text prompt.
      model: Publisher model id used in the endpoint URL.
      response_schema: Optional schema sent as `responseSchema`.
      temperature: Sampling temperature; negative values are clamped to 0.
      topP: Sent as `topP`.
      topK: Sent as `topK`, only when model is "gemini-2.0-flash".

  Returns:
      The text of the first part of the first candidate, with "```json",
      "```" and newline characters removed.

  Raises:
      RuntimeError: On a non-200 status, an unparsable body, or a response
          without candidates / content / parts / text.
  """
  # Imported explicitly: the code below uses this submodule, and it is not
  # guaranteed to be loaded by `import google.auth` alone.
  import google.auth.transport.requests

  # https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#supported_models
  # model = "gemini-2.0-flash"

  if temperature < 0:
    temperature = 0

  creds, _ = google.auth.default()
  creds.refresh(google.auth.transport.requests.Request()) # required to get the access token

  headers = {
      "Content-Type" : "application/json",
      "Authorization" : "Bearer " + creds.token
  }

  # https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference
  url = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/publishers/google/models/{model}:generateContent"

  generation_config = {
    "temperature": temperature,
    "topP": topP,
    "maxOutputTokens": 8192,
    "candidateCount": 1,
    "responseMimeType": "application/json",
  }

  # Add in the response schema when it is provided
  if response_schema is not None:
    generation_config["responseSchema"] = response_schema

  if model == "gemini-2.0-flash":
    generation_config["topK"] = topK

  payload = {
    "contents": {
      "role": "user",
      "parts": {
          "text": prompt
      },
    },
    "generation_config": {
      **generation_config
    },
    "safety_settings": {
      "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
      "threshold": "BLOCK_LOW_AND_ABOVE"
    }
  }

  response = requests.post(url, json=payload, headers=headers)

  if response.status_code != 200:
    raise RuntimeError(f"Error with prompt:'{prompt}'  Status:'{response.status_code}' Text:'{response.text}'")

  try:
    json_response = json.loads(response.content)
  except Exception as error:
    raise RuntimeError(f"An error occurred parsing the JSON: {error}") from error

  # Walk candidates[0].content.parts[0].text, failing with the response body
  # in the message. The "No content in candidate" wording is matched by
  # RetryCondition, so it must stay as is.
  if "candidates" not in json_response or len(json_response["candidates"]) == 0:
    raise RuntimeError(f"No candidates: {response.text}")
  candidate = json_response["candidates"][0]

  if "content" not in candidate:
    raise RuntimeError(f"No content in candidate: {response.text}")
  content = candidate["content"]

  if "parts" not in content or len(content["parts"]) == 0:
    raise RuntimeError(f"No parts in content: {response.text}")
  part = content["parts"][0]

  if "text" not in part:
    raise RuntimeError(f"No text in part: {response.text}")
  llm_response = part["text"]

  # Remove some typical response characters (if asking for a JSON reply)
  llm_response = llm_response.replace("```json","")
  llm_response = llm_response.replace("```","")
  llm_response = llm_response.replace("\n","")

  return llm_response

#### Gemini LLM - Multimodal

In [None]:
@retry(wait=wait_exponential(multiplier=1, min=1, max=60), stop=stop_after_attempt(10), retry=retry_if_exception(RetryCondition), before_sleep=before_sleep_log(logging.getLogger(), logging.INFO))
def GeminiLLM_Multimodal(multimodal_prompt_list, model = "gemini-2.0-flash", response_schema = None,
                 temperature = 1, topP = 1, topK = 32):
  """Send a multimodal prompt to Gemini on Vertex AI and return the JSON text reply.

  The call is retried (up to 10 attempts, exponential backoff between 1 and
  60 seconds) whenever RetryCondition accepts the raised error.

  Args:
      multimodal_prompt_list: Value sent as the `parts` of the user content.
          Presumably a list of part dicts (text, inline or file data) -
          confirm against callers.
      model: Publisher model id used in the endpoint URL.
      response_schema: Optional schema sent as `responseSchema`.
      temperature: Sampling temperature; negative values are clamped to 0.
      topP: Sent as `topP`.
      topK: Sent as `topK`, only when model is "gemini-2.0-flash".

  Returns:
      The text of the first part of the first candidate, with "```json",
      "```" and newline characters removed.

  Raises:
      RuntimeError: On a non-200 status, an unparsable body, or a response
          without candidates / content / parts / text.
  """
  # Imported explicitly: the code below uses this submodule, and it is not
  # guaranteed to be loaded by `import google.auth` alone.
  import google.auth.transport.requests

  # https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#supported_models
  # model = "gemini-2.0-flash"

  if temperature < 0:
    temperature = 0

  creds, _ = google.auth.default()
  creds.refresh(google.auth.transport.requests.Request()) # required to get the access token

  headers = {
      "Content-Type" : "application/json",
      "Authorization" : "Bearer " + creds.token
  }

  # https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference
  url = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/publishers/google/models/{model}:generateContent"

  generation_config = {
    "temperature": temperature,
    "topP": topP,
    "maxOutputTokens": 8192,
    "candidateCount": 1,
    "responseMimeType": "application/json",
  }

  # Add in the response schema when it is provided
  if response_schema is not None:
    generation_config["responseSchema"] = response_schema

  if model == "gemini-2.0-flash":
    generation_config["topK"] = topK

  payload = {
    "contents": {
      "role": "user",
      "parts": multimodal_prompt_list
    },
    "generation_config": {
      **generation_config
    },
    "safety_settings": {
      "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
      "threshold": "BLOCK_LOW_AND_ABOVE"
    }
  }

  response = requests.post(url, json=payload, headers=headers)

  if response.status_code != 200:
    # Report only the text parts: the other parts may carry large encoded
    # media that would bury the actual error.
    text_parts = [part["text"] for part in multimodal_prompt_list
                  if isinstance(part, dict) and "text" in part]
    raise RuntimeError(f"Error with prompt:'{text_parts}'  Status:'{response.status_code}' Text:'{response.text}'")

  try:
    json_response = json.loads(response.content)
  except Exception as error:
    raise RuntimeError(f"An error occurred parsing the JSON: {error}") from error

  # Walk candidates[0].content.parts[0].text, failing with the response body
  # in the message. The "No content in candidate" wording is matched by
  # RetryCondition, so it must stay as is.
  if "candidates" not in json_response or len(json_response["candidates"]) == 0:
    raise RuntimeError(f"No candidates: {response.text}")
  candidate = json_response["candidates"][0]

  if "content" not in candidate:
    raise RuntimeError(f"No content in candidate: {response.text}")
  content = candidate["content"]

  if "parts" not in content or len(content["parts"]) == 0:
    raise RuntimeError(f"No parts in content: {response.text}")
  part = content["parts"][0]

  if "text" not in part:
    raise RuntimeError(f"No text in part: {response.text}")
  llm_response = part["text"]

  # Remove some typical response characters (if asking for a JSON reply)
  llm_response = llm_response.replace("```json","")
  llm_response = llm_response.replace("```","")
  llm_response = llm_response.replace("\n","")

  return llm_response

#### Imagen3 Image Generation

In [None]:
def ImageGen(prompt):
  """Generate one image with Imagen 3 and save it as a local PNG file.

  Args:
      prompt: Text prompt describing the image.

  Returns:
      The name of the written file (a random UUID plus ".png", created in
      the current working directory).

  Raises:
      RuntimeError: On a non-200 status, or when the response carries no
          usable prediction.
  """
  # Imported explicitly: the code below uses this submodule, and it is not
  # guaranteed to be loaded by `import google.auth` alone.
  import google.auth.transport.requests

  creds, project = google.auth.default()
  creds.refresh(google.auth.transport.requests.Request())

  # google.auth.default() can return None for the project; fall back to the
  # notebook-level project_id that the Gemini helpers use.
  project = project or project_id

  headers = {
      "Content-Type" : "application/json",
      "Authorization" : "Bearer " + creds.token
  }

  model_version = "imagen-3.0-generate-001" # imagen-3.0-fast-generate-001
  #model_version = "imagen-3.0-generate-preview-0611" # Preview Access Model

  # https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/image-generation
  url = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/google/models/{model_version}:predict"

  payload = {
    "instances": [
      {
        "prompt": prompt
      }
    ],
    "parameters": {
      "sampleCount": 1,
      "personGeneration" : "dont_allow"  # change to allow_adult for people generation
    }
  }

  response = requests.post(url, json=payload, headers=headers)

  if response.status_code != 200:
    raise RuntimeError(f"Error with prompt:'{prompt}'  Status:'{response.status_code}' Text:'{response.text}'")

  response_json = json.loads(response.content)

  if "blocked" in response_json:
    print(f"Blocked: {response_json['blocked']}")

  # An empty predictions list is reported the same way as a missing one
  # instead of surfacing as a bare IndexError.
  if "predictions" not in response_json or len(response_json["predictions"]) == 0:
    raise RuntimeError(f"No predictions in response: {response.content}")

  prediction = response_json["predictions"][0]
  if "bytesBase64Encoded" not in prediction:
    raise RuntimeError(f"No image bytes in prediction: {response.content}")

  image_data = base64.b64decode(prediction["bytesBase64Encoded"])
  filename = str(uuid.uuid4()) + ".png"
  with open(filename, "wb") as f:
    f.write(image_data)
  print("Image generated OK.")
  return filename

#### Helper Functions

In [None]:
def RunQuery(sql):
  """Run a BigQuery statement.

  Statements that start with SELECT or WITH (ignoring leading whitespace and
  letter case) are treated as queries and their rows are returned. Anything
  else is run as an interactive job that is polled every 2 seconds until it
  reaches the DONE state.

  Args:
      sql: The SQL text to execute.

  Returns:
      A DataFrame for SELECT/WITH statements, otherwise True on success.

  Raises:
      Exception: Carrying the job's error_result when a non-query job fails.
  """
  import time
  from google.cloud import bigquery
  client = bigquery.Client()

  # Normalize before testing the keyword so that an indented or lowercase
  # query is not misrouted to the job-polling branch (which returns True).
  statement_head = sql.lstrip().upper()
  if statement_head.startswith(("SELECT", "WITH")):
    return client.query(sql).to_dataframe()

  job_config = bigquery.QueryJobConfig(priority=bigquery.QueryPriority.INTERACTIVE)
  query_job = client.query(sql, job_config=job_config)

  # Check on the progress by getting the job's updated state.
  while True:
    query_job = client.get_job(query_job.job_id, location=query_job.location)
    print("Job {} is currently in state {} with error result of {}".format(query_job.job_id, query_job.state, query_job.error_result))
    if query_job.state == "DONE":
      break
    time.sleep(2)

  if query_job.error_result is None:
    return True
  raise Exception(query_job.error_result)

In [None]:
def GetTableSchema(project_id, dataset_name, table_name):
  """Return the schema of a BigQuery table as a JSON string.

  Args:
      project_id: Project that owns the table.
      dataset_name: Dataset that contains the table.
      table_name: Name of the table.

  Returns:
      The text written by the client's schema_to_json for the table schema.
  """
  import io
  from google.cloud import bigquery

  client = bigquery.Client()

  # A fully-qualified table id replaces the deprecated client.dataset(...)
  # reference chain.
  table = client.get_table(f"{project_id}.{dataset_name}.{table_name}")

  # schema_to_json writes to a file-like object, so collect it in memory.
  schema_buffer = io.StringIO("")
  client.schema_to_json(table.schema, schema_buffer)
  return schema_buffer.getvalue()

In [None]:
def GetDistinctValues(project_id, dataset_name, table_name, field_name):
  """Return the distinct values of one column as a comma-separated string.

  Args:
      project_id: Project that owns the table.
      dataset_name: Dataset that contains the table.
      table_name: Name of the table.
      field_name: Column whose distinct values are aggregated.

  Returns:
      The aggregated string, or "" when the aggregate is NULL.
  """
  from google.cloud import bigquery

  distinct_sql = f"""
  SELECT STRING_AGG(DISTINCT {field_name}, "," ) AS result
    FROM `{project_id}.{dataset_name}.{table_name}`
  """

  rows = bigquery.Client().query(distinct_sql).to_dataframe()
  aggregated = rows['result'].iloc[0]
  return "" if aggregated is None else aggregated

In [None]:
def GetStartingValue(project_id, dataset_name, table_name, field_name):
  """Return the next free value for a numeric column: MAX(column) + 1.

  Args:
      project_id: Project that owns the table.
      dataset_name: Dataset that contains the table.
      table_name: Name of the table.
      field_name: Column whose maximum is read.

  Returns:
      MAX(field_name) + 1, or 1 when the maximum is NULL.
  """
  from google.cloud import bigquery

  next_value_sql = f"""
  SELECT IFNULL(MAX({field_name}),0) + 1 AS result
    FROM `{project_id}.{dataset_name}.{table_name}`
  """

  rows = bigquery.Client().query(next_value_sql).to_dataframe()
  return rows['result'].iloc[0]

In [None]:
def PrettyPrintJson(json_string):
  """Re-serialize a JSON string with a 2-space indent and return the result."""
  return json.dumps(json.loads(json_string), indent=2)

In [None]:
# This was generated by GenAI

def copy_file_to_gcs(local_file_path, bucket_name, destination_blob_name):
  """Upload a local file to a Google Cloud Storage bucket.

  Files ending in ".html" or ".json" are uploaded with an explicit UTF-8
  content type; every other file is uploaded without one.

  Args:
      local_file_path: The full path to the local file.
      bucket_name: The name of the GCS bucket to upload to.
      destination_blob_name: The desired name of the uploaded file in the bucket.

  Returns:
      None

  Raises:
      FileNotFoundError: If local_file_path does not exist.
  """

  import os
  from google.cloud import storage

  # Refuse to continue when there is nothing to upload
  if not os.path.exists(local_file_path):
      raise FileNotFoundError(f"Local file '{local_file_path}' not found.")

  # Client -> bucket -> blob at the destination path
  target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)

  # Pick the content type from the file extension ("" means none is passed)
  if local_file_path.endswith(".json"):
    content_type = "application/json; charset=utf-8"
  elif local_file_path.endswith(".html"):
    content_type = "text/html; charset=utf-8"
  else:
    content_type = ""

  upload_options = {"content_type": content_type} if content_type != "" else {}
  target_blob.upload_from_filename(local_file_path, **upload_options)

  print(f"File '{local_file_path}' uploaded to GCS bucket '{bucket_name}' as '{destination_blob_name}.  Content-Type: {content_type}'.")

### <font color='#4285f4'>BigQuery Table</font>

In [None]:
%%bigquery

-- Creates the customer table if it does not exist yet; the statement is a no-op otherwise.
-- NOTE(review): the commented-out drop below names `chocolate_ai.chocolate_ai`, not the
-- `chocolate_ai.customer` table created here - confirm the intended table before uncommenting.
--drop table if exists `chocolate_ai.chocolate_ai`;
CREATE TABLE IF NOT EXISTS `chocolate_ai.customer`
(
    customer_id INTEGER NOT NULL OPTIONS(description="Primary key."),
    customer_name STRING NOT NULL OPTIONS(description="Name of the customer."),
    customer_yob INT NOT NULL OPTIONS(description="Customer year of birth"),
    customer_email STRING NOT NULL OPTIONS(description="Customer's email address"),
    customer_inception_date DATE NOT NULL OPTIONS(description="Date of first customer interaction date."),
    country_code STRING NOT NULL OPTIONS(description="Country code of the customer"),
)
CLUSTER BY customer_id;

### <font color='#4285f4'>Generate Customer Names</font>

In [None]:
# Response schema for the LLM reply. It was produced with the prompt:
#   "Write me the json in OpenAPI 3.0 schema object for the below object.
#    Make all fields required."
#  {
#    "customer_name" : "text",
#    "customer_yob" : 2000,
#    "customer_email" : "text",
#    "customer_inception_date" : "2024-09-19",
#    "country_code" : "FR"
#  }
# Every name listed under "required" is also declared under "properties".
response_schema = {
  "type": "object",
  "required": [
    "customer_name",
    "customer_yob",
    "customer_email",
    "customer_inception_date",
    "country_code"
  ],
  "properties": {
    "customer_name": {
      "type": "string"
    },
    "customer_yob": {
      "type": "integer"
    },
    "customer_email": {
      "type": "string"
    },
    "customer_inception_date": {
      "type": "string",
      "format": "date"
    },
    "country_code": {
      "type": "string"
    }
  }
}


def _escape_sql_string(value):
  """Escape a value for use inside a single-quoted BigQuery string literal."""
  # Backslashes first, otherwise the backslash added for a quote would be doubled.
  return str(value).replace("\\", "\\\\").replace("'", "\\'").replace("\n", " ")


# (upper bound of the 1-100 roll, country name used in the prompt, country code)
# i.e. 50% FR, 25% GB, 10% ES, 5% CH, 5% IT, 5% US
country_distribution = [
  (50, "France", "FR"),
  (75, "United Kingdom", "GB"),
  (85, "Spain", "ES"),
  (90, "Switzerland", "CH"),
  (95, "Italy", "IT"),
  (100, "United States", "US"),
]

# Stop retrying one customer after this many failures so that a permanent
# error (permissions, missing table, ...) cannot loop forever.
max_attempts_per_customer = 5

table_schema = GetTableSchema(project_id, dataset_name, table_name)
starting_customer_id = GetStartingValue(project_id, dataset_name, table_name, "customer_id")
existing_customer_names = GetDistinctValues(project_id, dataset_name, table_name, "customer_name")

# 11,000 customers to generate

# To generate specific customer ids or to regenerate a customer run this code:
# starting_customer_id = 1
# for customer_id in range(starting_customer_id, starting_customer_id + 1):

for customer_id in range(starting_customer_id, starting_customer_id + 10):
  for attempt in range(1, max_attempts_per_customer + 1):
    try:
      rand_int = random.randint(1, 100)
      country, country_code = next(
          (name, code) for upper_bound, name, code in country_distribution if rand_int <= upper_bound
      )

      prompt = f"""You are a database engineer and need to generate data for a table for the below schema.
      I need you to generate a customer name based in the country {country}.
      The customer name should be unique and should reflect a native name in the customer's country.
      Read the description of each field for valid values.
      The customer_inception_date is a date should be between 2020 and 2024.
      The customer_inception_date should be in the Google Cloud BigQuery format of yyyy-mm-dd.
      The customer_yob shold be between 1950 and 2006.
      Encourage unconventional ideas and fresh perspectives and inspires unique variations when creating the customer's name.

      Here is the table schema:
      <schema>
      {table_schema}
      </schema>

      Here are the existing customer name, do not reuse any of these names:
      <existing_customer_names>
      {existing_customer_names}
      </existing_customer_names>
      """

      # Use LLM to generate data
      customer_response = GeminiLLM(prompt, response_schema=response_schema)

      # Parse response (we know the JSON since we passed it to our LLM)
      customer_response = json.loads(customer_response)
      print(json.dumps(customer_response, indent=2))
      raw_customer_name = customer_response["customer_name"]
      customer_name = _escape_sql_string(raw_customer_name)
      # int() rejects anything that is not a plain number before it reaches the SQL text
      customer_yob = int(customer_response["customer_yob"])
      customer_email = _escape_sql_string(customer_response["customer_email"])
      customer_inception_date = _escape_sql_string(customer_response["customer_inception_date"])

      # Optional, only needed if regenerating a customer
      #RunQuery(f"DELETE FROM `{project_id}.{dataset_name}.{table_name}` WHERE customer_id = {customer_id}")

      # Insert to BigQuery
      # Returning a known json schema and then generating an insert statement seems more reliable then having the LLM generating the SQL
      sql = f"""INSERT INTO `{project_id}.{dataset_name}.{table_name}`
      (customer_id, customer_name, customer_yob, customer_email, customer_inception_date, country_code)
      VALUES ({customer_id}, '{customer_name}', {customer_yob}, '{customer_email}', '{customer_inception_date}', '{country_code}')"""

      RunQuery(sql)

      # Remember the unescaped name so the next prompt lists it as stored
      if existing_customer_names:
        existing_customer_names = f"{existing_customer_names},{raw_customer_name}"
      else:
        existing_customer_names = raw_customer_name

      break
    except Exception as error:
      print(f"An error occurred: {error}")
  else:
    # Runs only when every attempt failed (the loop never hit `break`)
    raise RuntimeError(f"Giving up on customer_id {customer_id} after {max_attempts_per_customer} attempts")

### <font color='#4285f4'>Verify Customers</font>

In [None]:
%%bigquery

-- Make sure there are no gaps (for 250 customers or 10,000 - depends on how many you generated)
-- Every row returned is an id in 1..250 that has no matching customer_id;
-- an empty result means there are no gaps in that range.
-- Change the upper bound of GENERATE_ARRAY to match the number of customers generated.
WITH sequence_table AS (
SELECT id
    FROM UNNEST(GENERATE_ARRAY(1, 250)) AS id
)
SELECT id , customer.customer_id
  FROM sequence_table
        LEFT JOIN `chocolate_ai.customer` as customer
               ON sequence_table.id = customer.customer_id
  WHERE customer.customer_id is null

In [None]:
%%bigquery

-- Check for duplicate names
-- Lists every customer row whose customer_name occurs more than once,
-- ordered so that rows sharing a name appear together.
-- An empty result means all names are unique.
SELECT *
  FROM `chocolate_ai.customer`
 WHERE customer_name in (SELECT customer_name FROM `chocolate_ai.customer` GROUP BY ALL HAVING COUNT(*) > 1)
 ORDER BY customer_name, customer_id

### <font color='#4285f4'>Faker code</font>

In [None]:
# Import the module (not `from datetime import datetime`): the rest of the
# notebook uses `datetime.datetime...`, and rebinding the name to the class
# would break those cells when they are re-run.
import datetime
import random

import faker

# Initialize Faker with specific locales
fake_fr = faker.Faker('fr_FR')
fake_gb = faker.Faker('en_GB')
fake_es = faker.Faker('es_ES')
fake_ch = faker.Faker('de_CH')
fake_it = faker.Faker('it_IT')
fake_us = faker.Faker('en_US')

# (upper bound of the 1-100 roll, Faker instance, country code)
# i.e. 50% FR, 25% GB, 10% ES, 5% CH, 5% IT, 5% US
fakers_by_roll = [
    (50, fake_fr, "FR"),
    (75, fake_gb, "GB"),
    (85, fake_es, "ES"),
    (90, fake_ch, "CH"),
    (95, fake_it, "IT"),
    (100, fake_us, "US"),
]

fake_customer_list = []
starting_customer_id = GetStartingValue(project_id, dataset_name, table_name, "customer_id")
date_format = "%Y-%m-%d"

# Bounds for the inception date, parsed once instead of once per customer
inception_start = datetime.datetime.strptime('2020-01-01', date_format)
inception_end = datetime.datetime.strptime('2024-12-31', date_format)

for customer_id in range(starting_customer_id, starting_customer_id + 10):
    rand_int = random.randint(1, 100)
    fake, country_code = next(
        (locale_faker, code) for upper_bound, locale_faker, code in fakers_by_roll if rand_int <= upper_bound
    )

    # Generate data using Faker
    customer_name = fake.name()
    customer_yob = random.randint(1950, 2006)
    customer_email = fake.email()

    customer_inception_date = fake.date_between(start_date=inception_start, end_date=inception_end)

    fake_customer_list.append(
        {
            "customer_id": customer_id,
            "customer_name": customer_name,
            "customer_yob": customer_yob,
            "customer_email": customer_email,
            "customer_inception_date": customer_inception_date,
            "country_code" : country_code
        }
    )

In [None]:
# Bulk-append the Faker-generated customers to the customer table with a
# single load job. Nothing happens when the list is empty.
if fake_customer_list:
  import pandas as pd
  from google.cloud import bigquery

  # Load the customer table (in bulk). Use dataset_name so the rows land in
  # the same dataset that the starting customer_id was read from.
  table_id = f"{project_id}.{dataset_name}.{table_name}"

  # The columns list fixes the column order of the frame
  dataframe = pd.DataFrame(
      fake_customer_list, # Your source data
      columns=[
          "customer_id",
          "customer_name",
          "customer_yob",
          "customer_email",
          "customer_inception_date",
          "country_code"
      ],
  )

  # Explicit schema so the load does not depend on dtype inference
  job_config = bigquery.LoadJobConfig(
      schema=[
          bigquery.SchemaField("customer_id", bigquery.enums.SqlTypeNames.INT64, mode="REQUIRED"),
          bigquery.SchemaField("customer_name", bigquery.enums.SqlTypeNames.STRING, mode="REQUIRED"),
          bigquery.SchemaField("customer_yob", bigquery.enums.SqlTypeNames.INT64, mode="REQUIRED"),
          bigquery.SchemaField("customer_email", bigquery.enums.SqlTypeNames.STRING, mode="REQUIRED"),
          bigquery.SchemaField("customer_inception_date", bigquery.enums.SqlTypeNames.DATE, mode="REQUIRED"),
          bigquery.SchemaField("country_code", bigquery.enums.SqlTypeNames.STRING, mode="REQUIRED"),
      ],
      write_disposition="WRITE_APPEND",
  )

  load_client = bigquery.Client()
  job = load_client.load_table_from_dataframe(dataframe, table_id, job_config=job_config)
  job.result()  # Wait for the job to complete.

  table = load_client.get_table(table_id)  # Make an API request.
  print("Loaded {} rows and {} columns to {}".format(len(fake_customer_list), len(table.schema), table_id))

### <font color='#4285f4'>Clean Up</font>

In [None]:
# Placeholder

### <font color='#4285f4'>Reference Links</font>

- [Google.com](https://www.google.com)