utils/topic-model-label-evaluation/topic_model_label_eval.sql (74 lines of code) (raw):
-- Declare all variables at the start
DECLARE project_id STRING;
DECLARE dataset_id STRING;
DECLARE table_name STRING;
DECLARE insights_table STRING;
DECLARE query_string STRING;
-- Set variables
SET project_id = 'gsd-ccai-insights-offering';
SET dataset_id = 'ccai_insights_export';
SET table_name = 'test_export';
-- Concatenate to create fully qualified table name
SET insights_table = CONCAT(project_id, '.', dataset_id, '.', table_name);
-- Construct the query string
SET query_string = '''
WITH IssuesAgg AS (
SELECT
conversationName,
ARRAY_AGG(issues_unnested ORDER BY issues_unnested.score DESC) AS issues_array
FROM
''' || insights_table || ''',
UNNEST(issues) AS issues_unnested
GROUP BY
conversationName
),
LabelsAgg AS (
SELECT
conversationName,
STRING_AGG(TO_JSON_STRING(labels_unnested)) AS labels_json
FROM
''' || insights_table || ''',
UNNEST(labels) AS labels_unnested
GROUP BY
conversationName
)
SELECT
main.conversationName,
transcript,
turnCount,
agent.agentId,
-- Extract top topic name from the first element in the ordered array
JSON_EXTRACT_SCALAR(TO_JSON_STRING(issues_array[SAFE_OFFSET(0)]), '$.name') AS Top_Topic,
-- Extract top topic score from the first element in the ordered array and cast to FLOAT64
CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(issues_array[SAFE_OFFSET(0)]), '$.score') AS FLOAT64) AS Top_Topic_scores,
-- Extract names of other topics and concatenate them into a single string
ARRAY_TO_STRING(
ARRAY(
SELECT
JSON_EXTRACT_SCALAR(TO_JSON_STRING(issue), '$.name')
FROM
UNNEST(issues_array) AS issue
WHERE
issue != issues_array[SAFE_OFFSET(0)] -- Exclude the top topic
),
', '
) AS Other_topics,
-- Extract scores of other topics, convert to string, and concatenate into a single string
ARRAY_TO_STRING(
ARRAY(
SELECT
CAST(JSON_EXTRACT_SCALAR(TO_JSON_STRING(issue), '$.score') AS STRING)
FROM
UNNEST(issues_array) AS issue
WHERE
issue != issues_array[SAFE_OFFSET(0)] -- Exclude the top topic
),
', '
) AS Other_topics_scores,
l.labels_json,
FARM_FINGERPRINT(main.conversationName) AS random_seed -- Create a random seed based on conversationName
FROM
''' || insights_table || ''' main
LEFT JOIN
UNNEST(main.agents) AS agent -- Unnest the agents array to access agentId
LEFT JOIN
IssuesAgg i ON main.conversationName = i.conversationName
LEFT JOIN
LabelsAgg l ON main.conversationName = l.conversationName
ORDER BY
random_seed -- Use the deterministic random seed for ordering
LIMIT 150;
''';
-- Execute the dynamically constructed query
EXECUTE IMMEDIATE query_string;