gemini/sample-apps/genwealth/api/prospectus.ts (115 lines of code) (raw):

import { Storage } from '@google-cloud/storage'; import { SearchServiceClient, DocumentServiceClient, protos } from '@google-cloud/discoveryengine'; import { v4 as uuidv4 } from 'uuid'; import { Database } from './database'; /** */ export class Prospectus { private readonly storageClient: Storage; private readonly searchClient: SearchServiceClient; private readonly bucketName: string; private readonly metadataBucketName: string; constructor(private db: Database) { this.storageClient = new Storage(); this.searchClient = new SearchServiceClient(); this.bucketName = process.env['PROSPECTUS_BUCKET'] ?? ''; if (this.bucketName === '') throw new Error("PROSPECTUS_BUCKET environment variable not set"); this.metadataBucketName = this.bucketName + "-metadata"; } /** Upload a prospectus and generate the metadata for indexing in Vertex AI Search & Conversation. */ async upload(buffer: Buffer, filename: string, ticker: string) { ticker = ticker.toUpperCase(); const prospectusBlob = this.storageClient.bucket(this.bucketName).file(filename); await prospectusBlob.save(buffer); const metadata = this.getMetadata(prospectusBlob.cloudStorageURI.href, ticker); const metadataBlob = this.storageClient.bucket(this.metadataBucketName).file(`${ticker}.jsonl`); await metadataBlob.save(JSON.stringify(metadata)); console.log(`Uploaded ${filename} to ${this.bucketName}`); this.importDocument(metadataBlob.cloudStorageURI.href); } async search(query: string, ticker: string) { ticker = ticker.toUpperCase(); const request = { pageSize: 5, query: query, contentSearchSpec: { summarySpec: { summaryResultCount: 5, ignoreAdversarialQuery: true, includeCitations: false, modelSpec: { version: 'preview', }, }, snippetSpec: { returnSnippet: false }, extractiveContentSpec: { maxExtractiveAnswerCount: 1 } }, filter: `ticker: ANY(\"${ticker}\")`, servingConfig: await this.getParent(), }; // Perform search request const response = await this.searchClient.search(request, { autoPaginate: false, }); const summaryObject = response[2].summary; const summary: string = summaryObject?.summaryText ?? ''; console.log(summary); return summary; } async getTickers(): Promise<string[]> { const query = 'SELECT DISTINCT(ticker) FROM langchain_vector_store'; try { const rows = await this.db.query(query); return rows.map((row) => row.ticker); } catch (error) { throw new Error(`getTickers errored with query: ${query}.\nError: ${(error as Error)?.message}`); } } private getMetadata(gcsPath: string, ticker: string) { return { id: uuidv4(), structData: { ticker: ticker }, content: { mimeType: "application/pdf", uri: gcsPath } }; } private async importDocument(gsUri: string) { const docsClient = new DocumentServiceClient(); const importMode = protos.google.cloud.discoveryengine.v1.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL const projectId = process.env['PROJECT_ID'] ?? await this.searchClient.getProjectId(); const dataStoreId = process.env['DATASTORE_ID']; if (!dataStoreId) { throw new Error('DATASTORE_ID environment variable not set'); } const parent = `projects/${projectId}/locations/global/dataStores/${dataStoreId}/branches/default_branch`; const request = { parent: parent, gcsSource: { dataSchema: "document", inputUris: [gsUri] }, reconciliationMode: importMode }; await docsClient.importDocuments(request); console.log('imported', gsUri); } private async getParent() { const projectId = process.env['PROJECT_ID'] ?? await this.searchClient.getProjectId(); console.log('search using project id', projectId ); const dataStoreId = process.env['DATASTORE_ID']; if (!dataStoreId) { throw new Error('DATASTORE_ID environment variable not set'); } const searchServingConfig = this.searchClient.projectLocationCollectionDataStoreServingConfigPath( projectId, "global", "default_collection", dataStoreId, "default_search" ); return searchServingConfig; } }