async function inspectGcsFileSampling()

in dlp/inspectGcsFileWithSampling.js [53:145]


  /**
   * Creates a DLP inspect job over a sampled Cloud Storage file set, waits for
   * the job's Pub/Sub completion notification, then logs the job state and the
   * number of findings per infoType.
   *
   * Reads `gcsUri`, `infoTypes`, `projectId`, `topicId`, `subscriptionId`,
   * `DLP`, `dlp`, `pubsub` and `DLP_JOB_WAIT_TIME` from the enclosing scope
   * (they are defined outside this function).
   *
   * @returns {Promise<void>} Settles after the results have been logged.
   * @throws {Error} `Timeout` when no notification for the created job arrives
   *   before the `DLP_JOB_WAIT_TIME` timer fires; also rejects with any error
   *   emitted by the subscription or raised by the client calls.
   */
  async function inspectGcsFileSampling() {
    // Specify the GCS file to be inspected and sampling configuration
    const storageItemConfig = {
      cloudStorageOptions: {
        fileSet: {url: gcsUri},
        bytesLimitPerFile: 200,
        filesLimitPercent: 90,
        fileTypes: [DLP.protos.google.privacy.dlp.v2.FileType.TEXT_FILE],
        sampleMethod:
          DLP.protos.google.privacy.dlp.v2.CloudStorageOptions.SampleMethod
            .RANDOM_START,
      },
    };

    // Specify how the content should be inspected.
    const inspectConfig = {
      infoTypes,
      minLikelihood: DLP.protos.google.privacy.dlp.v2.Likelihood.POSSIBLE,
      includeQuote: true,
      excludeInfoTypes: true,
    };

    // Specify the action that is triggered when the job completes.
    const actions = [
      {
        pubSub: {
          topic: `projects/${projectId}/topics/${topicId}`,
        },
      },
    ];

    // Create the request for the job configured above.
    const request = {
      parent: `projects/${projectId}/locations/global`,
      inspectJob: {
        inspectConfig,
        storageConfig: storageItemConfig,
        actions,
      },
    };

    // Verify the Pub/Sub topic and get a handle to the existing subscription
    // that will deliver the job notification.
    const [topicResponse] = await pubsub.topic(topicId).get();
    const subscription = topicResponse.subscription(subscriptionId);

    // Use the client to send the request.
    const [jobsResponse] = await dlp.createDlpJob(request);
    const jobName = jobsResponse.name;

    // Watch the Pub/Sub subscription until the DLP job finishes
    await new Promise((resolve, reject) => {
      // Shared teardown for every exit path (match, error, timeout) so the
      // subscription is never left with stale listeners or a pending timer.
      // Only invoked asynchronously, after the consts below are initialised.
      const cleanup = () => {
        clearTimeout(timer);
        subscription.removeListener('message', messageHandler);
        subscription.removeListener('error', errorHandler);
      };

      const messageHandler = message => {
        if (message.attributes && message.attributes.DlpJobName === jobName) {
          message.ack();
          cleanup();
          resolve(jobName);
        } else {
          // Notification for some other job: hand it back unacknowledged.
          message.nack();
        }
      };

      const errorHandler = err => {
        cleanup();
        reject(err);
      };

      // Give up if the notification does not arrive in time.
      const timer = setTimeout(() => {
        cleanup();
        reject(new Error('Timeout'));
      }, DLP_JOB_WAIT_TIME);

      subscription.on('message', messageHandler);
      subscription.on('error', errorHandler);
    });

    const [job] = await dlp.getDlpJob({name: jobName});
    console.log(`Job ${job.name} status: ${job.state}`);

    // A job without inspect details or a result is reported as having no
    // findings instead of crashing on a missing property.
    const inspectResult = job.inspectDetails && job.inspectDetails.result;
    const infoTypeStats = (inspectResult && inspectResult.infoTypeStats) || [];
    if (infoTypeStats.length > 0) {
      for (const infoTypeStat of infoTypeStats) {
        console.log(
          `  Found ${infoTypeStat.count} instance(s) of infoType ${infoTypeStat.infoType.name}.`
        );
      }
    } else {
      console.log('No findings.');
    }
  }