async getContent_()

in marketing-analytics/activation/data-tasks-coordinator/src/tasks/gmc/gmc_webpage_fetcher.js [74:118]


  async getContent_(parameters) {
    const records = parameters.records.split('\n');
    const { userAgent, option = {} } = this.config.source;
    const numberOfThreads =
      getProperValue(option.numberOfThreads, NUMBER_OF_THREADS, false);
    const qps = getProperValue(option.qps, QUERIES_PER_SECOND, false);
    const mergeBatchResults = (batchResults) => batchResults;
    const managedSend = apiSpeedControl(
      1, numberOfThreads, qps, mergeBatchResults);

    const fetchSinglePage = async (line, batchId) => {
      const { link, id } = JSON.parse(line);
      const requestOptions = { url: link };
      if (userAgent) {
        requestOptions.headers = { 'User-Agent': userAgent };
      }
      const result = { id, retry: 0, error: '' };
      let retried = false;
      while (true) {
        try {
          const response = await request(requestOptions);
          const dom = new JSDOM(response.data);
          const offer =
            dom.window.document.querySelector('[itemtype="http://schema.org/Offer"]');
          for (let i = 0; i < offer.children.length; i++) {
            const key = offer.children[i].getAttribute('itemprop');
            const value = offer.children[i].getAttribute('content').trim();
            result[key] = value;
          }
          break;
        } catch (error) {
          if (result.retry < 3) {
            result.retry += 1;
            continue;
          }
          result.error = error.message || JSON.stringify(error);
          this.logger.error(result);
          break;
        }
      }
      return JSON.stringify(result);
    };
    const results = await managedSend(fetchSinglePage, records, '');
    return results.join('\n');
  }