protected void run()

in gobblin-runtime/src/main/java/org/apache/gobblin/runtime/StreamModelTaskRunner.java [79:170]


  /**
   * Builds the record pipeline for this task, attaches one {@link Fork} per forked stream, connects the
   * stream on a dedicated thread, and then blocks until every fork reports done, the connecting thread
   * records an uncaught exception, or the configured maximum wait elapses.
   *
   * <p>The pipeline is assembled in this order:
   * <ol>
   *   <li>the extractor's record stream, published as a connectable stream and wrapped so that a cancel
   *       calls {@code extractor.shutdown()};</li>
   *   <li>a stage that calls {@code task.onRecordExtract()};</li>
   *   <li>for streaming tasks only, a stage that wraps each record's watermark in an
   *       {@link AcknowledgableWatermark}, tracks it when a tracker is present, and registers it as a
   *       callback on the record;</li>
   *   <li>the configured {@code recordStreamProcessors}, or, when that list is empty, the converter
   *       (a {@link MultiConverter} is unpacked into its individual converters);</li>
   *   <li>the row checker;</li>
   *   <li>the fork operator, via {@link Forker}.</li>
   * </ol>
   *
   * @throws TimeoutException if the wait condition is not satisfied within
   *         {@code ConfigurationKeys.FORK_MAX_WAIT_MININUTES} minutes
   * @throws Exception any uncaught exception recorded by the connecting thread's handler is rethrown via
   *         {@code Throwables.propagate}; other failures raised while assembling the pipeline also surface here
   */
  protected void run() throws Exception {
    // Upper bound (in minutes) on how long to wait for the forks, and the polling interval used while waiting.
    // NOTE(review): the unit of FORK_FINISHED_CHECK_INTERVAL is not visible here - presumably milliseconds; confirm.
    long maxWaitInMinute = taskState.getPropAsLong(ConfigurationKeys.FORK_MAX_WAIT_MININUTES, ConfigurationKeys.DEFAULT_FORK_MAX_WAIT_MININUTES);
    long initialDelay = taskState.getPropAsLong(ConfigurationKeys.FORK_FINISHED_CHECK_INTERVAL, ConfigurationKeys.DEFAULT_FORK_FINISHED_CHECK_INTERVAL);

    // Get the fork operator. By default IdentityForkOperator is used with a single branch.
    // Registering it with the closer hands responsibility for closing it to the closer.
    ForkOperator forkOperator = closer.register(this.taskContext.getForkOperator());

    RecordStreamWithMetadata<?, ?> stream = this.extractor.recordStream(this.shutdownRequested);
    // This prevents emitting records until a connect() call is made on the connectable stream
    ConnectableFlowable connectableStream = stream.getRecordStream().publish();

    // A cancel is not propagated to the extractor's record generator once publish() has turned the stream into a
    // hot Flowable. In case the extractor is stuck reading a record when cancel is called, we call shutdown
    // directly to force it to stop, instead of only setting the shutdownRequested flag on cancel.
    Flowable streamWithShutdownOnCancel = connectableStream.doOnCancel(this.extractor::shutdown);

    stream = stream.withRecordStream(streamWithShutdownOnCancel);

    // Notify the task about extraction; the record itself is passed through unchanged.
    stream = stream.mapRecords(r -> {
      this.task.onRecordExtract();
      return r;
    });
    if (this.task.isStreamingTask()) {

      // Start watermark manager and tracker
      if (this.watermarkTracker.isPresent()) {
        this.watermarkTracker.get().start();
      }
      // NOTE(review): unlike the tracker, get() is called here without a presence check - presumably the
      // manager and the watermark storage are always set for streaming tasks; confirm.
      this.watermarkManager.get().start();

      // The raw source extractor is cast to StreamingExtractor, so streaming tasks require that type here.
      ((StreamingExtractor) this.taskContext.getRawSourceExtractor()).start(this.watermarkStorage.get());

      // Wrap the record's watermark so it can be acknowledged: track it when a tracker is present and
      // register it as a callback on the record.
      stream = stream.mapRecords(r -> {
        AcknowledgableWatermark ackableWatermark = new AcknowledgableWatermark(r.getWatermark());
        if (watermarkTracker.isPresent()) {
          watermarkTracker.get().track(ackableWatermark);
        }
        r.addCallBack(ackableWatermark);
        return r;
      });
    }

    // Use the recordStreamProcessor list if it is configured; it may hold any RecordStreamProcessor
    // implementation, applied in list order. Otherwise fall back to the configured converter.
    if (!this.recordStreamProcessors.isEmpty()) {
      for (RecordStreamProcessor streamProcessor : this.recordStreamProcessors) {
        stream = streamProcessor.processStream(stream, this.taskState);
      }
    } else {
      if (this.converter instanceof MultiConverter) {
        // A MultiConverter is unpacked so that each contained converter is applied as its own stage
        for (Converter cverter : ((MultiConverter) this.converter).getConverters()) {
          stream = cverter.processStream(stream, this.taskState);
        }
      } else {
        stream = this.converter.processStream(stream, this.taskState);
      }
    }
    // The row checker is always applied after the processors/converters.
    stream = this.rowChecker.processStream(stream, this.taskState);

    Forker.ForkedStream<?, ?> forkedStreams = new Forker().forkStream(stream, forkOperator, this.taskState);

    // Forks run asynchronously when there is more than one forked stream, or when the task does not declare
    // single-branch tasks to be synchronous.
    boolean isForkAsync = !this.task.areSingleBranchTasksSynchronous(this.taskContext) || forkedStreams.getForkedStreams().size() > 1;
    // Buffer size handed to observeOn for asynchronous forks.
    int bufferSize =
        this.taskState.getPropAsInt(ConfigurationKeys.FORK_RECORD_QUEUE_CAPACITY_KEY, ConfigurationKeys.DEFAULT_FORK_RECORD_QUEUE_CAPACITY);

    for (int fidx = 0; fidx < forkedStreams.getForkedStreams().size(); fidx ++) {
      RecordStreamWithMetadata<?, ?> forkedStream = forkedStreams.getForkedStreams().get(fidx);
      // Null entries are skipped; the fork index passed to Fork is still the position in the list.
      if (forkedStream != null) {
        if (isForkAsync) {
          // Move consumption of this branch onto the fork executor.
          // NOTE(review): the 'false' argument is presumably RxJava's delayError flag - confirm.
          forkedStream = forkedStream.mapStream(f -> f.observeOn(Schedulers.from(this.taskExecutor.getForkExecutor()), false, bufferSize));
        }
        Fork fork = new Fork(this.taskContext, forkedStream.getGlobalMetadata().getSchema(), forkedStreams.getForkedStreams().size(), fidx, this.taskMode);
        fork.consumeRecordStream(forkedStream);
        // The fork is registered with an already-completed future holding null; completion is instead
        // detected below by polling Fork::isDone.
        this.forks.put(Optional.of(fork), Optional.of(Futures.immediateFuture(null)));
        this.task.configureStreamingFork(fork);
      }
    }
    // Connect the published stream on its own thread, after all forks have been attached.
    Thread thread = new Thread(() -> connectableStream.connect());
    thread.setName(this.getClass().getSimpleName());
    // Log uncaught exceptions (e.g. OOMEs) to prevent threads from dying silently
    LoggingUncaughtExceptionHandler exceptionHandler = new LoggingUncaughtExceptionHandler(Optional.absent());
    thread.setUncaughtExceptionHandler(exceptionHandler);
    thread.start();
    // Poll until all forks are done or the handler has recorded an exception. initialDelay and maxDelay are
    // both set to the check interval; the overall wait is capped at maxWaitInMinute, converted to milliseconds.
    if (!ExponentialBackoff.awaitCondition().callable(() ->
        this.forks.keySet().stream().map(Optional::get).allMatch(Fork::isDone) || exceptionHandler.getException() != null).
        initialDelay(initialDelay).maxDelay(initialDelay).maxWait(TimeUnit.MINUTES.toMillis(maxWaitInMinute)).await()) {
      throw new TimeoutException("Forks did not finish withing specified timeout.");
    }
    // The wait may have ended because the connecting thread failed; surface that failure to the caller.
    if (exceptionHandler.getException() != null) {
      throw Throwables.propagate(exceptionHandler.getException());
    }
  }