public ObjectStream create()

in opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java [104:169]


  /**
   * Creates a {@link NameSample} stream over brat annotated documents, configured
   * via the given command line style arguments.
   * <p>
   * The arguments are parsed into {@code Parameters}. From these, the annotation
   * configuration, the brat document stream, a sentence detector and a tokenizer
   * are set up and handed to a new {@link BratNameSampleStream}.
   * <ul>
   *   <li>Sentence detection uses a {@link SentenceDetectorME} if a sentence detector
   *       model is given, otherwise a {@link NewlineSentenceDetector}.</li>
   *   <li>Tokenization uses a {@link TokenizerME} if a tokenizer model is given,
   *       otherwise the rule based tokenizer selected by name ({@code "simple"} or
   *       {@code "whitespace"}), otherwise {@link WhitespaceTokenizer#INSTANCE}.</li>
   *   <li>Name types are read from a comma separated list; each entry is trimmed.
   *       If no name types are given, {@code null} is passed on.</li>
   * </ul>
   *
   * @param args The arguments to parse. Must not be {@code null}.
   * @return A new {@link BratNameSampleStream} wired up according to {@code args}.
   *
   * @throws IllegalArgumentException Thrown if {@code args} is {@code null}.
   * @throws TerminateToolException Thrown if the tokenizer options are rejected,
   *     the annotation configuration cannot be parsed, the brat document stream
   *     cannot be created, a model fails to load, or the rule based tokenizer
   *     name is unknown.
   */
  public ObjectStream<NameSample> create(String[] args) {
    if (args == null) {
      throw new IllegalArgumentException("Passed args must not be null!");
    }
    Parameters params = ArgumentParser.parse(args, Parameters.class);

    // NOTE(review): notNull(..) is defined outside this block; judging by the message
    // it presumably reports that both tokenizer options were set at once - confirm.
    if (notNull(params.getRuleBasedTokenizer(), params.getTokenizerModel())) {
      throw new TerminateToolException(-1, "Either use rule based or statistical tokenizer!");
    }

    AnnotationConfiguration annConfig;
    try {
      annConfig = AnnotationConfiguration.parse(params.getAnnotationConfig());
    } catch (IOException e) {
      // Keep the cause so the underlying I/O problem remains visible to the caller.
      throw new TerminateToolException(1, "Failed to parse annotation.conf file!", e);
    }

    // TODO: How to handle the error here ? terminate the tool? not nice if used by API!
    ObjectStream<BratDocument> samples;
    try {
      // NOTE(review): the trailing null is presumably an optional file filter - confirm
      // against the BratDocumentStream constructor.
      samples = new BratDocumentStream(annConfig,
          params.getBratDataDir(), params.getRecursive(), null);
    } catch (IOException e) {
      // Message is forwarded as before; the cause is now chained as well.
      throw new TerminateToolException(-1, e.getMessage(), e);
    }

    SentenceDetector sentDetector;
    if (params.getSentenceDetectorModel() != null) {
      try {
        sentDetector = new SentenceDetectorME(new SentenceModel(params.getSentenceDetectorModel()));
      } catch (IOException e) {
        throw new TerminateToolException(-1, "Failed to load sentence detector model!", e);
      }
    } else {
      // No model given: fall back to the newline based sentence detector.
      sentDetector = new NewlineSentenceDetector();
    }

    // Default tokenizer, used if neither a model nor a rule based tokenizer is requested.
    Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
    if (params.getTokenizerModel() != null) {
      try {
        tokenizer = new TokenizerME(new TokenizerModel(params.getTokenizerModel()));
      } catch (IOException e) {
        throw new TerminateToolException(-1, "Failed to load tokenizer model!", e);
      }
    } else if (params.getRuleBasedTokenizer() != null) {
      String tokenizerName = params.getRuleBasedTokenizer();
      if ("simple".equals(tokenizerName)) {
        tokenizer = SimpleTokenizer.INSTANCE;
      } else if ("whitespace".equals(tokenizerName)) {
        tokenizer = WhitespaceTokenizer.INSTANCE;
      } else {
        throw new TerminateToolException(-1, "Unknown tokenizer: " + tokenizerName);
      }
    }

    // Stays null if no name types were specified.
    Set<String> nameTypes = null;
    if (params.getNameTypes() != null) {
      String[] nameTypesArr = params.getNameTypes().split(",");
      if (nameTypesArr.length > 0) {
        nameTypes = Arrays.stream(nameTypesArr).map(String::trim).collect(Collectors.toSet());
      }
    }

    return new BratNameSampleStream(sentDetector, tokenizer, samples, nameTypes);
  }