public ConlluSentence read()

in opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluStream.java [62:143]


  /**
   * Reads the next raw sentence from the underlying sentence stream and parses it
   * into a {@link ConlluSentence}.
   *
   * <p>Every line whose trimmed form starts with {@code #} is treated as a comment
   * line carrying sentence-level metadata; every other line is handed, untrimmed,
   * to the {@link ConlluWordLine} constructor. Recognised comment lines are:
   * <ul>
   *   <li>{@code newdoc id = ...} / {@code newpar id = ...} - set the new-document /
   *       new-paragraph flag and the corresponding id,</li>
   *   <li>{@code sent_id = ...}, {@code text = ...}, {@code translit = ...},</li>
   *   <li>{@code text_* = ...} - forwarded to {@code addTextLang},</li>
   *   <li>bare {@code newdoc} / {@code newpar} - set only the corresponding flag.</li>
   * </ul>
   * Any other comment line is ignored. The collected word lines are passed through
   * {@code postProcessContractions} before the sentence is built.
   *
   * @return the parsed sentence, or {@code null} if the underlying sentence stream
   *         returned {@code null}
   * @throws IOException if the underlying sentence stream or any of the parsing
   *         steps reports an I/O error
   */
  public ConlluSentence read() throws IOException {
    String sentence = sentenceStream.read();

    if (sentence == null) {
      return null;
    }

    List<ConlluWordLine> wordLines = new ArrayList<>();

    boolean newDocument = false;
    boolean newParagraph = false;
    String documentId = null;
    String paragraphId = null;
    String sentenceId = null;
    String text = null;
    // Stays null unless at least one "text_*" comment is encountered.
    Map<Locale, String> textLang = null;
    String translit = null;

    // try-with-resources guarantees the reader is closed, also when parsing throws.
    try (BufferedReader reader = new BufferedReader(new StringReader(sentence))) {
      String line;
      while ((line = reader.readLine()) != null) {
        String trimmedLine = line.trim();

        // Anything that is not a comment is a word line; the untrimmed line is passed on.
        if (!trimmedLine.startsWith("#")) {
          wordLines.add(new ConlluWordLine(line));
          continue;
        }

        // Drop the leading '#'; whitespace after it is trimmed where the parts are used.
        String commentLine = trimmedLine.substring(1);
        int separator = commentLine.indexOf('=');

        if (separator == -1) {
          // Comment without a value: only the bare markers are meaningful.
          switch (commentLine.trim()) {
            case "newdoc":
              newDocument = true;
              break;
            case "newpar":
              newParagraph = true;
              break;
            default:
              break;
          }
          continue;
        }

        String firstPart = commentLine.substring(0, separator).trim();
        String secondPart = commentLine.substring(separator + 1).trim();

        // The named keys are only honoured when they carry a non-empty value.
        if (!secondPart.isEmpty()) {
          switch (firstPart) {
            case "newdoc id":
              newDocument = true;
              documentId = secondPart;
              break;
            case "newpar id":
              newParagraph = true;
              paragraphId = secondPart;
              break;
            case "sent_id":
              sentenceId = secondPart;
              break;
            case "text":
              text = secondPart;
              break;
            case "translit":
              translit = secondPart;
              break;
            default:
              break;
          }
        }

        // "text_*" keys are forwarded regardless of whether the value is empty.
        if (firstPart.startsWith("text_")) {
          if (textLang == null) {
            textLang = new HashMap<>();
          }
          addTextLang(firstPart, secondPart, textLang);
        }
      }
    }

    wordLines = postProcessContractions(wordLines);

    return new ConlluSentence(wordLines, sentenceId, text, newDocument, documentId, newParagraph,
            paragraphId, textLang, translit);
  }