opennlp-grpc/opennlp-grpc-api/opennlp.proto (106 lines of code) (raw):
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
syntax = "proto3";

package opennlp;

// Java code generation settings: generated classes live in the "opennlp"
// Java package and the outer wrapper class is named OpenNLPService.
option java_package = "opennlp";
option java_outer_classname = "OpenNLPService";
// gRPC service for part-of-speech (POS) tagging.
service PosTaggerService {
  // Assigns POS tags to a sentence given as a list of tokens.
  // The tags are returned as a StringList.
  // NOTE(review): presumably one tag per input token, in order - confirm.
  rpc Tag(TagRequest) returns (StringList);

  // Assigns POS tags to a sentence given as a list of tokens, taking
  // additional (string-based) context from the request into account.
  rpc TagWithContext(TagWithContextRequest) returns (StringList);

  // Lists the models which can be used for POS tagging.
  rpc GetAvailableModels(Empty) returns (AvailableModels);
}
// Request message of PosTaggerService.Tag.
message TagRequest {
  // The tokens of the sentence to tag.
  repeated string sentence = 1;

  // POS tag format selector; see POSTagFormat for the possible values.
  // NOTE(review): presumably the format of the returned tags - confirm.
  POSTagFormat format = 2;

  // NOTE(review): presumably the Model.hash of the model to use, as listed
  // by GetAvailableModels - confirm against the server implementation.
  string model_hash = 3;
}
// Request message of PosTaggerService.TagWithContext.
message TagWithContextRequest {
  // The tokens of the sentence to tag.
  repeated string sentence = 1;

  // Additional string-based context supplied alongside the sentence.
  // NOTE(review): how the entries are interpreted is not visible here -
  // confirm against the server implementation.
  repeated string additional_context = 2;

  // POS tag format selector; see POSTagFormat for the possible values.
  // NOTE(review): presumably the format of the returned tags - confirm.
  POSTagFormat format = 3;

  // NOTE(review): presumably the Model.hash of the model to use, as listed
  // by GetAvailableModels - confirm against the server implementation.
  string model_hash = 4;
}
// gRPC service for tokenization.
service TokenizerTaggerService {
  // Splits a sentence into its atomic parts and returns them as strings.
  rpc Tokenize(TokenizeRequest) returns (StringList);

  // Finds the boundaries of the atomic parts in a string and returns them
  // as spans instead of strings.
  rpc TokenizePos(TokenizePosRequest) returns (SpanList);

  // Lists the models which can be used for tokenization.
  rpc GetAvailableModels(Empty) returns (AvailableModels);
}
// Request message of TokenizerTaggerService.Tokenize.
message TokenizeRequest {
  // The sentence to split into its atomic parts.
  string sentence = 1;

  // NOTE(review): presumably the Model.hash of the model to use, as listed
  // by GetAvailableModels - confirm against the server implementation.
  string model_hash = 2;
}
// Request message of TokenizerTaggerService.TokenizePos.
message TokenizePosRequest {
  // The string in which to find the boundaries of the atomic parts.
  string sentence = 1;

  // NOTE(review): presumably the Model.hash of the model to use, as listed
  // by GetAvailableModels - confirm against the server implementation.
  string model_hash = 2;
}
// gRPC service for sentence detection.
//
// NOTE(review): sentDetect and sentPosDetect are lowerCamelCase, unlike the
// PascalCase RPC names of the other services. They are kept as-is because
// the RPC name is part of the gRPC method path that clients call.
service SentenceDetectorService {
  // Detects sentences in a character sequence and returns them as strings.
  rpc sentDetect(SentDetectRequest) returns (StringList);

  // Detects sentences in a character sequence and returns them as spans
  // instead of strings.
  rpc sentPosDetect(SentDetectPosRequest) returns (SpanList);

  // Lists the models which can be used for sentence detection.
  rpc GetAvailableModels(Empty) returns (AvailableModels);
}
// Request message of SentenceDetectorService.sentDetect.
message SentDetectRequest {
  // The character sequence in which to detect sentences.
  string sentence = 1;

  // NOTE(review): presumably the Model.hash of the model to use, as listed
  // by GetAvailableModels - confirm against the server implementation.
  string model_hash = 2;
}
// Request message of SentenceDetectorService.sentPosDetect.
message SentDetectPosRequest {
  // The character sequence in which to detect sentences.
  string sentence = 1;

  // NOTE(review): presumably the Model.hash of the model to use, as listed
  // by GetAvailableModels - confirm against the server implementation.
  string model_hash = 2;
}
// Generic parts: message and enum types shared across the services above.
// A list of strings. Used as the response type of the RPCs that return
// textual results (Tag, TagWithContext, Tokenize, sentDetect).
message StringList {
  // The string entries of the list.
  repeated string values = 1;
}
// A list of spans. Used as the response type of the RPCs that return
// positions rather than text (TokenizePos, sentPosDetect).
message SpanList {
  // The Span entries of the list.
  repeated Span values = 1;
}
// A span described by a start and an end position.
message Span {
  // Start position of the span.
  // NOTE(review): presumably a character offset into the request string;
  // whether it is inclusive is not visible here - confirm.
  int32 start = 1;

  // End position of the span.
  // NOTE(review): presumably a character offset into the request string;
  // whether it is exclusive is not visible here - confirm.
  int32 end = 2;

  // NOTE(review): presumably the probability assigned to this span -
  // confirm its range and meaning against the server implementation.
  double prob = 3;

  // NOTE(review): presumably a label for the kind of span; possibly empty
  // when no type applies - confirm.
  string type = 4;
}
// Response message of the GetAvailableModels RPC of every service.
message AvailableModels {
  // The models that are available for use.
  repeated Model models = 1;
}
// The POS tag formats that can be selected in TagRequest and
// TagWithContextRequest.
//
// UD carries the number 0 and is therefore the proto3 default: a request
// that leaves its format field unset is read as UD.
enum POSTagFormat {
  // Universal Dependencies format (used by the current opennlp-models).
  UD = 0;

  // Penn Treebank format (deprecated); use UD instead.
  // Converting from UD to PENN isn't a lossless operation.
  PENN = 1;

  // Unknown tag format.
  UNKNOWN = 2;

  // Custom-defined tag format.
  CUSTOM = 3;
}
// Describes a single model; the entries of AvailableModels are of this type.
message Model {
  // Hash of the model.
  // NOTE(review): presumably the value clients pass as model_hash in the
  // request messages - confirm against the server implementation.
  string hash = 1;

  // Name of the model.
  string name = 2;

  // Locale of the model.
  // NOTE(review): the exact format (e.g. language code vs. full locale
  // tag) is not visible here - confirm.
  string locale = 3;
}
// A message without any fields. Used as the request type of the
// GetAvailableModels RPC of every service.
message Empty {}