in modules/ml-ext/ml/spark-model-parser/src/main/java/org/apache/ignite/ml/sparkmodelparser/SparkModelParser.java [99:207]
/**
 * Validates the on-disk layout of a Spark model directory and parses the model stored in it.
 * <p>
 * The directory must contain a {@code data} sub-directory with exactly one
 * {@code part-*.snappy.parquet} file and a {@code metadata} sub-directory with a {@code part-00000}
 * file. When {@code shouldContainTreeMetadataSubDirectory(parsedSparkMdl)} is {@code true}, a
 * {@code treesMetadata} sub-directory with exactly one {@code part-*.snappy.parquet} file is
 * required as well.
 * <p>
 * Every validation failure is logged through the environment logger and then reported as an
 * {@link IllegalArgumentException}.
 *
 * @param pathToMdl Path to the directory with the saved Spark model.
 * @param parsedSparkMdl Kind of Spark model expected in the directory.
 * @param learningEnvironment Learning environment; its logger receives the validation messages.
 * @return Model parsed from the directory content.
 * @throws IllegalArgumentException If the directory is missing, cannot be listed or does not have
 *      the expected layout.
 */
private static Model extractModel(String pathToMdl, SupportedSparkModels parsedSparkMdl,
    LearningEnvironment learningEnvironment) {
    File mdlDir = IgniteUtils.resolveIgnitePath(pathToMdl);

    if (mdlDir == null) {
        throw modelDirError(learningEnvironment,
            "Directory not found or empty [directory_path=" + pathToMdl + "]", null);
    }

    if (!mdlDir.isDirectory()) {
        throw modelDirError(learningEnvironment,
            "Spark Model Parser supports loading from directory only. " +
                "The specified path " + pathToMdl + " is not the path to directory.", null);
    }

    String[] files = mdlDir.list();

    // File.list() returns null (not an empty array) when the directory cannot be read.
    if (files == null) {
        throw modelDirError(learningEnvironment,
            "Failed to list directory content [directory_path=" + pathToMdl + "]", null);
    }

    if (files.length == 0) {
        throw modelDirError(learningEnvironment,
            "Directory contain 0 files and sub-directories [directory_path=" + pathToMdl + "]", null);
    }

    if (Arrays.stream(files).noneMatch("data"::equals)) {
        throw modelDirError(learningEnvironment,
            "Directory should contain data sub-directory [directory_path=" + pathToMdl + "]", null);
    }

    if (Arrays.stream(files).noneMatch("metadata"::equals)) {
        throw modelDirError(learningEnvironment,
            "Directory should contain metadata sub-directory [directory_path=" + pathToMdl + "]", null);
    }

    String pathToData = pathToMdl + File.separator + "data";
    String pathToMdlFile = findSingleParquetFile(pathToData, "with model", learningEnvironment);

    String pathToMetadata = pathToMdl + File.separator + "metadata";
    String missingMetadataMsg = "Directory should contain json file with model metadata " +
        "with name part-00000 [directory_path=" + pathToMetadata + "]";

    File metadataDir = IgniteUtils.resolveIgnitePath(pathToMetadata);

    // A null listing ("metadata" is not a readable directory) is treated as a missing metadata file.
    String[] metadataFiles = metadataDir == null ? null : metadataDir.list();

    if (metadataFiles == null || Arrays.stream(metadataFiles).noneMatch("part-00000"::equals))
        throw modelDirError(learningEnvironment, missingMetadataMsg, null);

    try {
        validateMetadata(pathToMetadata, parsedSparkMdl, learningEnvironment);
    }
    catch (FileNotFoundException e) {
        // Keep the original exception as the cause so the failing path is not lost.
        throw modelDirError(learningEnvironment, missingMetadataMsg, e);
    }

    if (shouldContainTreeMetadataSubDirectory(parsedSparkMdl)) {
        if (Arrays.stream(files).noneMatch("treesMetadata"::equals)) {
            throw modelDirError(learningEnvironment,
                "Directory should contain treesMetadata sub-directory [directory_path=" + pathToMdl + "]", null);
        }

        String pathToTreesMetadata = pathToMdl + File.separator + "treesMetadata";
        String pathToTreesMetadataFile =
            findSingleParquetFile(pathToTreesMetadata, "with model treesMetadata", learningEnvironment);

        return parseDataWithMetadata(pathToMdlFile, pathToTreesMetadataFile, parsedSparkMdl, learningEnvironment);
    }
    else
        return parseData(pathToMdlFile, parsedSparkMdl, learningEnvironment);
}

/**
 * Finds the only {@code part-*.snappy.parquet} file in the given directory.
 *
 * @param pathToDir Path to the directory to search in.
 * @param missingFileDesc Text used in the error message when no parquet file is found.
 * @param learningEnvironment Learning environment; its logger receives the validation messages.
 * @return Path to the single parquet file found.
 * @throws IllegalArgumentException If the directory contains no matching file or more than one.
 */
private static String findSingleParquetFile(String pathToDir, String missingFileDesc,
    LearningEnvironment learningEnvironment) {
    File dir = IgniteUtils.resolveIgnitePath(pathToDir);

    // Both the path resolution and File.listFiles() may yield null (e.g. the path is a regular
    // file or cannot be read); such a directory is treated as having no parquet files.
    File[] parquetFiles = dir == null
        ? null
        : dir.listFiles((parent, name) -> name.matches("^part-.*\\.snappy\\.parquet$"));

    if (parquetFiles == null || parquetFiles.length == 0) {
        throw modelDirError(learningEnvironment,
            "Directory should contain parquet file " +
                missingFileDesc + " [directory_path=" + pathToDir + "]", null);
    }

    if (parquetFiles.length > 1) {
        throw modelDirError(learningEnvironment,
            "Directory should contain only one parquet file " +
                "with model [directory_path=" + pathToDir + "]", null);
    }

    return parquetFiles[0].getPath();
}

/**
 * Logs a validation message and creates the exception that reports it to the caller.
 *
 * @param learningEnvironment Learning environment whose logger receives the message.
 * @param msg Validation message.
 * @param cause Underlying failure, or {@code null} if there is none.
 * @return Exception to be thrown by the caller.
 */
private static IllegalArgumentException modelDirError(LearningEnvironment learningEnvironment, String msg,
    Throwable cause) {
    learningEnvironment.logger().log(MLLogger.VerboseLevel.HIGH, msg);

    return new IllegalArgumentException(msg, cause);
}