private static Model extractModel()

in modules/ml-ext/ml/spark-model-parser/src/main/java/org/apache/ignite/ml/sparkmodelparser/SparkModelParser.java [99:207]


    /**
     * Validates the layout of a directory with a saved Spark model and parses the model stored in it.
     * <p>
     * The directory is expected to contain:
     * <ul>
     *     <li>a {@code data} entry holding exactly one {@code part-*.snappy.parquet} file;</li>
     *     <li>a {@code metadata} entry holding a {@code part-00000} file;</li>
     *     <li>a {@code treesMetadata} entry holding exactly one {@code part-*.snappy.parquet} file, but only
     *     when {@code shouldContainTreeMetadataSubDirectory(parsedSparkMdl)} is {@code true}.</li>
     * </ul>
     * Every violation is logged through the logger of the given learning environment and reported as an
     * {@link IllegalArgumentException}. Directories that cannot be resolved or listed are reported the same
     * way instead of failing with a {@link NullPointerException}.
     *
     * @param pathToMdl Path to the directory with the saved model.
     * @param parsedSparkMdl Kind of the Spark model to parse.
     * @param learningEnvironment Learning environment, used for logging and passed on to the parsing routines.
     * @return Parsed model.
     * @throws IllegalArgumentException If the directory is missing, is not a directory, cannot be listed or
     *      does not have the expected content.
     */
    private static Model extractModel(String pathToMdl, SupportedSparkModels parsedSparkMdl,
        LearningEnvironment learningEnvironment) {
        File mdlDir = IgniteUtils.resolveIgnitePath(pathToMdl);

        if (mdlDir == null) {
            throw sparkMdlLayoutError(learningEnvironment,
                "Directory not found or empty [directory_path=" + pathToMdl + "]", null);
        }

        if (!mdlDir.isDirectory()) {
            throw sparkMdlLayoutError(learningEnvironment,
                "Spark Model Parser supports loading from directory only. " +
                    "The specified path " + pathToMdl + " is not the path to directory.", null);
        }

        String[] files = mdlDir.list();

        // File.list() returns null (not an empty array) when the directory cannot be read.
        if (files == null) {
            throw sparkMdlLayoutError(learningEnvironment,
                "Failed to list directory content [directory_path=" + pathToMdl + "]", null);
        }

        if (files.length == 0) {
            throw sparkMdlLayoutError(learningEnvironment,
                "Directory contain 0 files and sub-directories [directory_path=" + pathToMdl + "]", null);
        }

        if (Arrays.stream(files).noneMatch("data"::equals)) {
            throw sparkMdlLayoutError(learningEnvironment,
                "Directory should contain data sub-directory [directory_path=" + pathToMdl + "]", null);
        }

        if (Arrays.stream(files).noneMatch("metadata"::equals)) {
            throw sparkMdlLayoutError(learningEnvironment,
                "Directory should contain metadata sub-directory [directory_path=" + pathToMdl + "]", null);
        }

        String pathToData = pathToMdl + File.separator + "data";
        File[] dataParquetFiles = listSparkParquetParts(pathToData);

        if (dataParquetFiles.length == 0) {
            throw sparkMdlLayoutError(learningEnvironment,
                "Directory should contain parquet file " +
                    "with model [directory_path=" + pathToData + "]", null);
        }

        if (dataParquetFiles.length > 1) {
            throw sparkMdlLayoutError(learningEnvironment,
                "Directory should contain only one parquet file " +
                    "with model [directory_path=" + pathToData + "]", null);
        }

        String pathToMdlFile = dataParquetFiles[0].getPath();

        String pathToMetadata = pathToMdl + File.separator + "metadata";
        File metadataDir = IgniteUtils.resolveIgnitePath(pathToMetadata);

        // Both the resolved path and the listing may be null (unresolvable path, regular file, I/O error).
        String[] metadataFiles = metadataDir == null ? null : metadataDir.list();

        if (metadataFiles == null || Arrays.stream(metadataFiles).noneMatch("part-00000"::equals)) {
            throw sparkMdlLayoutError(learningEnvironment,
                "Directory should contain json file with model metadata " +
                    "with name part-00000 [directory_path=" + pathToMetadata + "]", null);
        }

        try {
            validateMetadata(pathToMetadata, parsedSparkMdl, learningEnvironment);
        }
        catch (FileNotFoundException e) {
            // Keep the original exception as the cause so the failing file is visible in the stack trace.
            throw sparkMdlLayoutError(learningEnvironment,
                "Directory should contain json file with model metadata " +
                    "with name part-00000 [directory_path=" + pathToMetadata + "]", e);
        }

        if (!shouldContainTreeMetadataSubDirectory(parsedSparkMdl))
            return parseData(pathToMdlFile, parsedSparkMdl, learningEnvironment);

        if (Arrays.stream(files).noneMatch("treesMetadata"::equals)) {
            throw sparkMdlLayoutError(learningEnvironment,
                "Directory should contain treeMetadata sub-directory [directory_path=" + pathToMdl + "]", null);
        }

        String pathToTreesMetadata = pathToMdl + File.separator + "treesMetadata";
        File[] treesMetadataParquetFiles = listSparkParquetParts(pathToTreesMetadata);

        if (treesMetadataParquetFiles.length == 0) {
            throw sparkMdlLayoutError(learningEnvironment,
                "Directory should contain parquet file " +
                    "with model treesMetadata [directory_path=" + pathToTreesMetadata + "]", null);
        }

        if (treesMetadataParquetFiles.length > 1) {
            throw sparkMdlLayoutError(learningEnvironment,
                "Directory should contain only one parquet file " +
                    "with model [directory_path=" + pathToTreesMetadata + "]", null);
        }

        String pathToTreesMetadataFile = treesMetadataParquetFiles[0].getPath();

        return parseDataWithMetadata(pathToMdlFile, pathToTreesMetadataFile, parsedSparkMdl, learningEnvironment);
    }

    /**
     * Lists the {@code part-*.snappy.parquet} files located directly in the given directory.
     *
     * @param pathToDir Path to the directory to scan.
     * @return Matching files; an empty array if the path cannot be resolved, is not a directory or cannot
     *      be listed, so that callers can report it as a missing parquet file.
     */
    private static File[] listSparkParquetParts(String pathToDir) {
        File parquetDir = IgniteUtils.resolveIgnitePath(pathToDir);

        if (parquetDir == null)
            return new File[0];

        // File.listFiles() returns null for a non-directory or on an I/O error.
        File[] parts = parquetDir.listFiles((dir, name) -> name.matches("^part-.*\\.snappy\\.parquet$"));

        return parts == null ? new File[0] : parts;
    }

    /**
     * Logs the given message with high verbosity and creates the exception describing an invalid model directory.
     *
     * @param learningEnvironment Learning environment providing the logger.
     * @param msg Error message, used both for the log record and for the exception.
     * @param cause Original failure, or {@code null} if there is none.
     * @return Exception to be thrown by the caller.
     */
    private static IllegalArgumentException sparkMdlLayoutError(LearningEnvironment learningEnvironment,
        String msg, Throwable cause) {
        learningEnvironment.logger().log(MLLogger.VerboseLevel.HIGH, msg);

        return cause == null ? new IllegalArgumentException(msg) : new IllegalArgumentException(msg, cause);
    }