project/CelebornBuild.scala (1,459 lines of code) (raw):
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.nio.file.Files
import java.nio.charset.StandardCharsets.UTF_8
import java.util.Locale
import scala.util.Properties
import scala.xml._
import scala.xml.transform._
import com.github.sbt.git.SbtGit.GitKeys._
import org.openapitools.generator.sbt.plugin.OpenApiGeneratorPlugin
import org.openapitools.generator.sbt.plugin.OpenApiGeneratorPlugin.autoImport._
import sbtassembly.AssemblyPlugin.autoImport._
import sbtprotoc.ProtocPlugin.autoImport._
import sbt._
import sbt.Keys._
import Utils._
import CelebornCommonSettings._
// import sbt.Keys.streams
/**
 * Catalogue of the third-party artifacts used by the build: first the version
 * strings, then the `ModuleID` values (with their exclusion rules) that the
 * project definitions reference.
 */
object Dependencies {

  // zstd-jni and lz4-java take their version from the selected Spark client
  // profile; the literals are the fallbacks used when no Spark profile is active.
  val zstdJniVersion = sparkClientProjects.fold("1.5.7-1")(_.zstdJniVersion)
  val lz4JavaVersion = sparkClientProjects.fold("1.8.0")(_.lz4JavaVersion)

  // ---------------------------------------------------------------------------
  // Library versions
  // ---------------------------------------------------------------------------
  val apLoaderVersion = "3.0-9"
  val commonsCompressVersion = "1.4.1"
  val commonsCryptoVersion = "1.0.0"
  val commonsIoVersion = "2.17.0"
  val commonsLoggingVersion = "1.1.3"
  val commonsLang3Version = "3.17.0"
  val commonsCollectionsVersion = "3.2.2"
  val findbugsVersion = "1.3.9"
  val guavaVersion = "33.1.0-jre"
  val hadoopVersion = "3.3.6"
  val awsS3Version = "1.12.532"
  val aliyunOssVersion = "3.13.0"
  val junitInterfaceVersion = "0.13.3"
  // Remember to update `junitInterfaceVersion` whenever junit is upgraded.
  val junitVersion = "4.13.2"
  val leveldbJniVersion = "1.8"
  val log4j2Version = "2.24.3"
  val jdkToolsVersion = "0.1"
  val metricsVersion = "4.2.25"
  val mockitoVersion = "4.11.0"
  val nettyVersion = "4.1.118.Final"
  val ratisVersion = "3.1.3"
  val roaringBitmapVersion = "1.0.6"
  val rocksdbJniVersion = "9.10.0"
  val jacksonVersion = "2.15.3"
  val jakartaActivationApiVersion = "1.2.1"
  val scalatestMockitoVersion = "1.17.14"
  val scalatestVersion = "3.2.16"
  val slf4jVersion = "1.7.36"
  val snakeyamlVersion = "2.2"
  val snappyVersion = "1.1.10.5"
  val mybatisVersion = "3.5.15"
  val hikaricpVersion = "4.0.3"
  val h2Version = "2.2.224"
  val swaggerVersion = "2.2.1"
  val swaggerUiVersion = "4.9.1"
  val jerseyVersion = "2.39.1"
  val jettyVersion = "9.4.56.v20240826"
  val jakartaServeletApiVersion = "4.0.4"
  val openApiToolsJacksonBindNullableVersion = "0.2.6"
  val httpClient5Version = "5.3.1"
  val httpCore5Version = "5.2.4"
  val jakartaAnnotationApiVersion = "1.3.5"
  val picocliVersion = "4.7.6"
  val jmhVersion = "1.37"

  // SSL support
  val bouncycastleVersion = "1.77"

  // Protobuf compiler and runtime
  val protocVersion = "3.25.5"
  val protoVersion = "3.25.5"

  // Tez
  val tezVersion = "0.10.2"

  // ---------------------------------------------------------------------------
  // Module definitions
  // ---------------------------------------------------------------------------
  val apLoader = "me.bechberger" % "ap-loader-all" % apLoaderVersion
  val commonsCompress = "org.apache.commons" % "commons-compress" % commonsCompressVersion
  val commonsCrypto = "org.apache.commons" % "commons-crypto" % commonsCryptoVersion excludeAll(
    ExclusionRule("net.java.dev.jna", "jna"))
  val commonsIo = "commons-io" % "commons-io" % commonsIoVersion
  val commonsLang3 = "org.apache.commons" % "commons-lang3" % commonsLang3Version
  val commonsLogging = "commons-logging" % "commons-logging" % commonsLoggingVersion
  val jdkTools = "com.github.olivergondza" % "maven-jdk-tools-wrapper" % jdkToolsVersion
  val findbugsJsr305 = "com.google.code.findbugs" % "jsr305" % findbugsVersion
  val guava = "com.google.guava" % "guava" % guavaVersion excludeAll(
    ExclusionRule("org.checkerframework", "checker-qual"),
    ExclusionRule("org.codehaus.mojo", "animal-sniffer-annotations"),
    ExclusionRule("com.google.errorprone", "error_prone_annotations"),
    ExclusionRule("com.google.guava", "listenablefuture"),
    ExclusionRule("com.google.j2objc", "j2objc-annotations"))
  val hadoopClientApi = "org.apache.hadoop" % "hadoop-client-api" % hadoopVersion
  val hadoopClientRuntime = "org.apache.hadoop" % "hadoop-client-runtime" % hadoopVersion
  val hadoopMapreduceClientApp = "org.apache.hadoop" % "hadoop-mapreduce-client-app" % hadoopVersion excludeAll(
    ExclusionRule("io.netty", "netty-transport-native-epoll"),
    ExclusionRule("com.google.guava", "guava"),
    ExclusionRule("com.fasterxml.jackson.core", "jackson-annotations"),
    ExclusionRule("com.fasterxml.jackson.core", "jackson-databind"),
    ExclusionRule("jakarta.activation", "jakarta.activation-api"),
    ExclusionRule("jline", "jline"),
    ExclusionRule("log4j", "log4j"),
    ExclusionRule("org.slf4j", "slf4j-log4j12"))
  val hadoopAws = "org.apache.hadoop" % "hadoop-aws" % hadoopVersion excludeAll(
    ExclusionRule("com.amazonaws", "aws-java-sdk-bundle"))
  val awsS3 = "com.amazonaws" % "aws-java-sdk-s3" % awsS3Version
  val commonsCollections = "commons-collections" % "commons-collections" % commonsCollectionsVersion
  val hadoopAliyun = "org.apache.hadoop" % "hadoop-aliyun" % hadoopVersion
  val aliyunOss = "com.aliyun.oss" % "aliyun-sdk-oss" % aliyunOssVersion
  val ioDropwizardMetricsCore = "io.dropwizard.metrics" % "metrics-core" % metricsVersion
  val ioDropwizardMetricsGraphite = "io.dropwizard.metrics" % "metrics-graphite" % metricsVersion excludeAll(
    ExclusionRule("com.rabbitmq", "amqp-client"))
  val ioDropwizardMetricsJvm = "io.dropwizard.metrics" % "metrics-jvm" % metricsVersion
  val ioNetty = "io.netty" % "netty-all" % nettyVersion excludeAll(
    ExclusionRule("io.netty", "netty-handler-ssl-ocsp"))

  // Linux/aarch64 gets the org.openlabtesting build of leveldbjni; every other
  // platform (including macOS on aarch64) keeps the org.fusesource one.
  // Original note: org.openlabtesting.leveldbjni requires glibc version 3.4.21.
  val leveldbJniGroup = {
    val onLinux = System.getProperty("os.name").startsWith("Linux")
    val onAarch64 = System.getProperty("os.arch") == "aarch64"
    if (onLinux && onAarch64) "org.openlabtesting.leveldbjni" else "org.fusesource.leveldbjni"
  }
  val leveldbJniAll = leveldbJniGroup % "leveldbjni-all" % leveldbJniVersion

  val log4jApi = "org.apache.logging.log4j" % "log4j-api" % log4j2Version
  val log4jCore = "org.apache.logging.log4j" % "log4j-core" % log4j2Version
  val log4j12Api = "org.apache.logging.log4j" % "log4j-1.2-api" % log4j2Version
  val log4jSlf4jImpl = "org.apache.logging.log4j" % "log4j-slf4j-impl" % log4j2Version
  val lz4Java = "org.lz4" % "lz4-java" % lz4JavaVersion
  val protobufJava = "com.google.protobuf" % "protobuf-java" % protoVersion
  val ratisClient = "org.apache.ratis" % "ratis-client" % ratisVersion
  val ratisCommon = "org.apache.ratis" % "ratis-common" % ratisVersion
  val ratisGrpc = "org.apache.ratis" % "ratis-grpc" % ratisVersion
  val ratisMetricsDefault = "org.apache.ratis" % "ratis-metrics-default" % ratisVersion
  val ratisNetty = "org.apache.ratis" % "ratis-netty" % ratisVersion
  val ratisServer = "org.apache.ratis" % "ratis-server" % ratisVersion
  val ratisShell = "org.apache.ratis" % "ratis-shell" % ratisVersion excludeAll(
    ExclusionRule("org.slf4j", "slf4j-simple"))
  val roaringBitmap = "org.roaringbitmap" % "RoaringBitmap" % roaringBitmapVersion
  val rocksdbJni = "org.rocksdb" % "rocksdbjni" % rocksdbJniVersion
  val jacksonDatabind = "com.fasterxml.jackson.core" % "jackson-databind" % jacksonVersion
  val jacksonCore = "com.fasterxml.jackson.core" % "jackson-core" % jacksonVersion
  val jacksonAnnotations = "com.fasterxml.jackson.core" % "jackson-annotations" % jacksonVersion
  val jacksonModule = "com.fasterxml.jackson.module" %% "jackson-module-scala" % jacksonVersion
  val jacksonDataTypeJsr310 = "com.fasterxml.jackson.datatype" % "jackson-datatype-jsr310" % jacksonVersion
  val jacksonDataFormatYam = "com.fasterxml.jackson.dataformat" % "jackson-dataformat-yaml" % jacksonVersion
  val jacksonJaxrsBase = "com.fasterxml.jackson.jaxrs" % "jackson-jaxrs-base" % jacksonVersion
  val jacksonJaxrsJsonProvider = "com.fasterxml.jackson.jaxrs" % "jackson-jaxrs-json-provider" % jacksonVersion excludeAll(
    ExclusionRule("jakarta.activation", "jakarta.activation-api"))
  val jacksonModuleJaxbAnnotations = "com.fasterxml.jackson.module" % "jackson-module-jaxb-annotations" % jacksonVersion excludeAll(
    ExclusionRule("jakarta.activation", "jakarta.activation-api"))
  val jakartaActivationApi = "jakarta.activation" % "jakarta.activation-api" % jakartaActivationApiVersion
  val scalaReflect = "org.scala-lang" % "scala-reflect" % projectScalaVersion
  val slf4jApi = "org.slf4j" % "slf4j-api" % slf4jVersion
  val slf4jJulToSlf4j = "org.slf4j" % "jul-to-slf4j" % slf4jVersion
  val slf4jJclOverSlf4j = "org.slf4j" % "jcl-over-slf4j" % slf4jVersion
  val snakeyaml = "org.yaml" % "snakeyaml" % snakeyamlVersion
  val snappyJava = "org.xerial.snappy" % "snappy-java" % snappyVersion
  val zstdJni = "com.github.luben" % "zstd-jni" % zstdJniVersion
  val mybatis = "org.mybatis" % "mybatis" % mybatisVersion
  val hikaricp = "com.zaxxer" % "HikariCP" % hikaricpVersion
  val jettyServer = "org.eclipse.jetty" % "jetty-server" % jettyVersion excludeAll(
    ExclusionRule("javax.servlet", "javax.servlet-api"))
  val jettyServlet = "org.eclipse.jetty" % "jetty-servlet" % jettyVersion excludeAll(
    ExclusionRule("javax.servlet", "javax.servlet-api"))
  val jettyProxy = "org.eclipse.jetty" % "jetty-proxy" % jettyVersion
  val jakartaServletApi = "jakarta.servlet" % "jakarta.servlet-api" % jakartaServeletApiVersion
  val jerseyServer = "org.glassfish.jersey.core" % "jersey-server" % jerseyVersion excludeAll(
    ExclusionRule("jakarta.xml.bind", "jakarta.xml.bind-api"))
  val jerseyContainerServletCore = "org.glassfish.jersey.containers" % "jersey-container-servlet-core" % jerseyVersion
  val jerseyHk2 = "org.glassfish.jersey.inject" % "jersey-hk2" % jerseyVersion
  val jerseyMediaJsonJackson = "org.glassfish.jersey.media" % "jersey-media-json-jackson" % jerseyVersion
  val jerseyMediaMultipart = "org.glassfish.jersey.media" % "jersey-media-multipart" % jerseyVersion
  val swaggerJaxrs2 = "io.swagger.core.v3" % "swagger-jaxrs2" % swaggerVersion excludeAll(
    ExclusionRule("com.sun.activation", "jakarta.activation"),
    ExclusionRule("org.javassist", "javassist"),
    ExclusionRule("jakarta.activation", "jakarta.activation-api"))
  val swaggerUi = "org.webjars" % "swagger-ui" % swaggerUiVersion
  val openApiToolsJacksonBindNullable = "org.openapitools" % "jackson-databind-nullable" % openApiToolsJacksonBindNullableVersion excludeAll(
    ExclusionRule("com.fasterxml.jackson.core", "jackson-databind"))
  val httpClient5 = "org.apache.httpcomponents.client5" % "httpclient5" % httpClient5Version
  val httpCore5 = "org.apache.httpcomponents.core5" % "httpcore5" % httpCore5Version
  val httpCore5H2 = "org.apache.httpcomponents.core5" % "httpcore5-h2" % httpCore5Version
  val jakartaAnnotationApi = "jakarta.annotation" % "jakarta.annotation-api" % jakartaAnnotationApiVersion

  // ---------------------------------------------------------------------------
  // Test dependencies
  // https://www.scala-sbt.org/1.x/docs/Testing.html
  // ---------------------------------------------------------------------------
  val junitInterface = "com.github.sbt" % "junit-interface" % junitInterfaceVersion
  val junit = "junit" % "junit" % junitVersion
  val mockitoCore = "org.mockito" % "mockito-core" % mockitoVersion
  val mockitoInline = "org.mockito" % "mockito-inline" % mockitoVersion
  val scalatestMockito = "org.mockito" %% "mockito-scala-scalatest" % scalatestMockitoVersion
  val scalatest = "org.scalatest" %% "scalatest" % scalatestVersion
  val h2 = "com.h2database" % "h2" % h2Version
  val jerseyTestFrameworkCore = "org.glassfish.jersey.test-framework" % "jersey-test-framework-core" % jerseyVersion
  val jerseyTestFrameworkProviderJetty = "org.glassfish.jersey.test-framework.providers" % "jersey-test-framework-provider-jetty" % jerseyVersion excludeAll(
    ExclusionRule("org.eclipse.jetty", "jetty-util"),
    ExclusionRule("org.eclipse.jetty", "jetty-continuation"))

  // SSL support (test scope only)
  val bouncycastleBcprovJdk18on = "org.bouncycastle" % "bcprov-jdk18on" % bouncycastleVersion % "test"
  val bouncycastleBcpkixJdk18on = "org.bouncycastle" % "bcpkix-jdk18on" % bouncycastleVersion % "test"

  // ---------------------------------------------------------------------------
  // Tez support
  // ---------------------------------------------------------------------------
  // Hadoop artifacts excluded from every Tez module below.
  private val tezBaseExclusions = Seq(
    ExclusionRule("org.apache.hadoop", "hadoop-annotations"),
    ExclusionRule("org.apache.hadoop", "hadoop-yarn-api"),
    ExclusionRule("org.apache.hadoop", "hadoop-yarn-common"))
  // The base set plus the YARN client/server artifacts, shared by
  // tez-runtime-internals and tez-dag.
  private val tezYarnServerExclusions = tezBaseExclusions ++ Seq(
    ExclusionRule("org.apache.hadoop", "hadoop-yarn-client"),
    ExclusionRule("org.apache.hadoop", "hadoop-yarn-server-common"),
    ExclusionRule("org.apache.hadoop", "hadoop-yarn-server-web-proxy"))
  // The base set plus the artifacts excluded only from tez-api.
  private val tezApiExclusions = tezBaseExclusions ++ Seq(
    ExclusionRule("org.apache.hadoop", "hadoop-auth"),
    ExclusionRule("org.apache.hadoop", "hadoop-hdfs"),
    ExclusionRule("org.apache.hadoop", "hadoop-yarn-client"))

  val tezCommon =
    ("org.apache.tez" % "tez-common" % tezVersion).excludeAll(tezBaseExclusions: _*)
  val tezRuntimeLibrary =
    ("org.apache.tez" % "tez-runtime-library" % tezVersion).excludeAll(tezBaseExclusions: _*)
  val tezRuntimeInternals =
    ("org.apache.tez" % "tez-runtime-internals" % tezVersion).excludeAll(tezYarnServerExclusions: _*)
  val tezDag =
    ("org.apache.tez" % "tez-dag" % tezVersion).excludeAll(tezYarnServerExclusions: _*)
  val tezApi =
    ("org.apache.tez" % "tez-api" % tezVersion).excludeAll(tezApiExclusions: _*)

  val hadoopCommon = "org.apache.hadoop" % "hadoop-common" % hadoopVersion excludeAll(
    ExclusionRule("com.sun.jersey", "jersey-json"),
    ExclusionRule("org.apache.httpcomponents", "httpclient"),
    ExclusionRule("org.slf4j", "slf4j-log4j12"))

  val picocli = "info.picocli" % "picocli" % picocliVersion

  // JMH benchmarks (test scope only)
  val jmhCore = "org.openjdk.jmh" % "jmh-core" % jmhVersion % "test"
  val jmhGeneratorAnnprocess = "org.openjdk.jmh" % "jmh-generator-annprocess" % jmhVersion % "test"
}
/**
 * Settings shared by the Celeborn sub-projects: supported Scala versions, common
 * compiler/test options, release (publishing) settings, protobuf generation
 * settings and the default unit-test dependencies.
 */
object CelebornCommonSettings {

  // Scala versions
  val SCALA_2_11_12 = "2.11.12"
  val SCALA_2_12_10 = "2.12.10"
  val SCALA_2_12_15 = "2.12.15"
  val SCALA_2_12_17 = "2.12.17"
  val SCALA_2_12_18 = "2.12.18"
  val scala213 = "2.13.5"
  val scala213_11 = "2.13.11"
  val ALL_SCALA_VERSIONS = Seq(SCALA_2_11_12, SCALA_2_12_10, SCALA_2_12_15, SCALA_2_12_17, SCALA_2_12_18, scala213, scala213_11)
  val DEFAULT_SCALA_VERSION = SCALA_2_12_18

  // Scala version of the selected Spark profile, or DEFAULT_SCALA_VERSION when none is selected.
  val projectScalaVersion = defaultScalaVersion()

  // NOTE(review): the three expressions below build Setting values that are neither
  // assigned nor returned, so they do not appear to affect any project here — confirm
  // before relying on or removing them.
  scalaVersion := projectScalaVersion
  autoScalaLibrary := false
  // crossScalaVersions must be set to Nil on the root project
  crossScalaVersions := Nil

  /** Settings applied to every module: coordinates, compiler flags, manifest entries, test JVM options. */
  lazy val commonSettings = Seq(
    organization := "org.apache.celeborn",
    scalaVersion := projectScalaVersion,
    crossScalaVersions := ALL_SCALA_VERSIONS,
    fork := true,
    scalacOptions ++= Seq("-target:jvm-1.8"),
    javacOptions ++= Seq("-encoding", UTF_8.name(), "-source", "1.8", "-g"),
    Compile / packageBin / packageOptions += Package.ManifestAttributes(
      "Build-Jdk-Spec" -> System.getProperty("java.version"),
      "Build-Revision" -> gitHeadCommit.value.getOrElse("N/A"),
      "Build-Branch" -> gitCurrentBranch.value,
      "Build-Time" -> java.time.ZonedDateTime.now().format(java.time.format.DateTimeFormatter.ISO_DATE_TIME)),
    // -target cannot be passed as a parameter to javadoc. See https://github.com/sbt/sbt/issues/355
    Compile / compile / javacOptions ++= Seq("-target", "1.8"),
    dependencyOverrides := Seq(
      Dependencies.commonsLogging,
      Dependencies.findbugsJsr305,
      Dependencies.slf4jApi),
    // Make sure any tests in any project that uses Spark is configured for running well locally
    Test / javaOptions ++= Seq(
      "-Xmx4g",
      "-XX:+IgnoreUnrecognizedVMOptions",
      "--add-exports=jdk.internal.jvmstat/sun.jvmstat.monitor=ALL-UNNAMED",
      "--add-opens=java.base/java.lang=ALL-UNNAMED",
      "--add-opens=java.base/java.lang.invoke=ALL-UNNAMED",
      "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED",
      "--add-opens=java.base/java.io=ALL-UNNAMED",
      "--add-opens=java.base/java.net=ALL-UNNAMED",
      "--add-opens=java.base/java.nio=ALL-UNNAMED",
      "--add-opens=java.base/java.util=ALL-UNNAMED",
      "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED",
      "--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED",
      "--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED",
      "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED",
      "--add-opens=java.base/sun.nio.cs=ALL-UNNAMED",
      "--add-opens=java.base/sun.security.action=ALL-UNNAMED",
      "--add-opens=java.base/sun.util.calendar=ALL-UNNAMED",
      "--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED",
      "-Dio.netty.tryReflectionSetAccessible=true"
    ),
    testOptions += Tests.Argument("-oF"),
    Test / testOptions += Tests.Argument("-oDF"),
    Test / testOptions += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"),
    // Don't execute in parallel since we can't have multiple Sparks in the same JVM
    Test / parallelExecution := false,
    javaOptions += "-Xmx4g",
    // The test heap size ("-Xmx4g") is already part of the Test / javaOptions list above,
    // so it is not appended a second time here.
    // The shuffle IO plugin class can be overridden with -Dspark.shuffle.plugin.class.
    Test / javaOptions ++= Seq(
      "-Dspark.shuffle.sort.io.plugin.class="
        + sys.props.getOrElse("spark.shuffle.plugin.class", "org.apache.spark.shuffle.sort.io.LocalDiskShuffleDataIO")
    ),
    Test / envVars += ("IS_TESTING", "1")
  )

  ////////////////////////////////////////////////////////
  //                 Release settings                   //
  ////////////////////////////////////////////////////////
  /** Publishing settings: Maven-style artifacts, ASF credentials from the environment, target repository. */
  lazy val releaseSettings = Seq(
    publishMavenStyle := true,
    publishArtifact := true,
    Test / publishArtifact := false,
    credentials += {
      val defaultHost = "repository.apache.org"
      // Use the host of the configured Maven repository. Fall back to the default host when
      // the resolver is not a Maven repository, its root is not a parsable URL, or publishTo
      // is not set at all (previously the latter case failed with NoSuchElementException).
      val host = publishTo.value
        .collect { case repo: MavenRepo => repo.root }
        .flatMap(root => scala.util.Try(new java.net.URL(root).getHost).toOption)
        .getOrElse(defaultHost)
      Credentials(
        // Credentials matching is done using both: realm and host keys, sbt/sbt#2366 allows using
        // credential without a realm by providing an empty string for realm.
        "" /* realm */,
        host,
        sys.env.getOrElse("ASF_USERNAME", ""),
        sys.env.getOrElse("ASF_PASSWORD", ""))
    },
    publishTo := {
      // Snapshots and releases go to different repositories; each URL can be overridden
      // through the corresponding environment variable.
      if (isSnapshot.value) {
        val publishUrl = sys.env.getOrElse("SONATYPE_SNAPSHOTS_URL", "https://repository.apache.org/content/repositories/snapshots")
        Some(("snapshots" at publishUrl).withAllowInsecureProtocol(true))
      } else {
        val publishUrl = sys.env.getOrElse("SONATYPE_RELEASES_URL", "https://repository.apache.org/service/local/staging/deploy/maven2")
        Some(("releases" at publishUrl).withAllowInsecureProtocol(true))
      }
    },
    licenses += ("Apache-2.0", url("https://www.apache.org/licenses/LICENSE-2.0")),
    scmInfo := Some(
      ScmInfo(
        url("https://celeborn.apache.org/"),
        "scm:git:https://github.com/apache/celeborn.git",
        "scm:git:git@github.com:apache/celeborn.git"))
  )

  /** Protobuf code generation: compiler version, `src/main/proto` as source, Java output into managed sources. */
  lazy val protoSettings = Seq(
    // Setting version for the protobuf compiler
    PB.protocVersion := Dependencies.protocVersion,
    // set proto sources path
    Compile / PB.protoSources := Seq(sourceDirectory.value / "main" / "proto"),
    Compile / PB.targets := Seq(PB.gens.java(Dependencies.protocVersion) -> (Compile / sourceManaged).value)
  )

  /** Test-scoped dependencies added to most modules. */
  lazy val commonUnitTestDependencies = Seq(
    Dependencies.mockitoCore % "test",
    Dependencies.scalatest % "test",
    Dependencies.junit % "test",
    // https://www.scala-sbt.org/1.x/docs/Testing.html
    Dependencies.junitInterface % "test")
}
/**
 * Build entry point: lists the projects that make up the build. The core modules are
 * always present; client, web, multipart-uploader and Tez modules are appended
 * according to the active profiles (see `Utils`).
 */
object CelebornBuild extends sbt.internal.BuildDef {

  override def projectDefinitions(baseDirectory: File): Seq[Project] = {
    // Modules that are part of every build, regardless of profiles.
    val coreModules: Seq[Project] = Seq(
      CelebornOpenApi.openApiClientMasterGenerate,
      CelebornOpenApi.openApiClientWorkerGenerate,
      CelebornOpenApi.openApiClient,
      CelebornSpi.spi,
      CelebornCommon.common,
      CelebornClient.client,
      CelebornService.service,
      CelebornWorker.worker,
      CelebornMaster.master,
      CelebornCli.cli)

    // Each of these is empty unless the matching profile was requested.
    val profileModules: Seq[Project] =
      maybeSparkClientModules ++
        maybeFlinkClientModules ++
        maybeMRClientModules ++
        maybeWebModules ++
        maybeCelebornMPUModule ++
        maybeTezClientModules

    coreModules ++ profileModules
  }

  // ThisBuild / parallelExecution := false
  // scalaVersion := "2.11.12"
  // autoScalaLibrary := false
  crossScalaVersions := Nil
  // load user-defined Profiles
  // loadProfiles()
}
/** Profile parsing and the profile-driven selection of optional modules. */
object Utils {

  /**
   * Active build profiles, read from the `SBT_MAVEN_PROFILES` environment variable or,
   * if that is unset, from the `sbt.maven.profiles` system property. The value is split
   * on whitespace and commas and a leading Maven-style `-P` is dropped from each entry.
   * Defaults to `Seq("sbt")` when neither source is set.
   *
   * Side effect: when the `jdwp-test-debug` profile is present, the system property
   * `test.jdwp.enabled` is set to `"true"`.
   */
  val profiles: Seq[String] = {
    val requested = Properties.envOrNone("SBT_MAVEN_PROFILES")
      .orElse(Properties.propOrNone("sbt.maven.profiles"))
    val active = requested match {
      case None => Seq("sbt")
      case Some(raw) =>
        // Strip only a leading "-P", and drop empty entries after stripping so that
        // input such as "-P spark-3.5" does not leave an empty profile name behind.
        raw.split("(\\s+|,)").map(_.trim.stripPrefix("-P")).filter(_.nonEmpty).toSeq
    }
    if (active.contains("jdwp-test-debug")) {
      sys.props.put("test.jdwp.enabled", "true")
    }
    active
  }

  /**
   * Multipart-uploader project for the selected object store, if any. Uses the same
   * rule as `CelebornMaster` and `CelebornWorker`: any profile starting with `aws`
   * selects the S3 uploader, otherwise any profile starting with `aliyun` selects the
   * OSS uploader. Lazy, like the other profile-selected holders below, so that no
   * project is constructed while this object is still being initialized.
   */
  lazy val celeborMPUProject: Option[Project] =
    if (profiles.exists(_.startsWith("aws"))) {
      Some(CeleborMPU.celeborMPU)
    } else if (profiles.exists(_.startsWith("aliyun"))) {
      Some(CeleborMPU.celeborMPUOss)
    } else {
      None
    }
  lazy val maybeCelebornMPUModule: Seq[Project] = celeborMPUProject.map(Seq(_)).getOrElse(Seq.empty)

  // First profile whose name starts with "spark", if any.
  val SPARK_VERSION = profiles.find(_.startsWith("spark"))
  lazy val sparkClientProjects = SPARK_VERSION match {
    case Some("spark-2.4") => Some(Spark24)
    case Some("spark-3.0") => Some(Spark30)
    case Some("spark-3.1") => Some(Spark31)
    case Some("spark-3.2") => Some(Spark32)
    case Some("spark-3.3") => Some(Spark33)
    case Some("spark-3.4") => Some(Spark34)
    case Some("spark-3.5") => Some(Spark35)
    case Some("spark-4.0") => Some(Spark40)
    case _ => None
  }
  lazy val maybeSparkClientModules: Seq[Project] = sparkClientProjects.map(_.modules).getOrElse(Seq.empty)

  // First profile whose name starts with "flink", if any.
  val FLINK_VERSION = profiles.find(_.startsWith("flink"))
  lazy val flinkClientProjects = FLINK_VERSION match {
    case Some("flink-1.16") => Some(Flink116)
    case Some("flink-1.17") => Some(Flink117)
    case Some("flink-1.18") => Some(Flink118)
    case Some("flink-1.19") => Some(Flink119)
    case Some("flink-1.20") => Some(Flink120)
    case Some("flink-2.0") => Some(Flink20)
    case _ => None
  }
  lazy val maybeFlinkClientModules: Seq[Project] = flinkClientProjects.map(_.modules).getOrElse(Seq.empty)

  // First profile whose name starts with "mr", if any; only the exact name "mr" is recognised.
  val MR_VERSION = profiles.find(_.startsWith("mr"))
  lazy val mrClientProjects = MR_VERSION match {
    case Some("mr") => Some(MRClientProjects)
    case _ => None
  }
  lazy val maybeMRClientModules: Seq[Project] = mrClientProjects.map(_.modules).getOrElse(Seq.empty)

  // First profile whose name starts with "tez", if any; only the exact name "tez" is recognised.
  val TEZ_VERSION = profiles.find(_.startsWith("tez"))
  lazy val tezClientProjects = TEZ_VERSION match {
    case Some("tez") => Some(TezClientProjects)
    case _ => None
  }
  lazy val maybeTezClientModules: Seq[Project] = tezClientProjects.map(_.modules).getOrElse(Seq.empty)

  // First profile whose name starts with "web", if any; only the exact name "web" is recognised.
  val WEB_VERSION = profiles.find(_.startsWith("web"))
  lazy val webProjects = WEB_VERSION match {
    case Some("web") => Some(WebProjects)
    case _ => None
  }
  lazy val maybeWebModules: Seq[Project] = webProjects.map(_.modules).getOrElse(Seq.empty)

  /**
   * Scala version used for the build:
   *  1. the Scala version of the selected Spark profile, if there is one;
   *  2. otherwise `DEFAULT_SCALA_VERSION`.
   *
   * Fails (via `require`) when the resulting version is not in `ALL_SCALA_VERSIONS`.
   */
  def defaultScalaVersion(): String = {
    val v = sparkClientProjects.map(_.sparkProjectScalaVersion).getOrElse(DEFAULT_SCALA_VERSION)
    require(ALL_SCALA_VERSIONS.contains(v), s"found not allow scala version: $v")
    v
  }

  /**
   * The deps for shaded clients are already packaged in the jar,
   * so we should not expose the shipped transitive deps.
   *
   * Returns an XML transformation that removes every `<dependencies>` element
   * from the node it is given.
   */
  def removeDependenciesTransformer: xml.Node => xml.Node = { node =>
    new RuleTransformer(new RewriteRule {
      override def transform(n: xml.Node): Seq[xml.Node] = n match {
        case e: Elem if e.label == "dependencies" =>
          Nil
        case _ =>
          n
      }
    }).transform(node).head
  }
}
/** The `celeborn-cli` module (directory `cli`). */
object CelebornCli {

  // Both the compile and the test classpaths of each upstream module are inherited.
  private val compileAndTest = "test->test;compile->compile"

  lazy val cli = Project("celeborn-cli", file("cli"))
    .dependsOn(
      CelebornCommon.common % compileAndTest,
      CelebornMaster.master % compileAndTest,
      CelebornWorker.worker % compileAndTest,
      CelebornOpenApi.openApiClient % compileAndTest)
    .settings(
      commonSettings,
      libraryDependencies ++= Seq(Dependencies.picocli) ++ commonUnitTestDependencies)
}
/** The `celeborn-spi` module (directory `spi`); published without a Scala-version suffix. */
object CelebornSpi {

  lazy val spi = Project("celeborn-spi", file("spi"))
    .settings(commonSettings)
    .settings(releaseSettings)
    .settings(
      // No cross-version directory/artifact suffix for this module.
      crossPaths := false,
      // Replaces (rather than extends) the javac options used for doc generation.
      Compile / doc / javacOptions := Seq("-encoding", UTF_8.name(), "-source", "1.8"))
}
/** Multipart-uploader modules for S3 and Aliyun OSS, plus the storage SDK dependencies they need. */
object CeleborMPU {

  lazy val hadoopAwsDependencies = Seq(Dependencies.hadoopAws, Dependencies.awsS3)
  lazy val hadoopAliyunDependencies = Seq(Dependencies.commonsCollections, Dependencies.hadoopAliyun, Dependencies.aliyunOss)

  /**
   * Builds one uploader project. Both uploaders depend on `celeborn-service`
   * (compile and test) and on the log4j bridges; only the storage SDK differs.
   * `storageDependencies` is by-name so it is still evaluated inside the setting,
   * as it was when written inline.
   */
  private def uploaderProject(
      id: String,
      directory: String,
      storageDependencies: => Seq[ModuleID]): Project =
    Project(id, file(directory))
      .dependsOn(CelebornService.service % "test->test;compile->compile")
      .settings(
        commonSettings,
        libraryDependencies ++= Seq(
          Dependencies.log4j12Api,
          Dependencies.log4jSlf4jImpl) ++ storageDependencies)

  lazy val celeborMPU = uploaderProject(
    "celeborn-multipart-uploader-s3",
    "multipart-uploader/multipart-uploader-s3",
    hadoopAwsDependencies)

  lazy val celeborMPUOss = uploaderProject(
    "celeborn-multipart-uploader-oss",
    "multipart-uploader/multipart-uploader-oss",
    hadoopAliyunDependencies)
}
/**
 * The `celeborn-common` module (directory `common`). Besides its dependencies it
 * generates a `package.scala` carrying the build version and defines the
 * `printProfiles` task.
 */
object CelebornCommon {

  /** Task that prints the active build profiles on one line. */
  lazy val printProfiles = taskKey[Unit]("Prints Profiles")

  lazy val common = Project("celeborn-common", file("common"))
    .dependsOn(CelebornSpi.spi)
    .settings(
      commonSettings,
      protoSettings,
      libraryDependencies ++= Seq(
        Dependencies.protobufJava,
        Dependencies.findbugsJsr305,
        Dependencies.guava,
        Dependencies.commonsIo,
        Dependencies.ioDropwizardMetricsCore,
        Dependencies.ioDropwizardMetricsGraphite,
        Dependencies.ioDropwizardMetricsJvm,
        Dependencies.ioNetty,
        Dependencies.commonsCrypto,
        Dependencies.commonsLang3,
        Dependencies.hadoopClientApi,
        Dependencies.hadoopClientRuntime,
        Dependencies.jdkTools,
        Dependencies.leveldbJniAll,
        Dependencies.roaringBitmap,
        Dependencies.scalaReflect,
        Dependencies.slf4jJclOverSlf4j,
        Dependencies.slf4jJulToSlf4j,
        Dependencies.slf4jApi,
        Dependencies.snakeyaml,
        Dependencies.snappyJava,
        Dependencies.jacksonModule,
        Dependencies.jacksonCore,
        Dependencies.jacksonDatabind,
        Dependencies.jacksonAnnotations,
        Dependencies.log4jSlf4jImpl % "test",
        Dependencies.log4j12Api % "test",
        // SSL support
        Dependencies.bouncycastleBcprovJdk18on,
        Dependencies.bouncycastleBcpkixJdk18on
      ) ++ commonUnitTestDependencies,
      // Writes org/apache/celeborn/package.scala into the managed sources, exposing the
      // project version as `org.apache.celeborn.VERSION`.
      Compile / sourceGenerators += Def.task {
        val versionFile =
          (Compile / sourceManaged).value / "org" / "apache" / "celeborn" / "package.scala"
        streams.value.log.info(s"geneate version information file ${versionFile.toPath}")
        val versionSource =
          s"""package org.apache
             |
             |package object celeborn {
             | val VERSION = "${version.value}"
             |}
             |""".stripMargin
        IO.write(versionFile, versionSource)
        Seq(versionFile)
        // The generator runs after PB.generate so the two do not write generated
        // sources concurrently.
      }.dependsOn(Compile / PB.generate),
      // a task to show current profiles
      printProfiles := {
        println(s"compile with profiles: ${profiles.mkString(" ")}")
      }
    )
}
/** The `celeborn-client` module (directory `client`). */
object CelebornClient {

  // Libraries needed at compile time.
  private def compileDependencies = Seq(
    Dependencies.ioNetty,
    Dependencies.guava,
    Dependencies.lz4Java,
    Dependencies.zstdJni,
    Dependencies.commonsLang3)

  // Logging bridges that are only required when running tests.
  private def testLoggingDependencies = Seq(
    Dependencies.log4jSlf4jImpl % "test",
    Dependencies.log4j12Api % "test")

  lazy val client = Project("celeborn-client", file("client"))
    // ref: https://www.scala-sbt.org/1.x/docs/Multi-Project.html#Classpath+dependencies
    .dependsOn(CelebornCommon.common % "test->test;compile->compile")
    .settings(
      commonSettings,
      libraryDependencies ++=
        compileDependencies ++ testLoggingDependencies ++ commonUnitTestDependencies)
}
/** The `celeborn-service` module (directory `service`): HTTP/REST, persistence and logging dependencies. */
object CelebornService {

  lazy val service = Project("celeborn-service", file("service"))
    .dependsOn(
      // compile and test classpaths of celeborn-common
      CelebornCommon.common % "test->test;compile->compile",
      // generated OpenAPI client, default (compile) mapping
      CelebornOpenApi.openApiClient)
    .settings(
      commonSettings,
      libraryDependencies ++= Seq(
        Dependencies.findbugsJsr305,
        Dependencies.commonsIo,
        Dependencies.ioNetty,
        Dependencies.commonsCrypto,
        Dependencies.slf4jApi,
        // persistence
        Dependencies.mybatis,
        Dependencies.hikaricp,
        Dependencies.jacksonDataFormatYam,
        // REST API
        Dependencies.swaggerJaxrs2,
        Dependencies.swaggerUi,
        Dependencies.jakartaServletApi,
        Dependencies.jerseyServer,
        Dependencies.jerseyContainerServletCore,
        Dependencies.jerseyHk2,
        Dependencies.jerseyMediaJsonJackson,
        Dependencies.jerseyMediaMultipart,
        Dependencies.jettyServer,
        Dependencies.jettyServlet,
        Dependencies.jettyProxy,
        // logging
        Dependencies.log4jApi,
        Dependencies.log4jCore,
        // test only
        Dependencies.log4jSlf4jImpl % "test",
        Dependencies.log4j12Api % "test",
        Dependencies.h2 % "test",
        Dependencies.jerseyTestFrameworkCore % "test",
        Dependencies.jerseyTestFrameworkProviderJetty % "test"
      ) ++ commonUnitTestDependencies)
}
/** The `celeborn-master` module (directory `master`). */
object CelebornMaster {

  /**
   * Object-store SDK dependencies added to the master: the AWS set when any profile
   * starts with `aws`, otherwise the Aliyun set when any profile starts with `aliyun`,
   * otherwise none.
   */
  val mpuDependencies: Seq[ModuleID] =
    if (profiles.exists(_.startsWith("aws"))) {
      CeleborMPU.hadoopAwsDependencies
    } else if (profiles.exists(_.startsWith("aliyun"))) {
      CeleborMPU.hadoopAliyunDependencies
    } else {
      Seq.empty
    }

  /** JMH benchmark dependencies (declared in test scope in `Dependencies`). */
  lazy val jmhDependencies = Seq(Dependencies.jmhCore, Dependencies.jmhGeneratorAnnprocess)

  lazy val master = Project("celeborn-master", file("master"))
    // "compile->compile" is part of this mapping, so no separate plain
    // dependsOn(CelebornCommon.common) is needed.
    .dependsOn(CelebornCommon.common % "test->test;compile->compile")
    .dependsOn(CelebornService.service % "test->test;compile->compile")
    .settings(
      commonSettings,
      protoSettings,
      libraryDependencies ++= Seq(
        Dependencies.guava,
        Dependencies.protobufJava,
        Dependencies.ioNetty,
        Dependencies.hadoopClientApi,
        Dependencies.log4j12Api,
        Dependencies.log4jSlf4jImpl,
        Dependencies.ratisClient,
        Dependencies.ratisCommon,
        Dependencies.ratisGrpc,
        Dependencies.ratisMetricsDefault,
        Dependencies.ratisNetty,
        Dependencies.ratisServer,
        Dependencies.ratisShell,
        Dependencies.scalatestMockito % "test"
      ) ++ commonUnitTestDependencies ++ mpuDependencies ++ jmhDependencies
    )
}
/** The `celeborn-worker` module (directory `worker`). */
object CelebornWorker {

  // Declared as a `var` in the original definition; kept that way so the member's
  // interface is unchanged, but it is now assigned exactly once.
  var worker: Project = {
    val compileAndTest = "test->test;compile->compile"

    // NOTE(review): CelebornService.service appears twice (plain and with an explicit
    // mapping). The list and its order are kept exactly as they were.
    val base = Project("celeborn-worker", file("worker"))
      .dependsOn(
        CelebornService.service,
        CelebornCommon.common % compileAndTest,
        CelebornService.service % compileAndTest,
        CelebornClient.client % "test->compile",
        CelebornMaster.master % "test->compile")

    // Add the multipart uploader matching the active object-store profile, if any.
    val withUploader =
      if (profiles.exists(_.startsWith("aws"))) {
        base.dependsOn(CeleborMPU.celeborMPU)
      } else if (profiles.exists(_.startsWith("aliyun"))) {
        base.dependsOn(CeleborMPU.celeborMPUOss)
      } else {
        base
      }

    withUploader.settings(
      commonSettings,
      libraryDependencies ++= Seq(
        Dependencies.apLoader,
        Dependencies.guava,
        Dependencies.commonsIo,
        Dependencies.ioNetty,
        Dependencies.log4j12Api,
        Dependencies.log4jSlf4jImpl,
        Dependencies.leveldbJniAll,
        Dependencies.roaringBitmap,
        Dependencies.rocksdbJni,
        Dependencies.scalatestMockito % "test",
        Dependencies.jerseyTestFrameworkCore % "test",
        Dependencies.jerseyTestFrameworkProviderJetty % "test"
      ) ++ commonUnitTestDependencies
    )
  }
}
////////////////////////////////////////////////////////
// Spark Client //
////////////////////////////////////////////////////////
/** Spark client definition used for the `spark-2.4` profile. */
object Spark24 extends SparkClientProjects {

  // Project locations and ids
  val sparkClientProjectName = "celeborn-client-spark-2"
  val sparkClientProjectPath = "client-spark/spark-2"
  val sparkClientShadedProjectName = "celeborn-client-spark-2-shaded"
  val sparkClientShadedProjectPath = "client-spark/spark-2-shaded"

  // Spark and Scala versions
  val sparkVersion = "2.4.8"
  val sparkProjectScalaVersion = "2.11.12"
  // scalaBinaryVersion
  // val scalaBinaryVersion = "2.11"

  // Compression library versions used with this Spark line
  val lz4JavaVersion = "1.4.0"
  val zstdJniVersion = "1.4.4-3"
  // val jacksonVersion = "2.5.7"
  // val jacksonDatabindVersion = "2.6.7.3"

  // Overrides the inherited `includeColumnarShuffle` flag to false for this profile.
  override val includeColumnarShuffle: Boolean = false
}
/** Spark 3.0 client profile (Scala 2.12). */
object Spark30 extends SparkClientProjects {
  // Spark / Scala toolchain and the compression library versions used with it.
  val sparkVersion: String = "3.0.3"
  val sparkProjectScalaVersion: String = "2.12.10"
  val lz4JavaVersion: String = "1.7.1"
  val zstdJniVersion: String = "1.4.4-3"

  // sbt project ids and source directories of the plain and the shaded client.
  val sparkClientProjectName: String = "celeborn-client-spark-3"
  val sparkClientProjectPath: String = "client-spark/spark-3"
  val sparkClientShadedProjectName: String = "celeborn-client-spark-3-shaded"
  val sparkClientShadedProjectPath: String = "client-spark/spark-3-shaded"
}
/** Spark 3.1 client profile (Scala 2.12). */
object Spark31 extends SparkClientProjects {
  // Spark / Scala toolchain and the compression library versions used with it.
  val sparkVersion: String = "3.1.3"
  val sparkProjectScalaVersion: String = "2.12.10"
  val lz4JavaVersion: String = "1.7.1"
  val zstdJniVersion: String = "1.4.8-1"

  // sbt project ids and source directories of the plain and the shaded client.
  val sparkClientProjectName: String = "celeborn-client-spark-3"
  val sparkClientProjectPath: String = "client-spark/spark-3"
  val sparkClientShadedProjectName: String = "celeborn-client-spark-3-shaded"
  val sparkClientShadedProjectPath: String = "client-spark/spark-3-shaded"
}
/** Spark 3.2 client profile (Scala 2.12). */
object Spark32 extends SparkClientProjects {
  // Spark / Scala toolchain and the compression library versions used with it.
  val sparkVersion: String = "3.2.4"
  val sparkProjectScalaVersion: String = "2.12.15"
  val lz4JavaVersion: String = "1.7.1"
  val zstdJniVersion: String = "1.5.0-4"

  // sbt project ids and source directories of the plain and the shaded client.
  val sparkClientProjectName: String = "celeborn-client-spark-3"
  val sparkClientProjectPath: String = "client-spark/spark-3"
  val sparkClientShadedProjectName: String = "celeborn-client-spark-3-shaded"
  val sparkClientShadedProjectPath: String = "client-spark/spark-3-shaded"
}
/** Spark 3.3 client profile (Scala 2.12). */
object Spark33 extends SparkClientProjects {
  // Spark / Scala toolchain and the compression library versions used with it.
  val sparkVersion: String = "3.3.4"
  val sparkProjectScalaVersion: String = "2.12.15"
  val lz4JavaVersion: String = "1.8.0"
  val zstdJniVersion: String = "1.5.2-1"

  // sbt project ids and source directories of the plain and the shaded client.
  val sparkClientProjectName: String = "celeborn-client-spark-3"
  val sparkClientProjectPath: String = "client-spark/spark-3"
  val sparkClientShadedProjectName: String = "celeborn-client-spark-3-shaded"
  val sparkClientShadedProjectPath: String = "client-spark/spark-3-shaded"

  // Disabled definitions, kept for reference only:
  // val jacksonVersion = "2.13.4"
  // val jacksonDatabindVersion = "2.13.4.2"
  // val scalaBinaryVersion = "2.12"
}
/** Spark 3.4 client profile (Scala 2.12). */
object Spark34 extends SparkClientProjects {
  // Spark / Scala toolchain and the compression library versions used with it.
  val sparkVersion: String = "3.4.4"
  val sparkProjectScalaVersion: String = "2.12.17"
  val lz4JavaVersion: String = "1.8.0"
  val zstdJniVersion: String = "1.5.2-5"

  // sbt project ids and source directories of the plain and the shaded client.
  val sparkClientProjectName: String = "celeborn-client-spark-3"
  val sparkClientProjectPath: String = "client-spark/spark-3"
  val sparkClientShadedProjectName: String = "celeborn-client-spark-3-shaded"
  val sparkClientShadedProjectPath: String = "client-spark/spark-3-shaded"
}
/** Spark 3.5 client profile (Scala 2.12). */
object Spark35 extends SparkClientProjects {
  // Spark / Scala toolchain and the compression library versions used with it.
  val sparkVersion: String = "3.5.5"
  val sparkProjectScalaVersion: String = "2.12.18"
  val lz4JavaVersion: String = "1.8.0"
  val zstdJniVersion: String = "1.5.5-4"

  // sbt project ids and source directories of the plain and the shaded client.
  val sparkClientProjectName: String = "celeborn-client-spark-3"
  val sparkClientProjectPath: String = "client-spark/spark-3"
  val sparkClientShadedProjectName: String = "celeborn-client-spark-3-shaded"
  val sparkClientShadedProjectPath: String = "client-spark/spark-3-shaded"

  // Selects the `client-spark/spark-3.5-columnar-shuffle` source directory.
  override val sparkColumnarShuffleVersion: String = "3.5"
}
/**
 * Spark 4.0 (preview) client profile (Scala 2.13).
 *
 * The plain client project points at the `client-spark/spark-3` directory, while the
 * shaded project has its own `client-spark/spark-4-shaded` directory.
 */
object Spark40 extends SparkClientProjects {
  // Spark / Scala toolchain and the compression library versions used with it.
  val sparkVersion: String = "4.0.0-preview2"
  val sparkProjectScalaVersion: String = "2.13.11"
  val scalaBinaryVersion: String = "2.13"
  val lz4JavaVersion: String = "1.8.0"
  val zstdJniVersion: String = "1.5.6-5"

  // sbt project ids and source directories of the plain and the shaded client.
  val sparkClientProjectName: String = "celeborn-client-spark-4"
  val sparkClientProjectPath: String = "client-spark/spark-3"
  val sparkClientShadedProjectName: String = "celeborn-client-spark-4-shaded"
  val sparkClientShadedProjectPath: String = "client-spark/spark-4-shaded"

  // Selects the `client-spark/spark-4-columnar-shuffle` source directory.
  override val sparkColumnarShuffleVersion: String = "4"
}
/**
 * Template for the sbt projects of one Spark client profile.
 *
 * A concrete profile object supplies the versions, project ids and directories; this trait
 * turns them into the common, client, integration-test, columnar and shaded projects.
 */
trait SparkClientProjects {
  // Values every profile has to provide.
  val sparkClientProjectPath: String
  val sparkClientProjectName: String
  val sparkClientShadedProjectPath: String
  val sparkClientShadedProjectName: String
  val lz4JavaVersion: String
  val sparkProjectScalaVersion: String
  val sparkVersion: String
  val zstdJniVersion: String

  // Defaults a profile may override.
  // Whether the columnar common/shuffle projects are part of this profile.
  val includeColumnarShuffle: Boolean = true
  // Version token used in the columnar shuffle source directory name.
  val sparkColumnarShuffleVersion: String = "3"

  /** All projects of this profile; the columnar projects are appended only when enabled. */
  def modules: Seq[Project] = {
    val coreModules = Seq(sparkCommon, sparkClient, sparkIt, sparkGroup, sparkClientShade)
    val columnarModules =
      if (includeColumnarShuffle) Seq(sparkColumnarCommon, sparkColumnarShuffle)
      else Seq.empty[Project]
    coreModules ++ columnarModules
  }

  // for test only, don't use this group for any other projects
  // NOTE: sbt's `project` macro derives the default id and base directory from the name of
  // the enclosing val (`withId` only replaces the id), so the local must stay named `p`.
  lazy val sparkGroup = {
    val p = (project withId "celeborn-spark-group")
      .aggregate(sparkCommon, sparkClient, sparkIt)
    if (includeColumnarShuffle) p.aggregate(sparkColumnarCommon, sparkColumnarShuffle) else p
  }

  /** Code shared by the Spark clients, compiled against the profile's Spark version. */
  def sparkCommon: Project = {
    val sparkLibraries = Seq(
      "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
      "org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
      ("org.apache.spark" %% "spark-core" % sparkVersion % "test").classifier("tests"))

    Project("celeborn-spark-common", file("client-spark/common"))
      .dependsOn(CelebornCommon.common)
      // ref: https://www.scala-sbt.org/1.x/docs/Multi-Project.html#Classpath+dependencies
      .dependsOn(CelebornClient.client % "test->test;compile->compile")
      .settings(
        commonSettings,
        libraryDependencies ++= sparkLibraries ++ commonUnitTestDependencies
      )
  }

  /** The profile's Spark client project; Spark itself is a `provided` dependency. */
  def sparkClient: Project = {
    val sparkLibraries = Seq(
      "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
      "org.apache.spark" %% "spark-sql" % sparkVersion % "provided")
    val extraTestLibraries = Seq(Dependencies.mockitoInline % "test")

    Project(sparkClientProjectName, file(sparkClientProjectPath))
      .dependsOn(CelebornCommon.common, sparkCommon)
      // ref: https://www.scala-sbt.org/1.x/docs/Multi-Project.html#Classpath+dependencies
      .dependsOn(CelebornClient.client % "test->test;compile->compile")
      .settings(
        commonSettings,
        libraryDependencies ++= sparkLibraries ++ commonUnitTestDependencies ++ extraTestLibraries
      )
  }

  /** Columnar code shared by the columnar shuffle project; builds on the Spark client. */
  def sparkColumnarCommon: Project = {
    val sparkLibraries = Seq("org.apache.spark" %% "spark-sql" % sparkVersion % "provided")

    Project("celeborn-spark-3-columnar-common", file("client-spark/spark-3-columnar-common"))
      // ref: https://www.scala-sbt.org/1.x/docs/Multi-Project.html#Classpath+dependencies
      .dependsOn(sparkClient)
      .settings(
        commonSettings,
        libraryDependencies ++= sparkLibraries
      )
  }

  /** Columnar shuffle project; its directory name embeds `sparkColumnarShuffleVersion`. */
  def sparkColumnarShuffle: Project = {
    val sourceDirectory = file(s"client-spark/spark-$sparkColumnarShuffleVersion-columnar-shuffle")
    val sparkLibraries = Seq("org.apache.spark" %% "spark-sql" % sparkVersion % "provided")
    val extraTestLibraries = Seq(Dependencies.mockitoInline % "test")

    Project("celeborn-spark-3-columnar-shuffle", sourceDirectory)
      // ref: https://www.scala-sbt.org/1.x/docs/Multi-Project.html#Classpath+dependencies
      .dependsOn(sparkColumnarCommon)
      .dependsOn(sparkClient % "test->test;compile->compile")
      .dependsOn(CelebornClient.client % "test")
      .settings(
        commonSettings,
        libraryDependencies ++= sparkLibraries ++ commonUnitTestDependencies ++ extraTestLibraries
      )
  }

  /** Integration tests that depend on master, worker and the Spark client. */
  def sparkIt: Project = {
    val sparkTestLibraries = Seq(
      "org.apache.spark" %% "spark-core" % sparkVersion % "test",
      "org.apache.spark" %% "spark-sql" % sparkVersion % "test",
      // The spark-core test jar is pulled in without its Jersey inject/core dependencies.
      ("org.apache.spark" %% "spark-core" % sparkVersion % "test")
        .classifier("tests")
        .excludeAll(
          ExclusionRule("org.glassfish.jersey.inject", "*"),
          ExclusionRule("org.glassfish.jersey.core", "*")),
      ("org.apache.spark" %% "spark-sql" % sparkVersion % "test").classifier("tests"))

    Project("celeborn-spark-it", file("tests/spark-it"))
      // ref: https://www.scala-sbt.org/1.x/docs/Multi-Project.html#Classpath+dependencies
      .dependsOn(CelebornCommon.common % "test->test;compile->compile")
      .dependsOn(CelebornClient.client % "test->test;compile->compile")
      .dependsOn(CelebornMaster.master % "test->test;compile->compile")
      .dependsOn(CelebornWorker.worker % "test->test;compile->compile")
      .dependsOn(sparkClient % "test->test;compile->compile")
      .settings(
        commonSettings,
        libraryDependencies ++= sparkTestLibraries ++ commonUnitTestDependencies
      )
  }

  /**
   * Shaded client: an assembly jar that bundles only the allow-listed jars, relocates their
   * packages under `org.apache.celeborn.shaded`, and is published as the module's main jar.
   */
  def sparkClientShade: Project = {
    val unshaded = Project(sparkClientShadedProjectName, file(sparkClientShadedProjectPath))
      .dependsOn(sparkClient)
    val withColumnar =
      if (includeColumnarShuffle) unshaded.dependsOn(sparkColumnarShuffle) else unshaded

    withColumnar
      .disablePlugins(AddMetaInfLicenseFiles)
      .settings(
        commonSettings,
        releaseSettings,
        // align final shaded jar name with maven.
        (assembly / assemblyJarName) := {
          val ext = artifact.value.extension
          s"${moduleName.value}_${scalaBinaryVersion.value}-${version.value}.$ext"
        },
        (assembly / test) := {},
        (assembly / logLevel) := Level.Info,
        // Exclude `scala-library` from assembly.
        (assembly / assemblyPackageScala / assembleArtifact) := false,
        // Every classpath jar is excluded unless its file name starts with one of these prefixes.
        (assembly / assemblyExcludedJars) := {
          val bundledJarPrefixes = Seq(
            "celeborn-",
            "protobuf-java-",
            "guava-",
            "failureaccess-",
            "netty-",
            "commons-lang3-",
            "commons-io-",
            "RoaringBitmap-")
          (assembly / fullClasspath).value.filterNot { entry =>
            val jarName = entry.data.getName
            bundledJarPrefixes.exists(prefix => jarName.startsWith(prefix))
          }
        },
        (assembly / assemblyShadeRules) := Seq(
          ShadeRule.rename("com.google.protobuf.**" -> "org.apache.celeborn.shaded.com.google.protobuf.@1").inAll,
          ShadeRule.rename("com.google.common.**" -> "org.apache.celeborn.shaded.com.google.common.@1").inAll,
          ShadeRule.rename("io.netty.**" -> "org.apache.celeborn.shaded.io.netty.@1").inAll,
          ShadeRule.rename("org.apache.commons.**" -> "org.apache.celeborn.shaded.org.apache.commons.@1").inAll,
          ShadeRule.rename("org.roaringbitmap.**" -> "org.apache.celeborn.shaded.org.roaringbitmap.@1").inAll
        ),
        // The cases are order-sensitive: the first matching one decides the strategy.
        (assembly / assemblyMergeStrategy) := {
          case path if path.toLowerCase(Locale.ROOT).endsWith("manifest.mf") => MergeStrategy.discard
          // the LicenseAndNoticeMergeStrategy always picks the license/notice file from the current project
          case "META-INF/LICENSE" | "META-INF/NOTICE" =>
            CustomMergeStrategy("LicenseAndNoticeMergeStrategy") { conflicts =>
              val firstConflict = conflicts.head
              val projectLicenseFile = (Compile / resourceDirectory).value / firstConflict.target
              val openProjectFile = () =>
                new java.io.BufferedInputStream(new java.io.FileInputStream(projectLicenseFile))
              Right(Vector(JarEntry(firstConflict.target, openProjectFile)))
            }
          case PathList(segments @ _*) if Assembly.isLicenseFile(segments.last) => MergeStrategy.discard
          // Drop all proto files that are not needed as artifacts of the build.
          case path if path.toLowerCase(Locale.ROOT).endsWith(".proto") => MergeStrategy.discard
          case path if path.toLowerCase(Locale.ROOT).startsWith("meta-inf/native-image") => MergeStrategy.discard
          // Drop netty jnilib
          case path if path.toLowerCase(Locale.ROOT).endsWith(".jnilib") => MergeStrategy.discard
          // rename netty native lib
          case "META-INF/native/libnetty_transport_native_epoll_x86_64.so" =>
            CustomMergeStrategy.rename(_ => "META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_x86_64.so")
          case "META-INF/native/libnetty_transport_native_epoll_aarch_64.so" =>
            CustomMergeStrategy.rename(_ => "META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_aarch_64.so")
          case _ => MergeStrategy.first
        },
        // The assembly jar replaces the regular binary package of this project.
        Compile / packageBin := assembly.value,
        pomPostProcess := removeDependenciesTransformer
      )
  }
}
////////////////////////////////////////////////////////
// Flink Client //
////////////////////////////////////////////////////////
/** Flink 1.16 client profile. */
object Flink116 extends FlinkClientProjects {
  val flinkVersion: String = "1.16.3"

  // Project ids use `_` in place of `.`, because sbt does not allow periods in project names.
  val flinkClientProjectName: String = "celeborn-client-flink-1_16"
  val flinkClientShadedProjectName: String = "celeborn-client-flink-1_16-shaded"

  // Source directories of the plain and the shaded client.
  val flinkClientProjectPath: String = "client-flink/flink-1.16"
  val flinkClientShadedProjectPath: String = "client-flink/flink-1.16-shaded"
}
/** Flink 1.17 client profile. */
object Flink117 extends FlinkClientProjects {
  val flinkVersion: String = "1.17.2"

  // Project ids use `_` in place of `.`, because sbt does not allow periods in project names.
  val flinkClientProjectName: String = "celeborn-client-flink-1_17"
  val flinkClientShadedProjectName: String = "celeborn-client-flink-1_17-shaded"

  // Source directories of the plain and the shaded client.
  val flinkClientProjectPath: String = "client-flink/flink-1.17"
  val flinkClientShadedProjectPath: String = "client-flink/flink-1.17-shaded"
}
/** Flink 1.18 client profile. */
object Flink118 extends FlinkClientProjects {
  val flinkVersion: String = "1.18.1"

  // Project ids use `_` in place of `.`, because sbt does not allow periods in project names.
  val flinkClientProjectName: String = "celeborn-client-flink-1_18"
  val flinkClientShadedProjectName: String = "celeborn-client-flink-1_18-shaded"

  // Source directories of the plain and the shaded client.
  val flinkClientProjectPath: String = "client-flink/flink-1.18"
  val flinkClientShadedProjectPath: String = "client-flink/flink-1.18-shaded"
}
/** Flink 1.19 client profile. */
object Flink119 extends FlinkClientProjects {
  val flinkVersion: String = "1.19.2"

  // Project ids use `_` in place of `.`, because sbt does not allow periods in project names.
  val flinkClientProjectName: String = "celeborn-client-flink-1_19"
  val flinkClientShadedProjectName: String = "celeborn-client-flink-1_19-shaded"

  // Source directories of the plain and the shaded client.
  val flinkClientProjectPath: String = "client-flink/flink-1.19"
  val flinkClientShadedProjectPath: String = "client-flink/flink-1.19-shaded"
}
/** Flink 1.20 client profile. */
object Flink120 extends FlinkClientProjects {
  val flinkVersion: String = "1.20.1"

  // Project ids use `_` in place of `.`, because sbt does not allow periods in project names.
  val flinkClientProjectName: String = "celeborn-client-flink-1_20"
  val flinkClientShadedProjectName: String = "celeborn-client-flink-1_20-shaded"

  // Source directories of the plain and the shaded client.
  val flinkClientProjectPath: String = "client-flink/flink-1.20"
  val flinkClientShadedProjectPath: String = "client-flink/flink-1.20-shaded"
}
/** Flink 2.0 client profile. */
object Flink20 extends FlinkClientProjects {
  val flinkVersion: String = "2.0.0"

  // Project ids use `_` in place of `.`, because sbt does not allow periods in project names.
  val flinkClientProjectName: String = "celeborn-client-flink-2_0"
  val flinkClientShadedProjectName: String = "celeborn-client-flink-2_0-shaded"

  // Source directories of the plain and the shaded client.
  val flinkClientProjectPath: String = "client-flink/flink-2.0"
  val flinkClientShadedProjectPath: String = "client-flink/flink-2.0-shaded"
}
/**
 * Template for the sbt projects of one Flink client profile.
 *
 * A concrete profile object supplies the Flink version plus the project ids and directories;
 * this trait turns them into the common, client, integration-test and shaded projects.
 */
trait FlinkClientProjects {
  val flinkVersion: String
  // note that SBT does not allow using the period symbol (.) in project names.
  val flinkClientProjectPath: String
  val flinkClientProjectName: String
  val flinkClientShadedProjectPath: String
  val flinkClientShadedProjectName: String

  // Flink artifacts that are only needed on the test classpath of the integration tests.
  lazy val flinkStreamingDependency: ModuleID = "org.apache.flink" % "flink-streaming-java" % flinkVersion % "test"
  lazy val flinkClientsDependency: ModuleID = "org.apache.flink" % "flink-clients" % flinkVersion % "test"
  lazy val flinkRuntimeWebDependency: ModuleID = "org.apache.flink" % "flink-runtime-web" % flinkVersion % "test"

  /** All projects of this profile. */
  def modules: Seq[Project] = Seq(flinkCommon, flinkClient, flinkIt, flinkGroup, flinkClientShade)

  // for test only, don't use this group for any other projects
  // NOTE: sbt's `project` macro derives the default id and base directory from the name of
  // the enclosing val (`withId` only replaces the id), so this val should not be renamed.
  lazy val flinkGroup = (project withId "celeborn-flink-group")
    .aggregate(flinkCommon, flinkClient, flinkIt)

  // The first two dot-separated components of `flinkVersion`, e.g:
  // 2.0.0 -> 2.0
  // 1.20.1 -> 1.20
  // 1.19.2 -> 1.19
  // 1.18.1 -> 1.18
  // 1.17.2 -> 1.17
  // 1.16.3 -> 1.16
  // `mkString` (unlike `reduce`) does not throw when the split yields no components.
  lazy val flinkMajorVersion: String = flinkVersion.split("\\.").take(2).mkString(".")

  /**
   * File name of the shaded client jar.
   *
   * the output would be something like: celeborn-client-flink-1.17-shaded_2.12-0.4.0-SNAPSHOT.jar
   *
   * @param revision project version placed after the Scala binary version
   * @param artifact artifact whose extension becomes the file extension
   * @param scalaBinaryVersionString Scala binary version used as the `_<version>` suffix
   */
  def flinkClientShadeJarName(
      revision: String,
      artifact: Artifact,
      scalaBinaryVersionString: String): String =
    s"celeborn-client-flink-$flinkMajorVersion-shaded_$scalaBinaryVersionString-${revision}.${artifact.extension}"

  /** Code shared by the Flink clients; `flink-runtime` is a `provided` dependency. */
  def flinkCommon: Project = {
    Project("celeborn-flink-common", file("client-flink/common"))
      .dependsOn(CelebornCommon.common, CelebornClient.client)
      .settings(
        commonSettings,
        libraryDependencies ++= Seq(
          "org.apache.flink" % "flink-runtime" % flinkVersion % "provided"
        ) ++ commonUnitTestDependencies
      )
  }

  /** The profile's Flink client; its module name carries the dotted major version. */
  def flinkClient: Project = {
    Project(flinkClientProjectName, file(flinkClientProjectPath))
      .dependsOn(CelebornCommon.common, CelebornClient.client, flinkCommon)
      .settings(
        commonSettings,
        moduleName := s"celeborn-client-flink-$flinkMajorVersion",
        libraryDependencies ++= Seq(
          "org.apache.flink" % "flink-runtime" % flinkVersion % "provided",
          Dependencies.log4jSlf4jImpl % "test",
          Dependencies.log4j12Api % "test"
        ) ++ commonUnitTestDependencies
      )
  }

  /** Integration tests that depend on master, worker and the Flink client. */
  def flinkIt: Project = {
    Project("celeborn-flink-it", file("tests/flink-it"))
      // ref: https://www.scala-sbt.org/1.x/docs/Multi-Project.html#Classpath+dependencies
      .dependsOn(CelebornCommon.common % "test->test;compile->compile")
      .dependsOn(CelebornClient.client % "test->test;compile->compile")
      .dependsOn(CelebornMaster.master % "test->test;compile->compile")
      .dependsOn(CelebornWorker.worker % "test->test;compile->compile")
      .dependsOn(flinkClient % "test->test;compile->compile")
      .settings(
        commonSettings,
        libraryDependencies ++= Seq(
          "org.apache.flink" % "flink-runtime" % flinkVersion % "test",
          flinkStreamingDependency,
          flinkClientsDependency,
          flinkRuntimeWebDependency
        ) ++ commonUnitTestDependencies,
        // Expose the Flink version to the tests; an explicit pair avoids argument auto-tupling.
        (Test / envVars) += ("FLINK_VERSION" -> flinkVersion)
      )
  }

  /**
   * Shaded client: an assembly jar that bundles only the allow-listed jars, relocates their
   * packages under `org.apache.celeborn.shaded`, and is published as the module's main jar.
   */
  def flinkClientShade: Project = {
    Project(flinkClientShadedProjectName, file(flinkClientShadedProjectPath))
      .dependsOn(flinkClient)
      .disablePlugins(AddMetaInfLicenseFiles)
      .settings(
        commonSettings,
        releaseSettings,
        moduleName := s"celeborn-client-flink-$flinkMajorVersion-shaded",
        (assembly / test) := { },
        (assembly / assemblyJarName) := {
          val revision: String = version.value
          val artifactValue: Artifact = artifact.value
          flinkClientShadeJarName(revision, artifactValue, scalaBinaryVersion.value)
        },
        (assembly / logLevel) := Level.Info,
        // Exclude `scala-library` from assembly.
        (assembly / assemblyPackageScala / assembleArtifact) := false,
        // Every classpath jar is excluded unless its file name starts with one of the prefixes below.
        (assembly / assemblyExcludedJars) := {
          val cp = (assembly / fullClasspath).value
          cp filter { v =>
            val name = v.data.getName
            !(name.startsWith("celeborn-") ||
              name.startsWith("protobuf-java-") ||
              name.startsWith("guava-") ||
              name.startsWith("failureaccess-") ||
              name.startsWith("netty-") ||
              name.startsWith("commons-lang3-") ||
              name.startsWith("RoaringBitmap-"))
          }
        },
        (assembly / assemblyShadeRules) := Seq(
          ShadeRule.rename("com.google.protobuf.**" -> "org.apache.celeborn.shaded.com.google.protobuf.@1").inAll,
          ShadeRule.rename("com.google.common.**" -> "org.apache.celeborn.shaded.com.google.common.@1").inAll,
          ShadeRule.rename("io.netty.**" -> "org.apache.celeborn.shaded.io.netty.@1").inAll,
          ShadeRule.rename("org.apache.commons.**" -> "org.apache.celeborn.shaded.org.apache.commons.@1").inAll,
          ShadeRule.rename("org.roaringbitmap.**" -> "org.apache.celeborn.shaded.org.roaringbitmap.@1").inAll
        ),
        // The cases are order-sensitive: the first matching one decides the strategy.
        (assembly / assemblyMergeStrategy) := {
          case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") => MergeStrategy.discard
          // the LicenseAndNoticeMergeStrategy always picks the license/notice file from the current project
          case m @ ("META-INF/LICENSE" | "META-INF/NOTICE") => CustomMergeStrategy("LicenseAndNoticeMergeStrategy") { conflicts =>
            val entry = conflicts.head
            val projectLicenseFile = (Compile / resourceDirectory).value / entry.target
            val stream = () => new java.io.BufferedInputStream(new java.io.FileInputStream(projectLicenseFile))
            Right(Vector(JarEntry(entry.target, stream)))
          }
          case PathList(ps@_*) if Assembly.isLicenseFile(ps.last) => MergeStrategy.discard
          // Drop all proto files that are not needed as artifacts of the build.
          case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") => MergeStrategy.discard
          case m if m.toLowerCase(Locale.ROOT).startsWith("meta-inf/native-image") => MergeStrategy.discard
          // Drop netty jnilib
          case m if m.toLowerCase(Locale.ROOT).endsWith(".jnilib") => MergeStrategy.discard
          // rename netty native lib
          case "META-INF/native/libnetty_transport_native_epoll_x86_64.so" => CustomMergeStrategy.rename( _ => "META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_x86_64.so" )
          case "META-INF/native/libnetty_transport_native_epoll_aarch_64.so" => CustomMergeStrategy.rename( _ => "META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_aarch_64.so" )
          case _ => MergeStrategy.first
        },
        // The assembly jar replaces the regular binary package of this project.
        Compile / packageBin := assembly.value,
        pomPostProcess := removeDependenciesTransformer
      )
  }
}
////////////////////////////////////////////////////////
// MR Client //
////////////////////////////////////////////////////////
/**
 * sbt projects of the MapReduce client: the client itself, its integration tests, a
 * test-only aggregate and the shaded client, plus the `copyDeps` task used by the tests.
 */
object MRClientProjects {

  /** The MapReduce client project, rooted at `client-mr/mr`. */
  def mrClient: Project = {
    Project("celeborn-client-mr", file("client-mr/mr"))
      .dependsOn(CelebornCommon.common, CelebornClient.client)
      .settings(
        commonSettings,
        libraryDependencies ++= Seq(
          Dependencies.hadoopClientApi,
          Dependencies.hadoopClientRuntime,
          Dependencies.hadoopMapreduceClientApp,
          Dependencies.jacksonJaxrsJsonProvider,
          Dependencies.jakartaActivationApi
        ) ++ commonUnitTestDependencies,
        dependencyOverrides += Dependencies.commonsCompress
      )
  }

  /** Integration tests; `copyDepsSettings` makes test compilation run `copyDeps` first. */
  def mrIt: Project = {
    Project("celeborn-mr-it", file("tests/mr-it"))
      // ref: https://www.scala-sbt.org/1.x/docs/Multi-Project.html#Classpath+dependencies
      .dependsOn(CelebornCommon.common % "test->test;compile->compile")
      .dependsOn(CelebornClient.client % "test->test;compile->compile")
      .dependsOn(CelebornMaster.master % "test->test;compile->compile")
      .dependsOn(CelebornWorker.worker % "test->test;compile->compile")
      .dependsOn(mrClient % "test->test;compile->compile")
      .settings(
        commonSettings,
        copyDepsSettings,
        libraryDependencies ++= Seq(
          "org.apache.hadoop" % "hadoop-client-minicluster" % Dependencies.hadoopVersion % "test",
          "org.apache.hadoop" % "hadoop-mapreduce-examples" % Dependencies.hadoopVersion % "test",
          "org.bouncycastle" % "bcpkix-jdk15on" % "1.68" % "test"
        ) ++ commonUnitTestDependencies
      )
  }

  /**
   * Shaded client: an assembly jar that bundles only the allow-listed jars (including
   * `scala-library`), relocates their packages under `org.apache.celeborn.shaded`, and is
   * published as the module's main jar.
   */
  def mrClientShade: Project = {
    Project("celeborn-client-mr-shaded", file("client-mr/mr-shaded"))
      .dependsOn(mrClient)
      .disablePlugins(AddMetaInfLicenseFiles)
      .settings(
        commonSettings,
        releaseSettings,
        // align final shaded jar name with maven.
        (assembly / assemblyJarName) := {
          val extension = artifact.value.extension
          s"${moduleName.value}_${scalaBinaryVersion.value}-${version.value}.$extension"
        },
        (assembly / test) := {},
        (assembly / logLevel) := Level.Info,
        // Include `scala-library` in the assembly.
        (assembly / assemblyPackageScala / assembleArtifact) := true,
        // Every classpath jar is excluded unless its file name starts with one of the prefixes below.
        (assembly / assemblyExcludedJars) := {
          val cp = (assembly / fullClasspath).value
          cp filter { v =>
            val name = v.data.getName
            !(name.startsWith("celeborn-") ||
              name.startsWith("protobuf-java-") ||
              name.startsWith("guava-") ||
              name.startsWith("failureaccess-") ||
              name.startsWith("netty-") ||
              name.startsWith("commons-lang3-") ||
              name.startsWith("RoaringBitmap-") ||
              name.startsWith("lz4-java-") ||
              name.startsWith("zstd-jni-") ||
              name.startsWith("scala-library-"))
          }
        },
        (assembly / assemblyShadeRules) := Seq(
          ShadeRule.rename("com.google.protobuf.**" -> "org.apache.celeborn.shaded.com.google.protobuf.@1").inAll,
          ShadeRule.rename("com.google.common.**" -> "org.apache.celeborn.shaded.com.google.common.@1").inAll,
          ShadeRule.rename("io.netty.**" -> "org.apache.celeborn.shaded.io.netty.@1").inAll,
          ShadeRule.rename("org.apache.commons.**" -> "org.apache.celeborn.shaded.org.apache.commons.@1").inAll,
          ShadeRule.rename("org.roaringbitmap.**" -> "org.apache.celeborn.shaded.org.roaringbitmap.@1").inAll
        ),
        // The cases are order-sensitive: the first matching one decides the strategy.
        (assembly / assemblyMergeStrategy) := {
          case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") => MergeStrategy.discard
          // For netty-3.x.y.Final.jar
          case m if m.startsWith("META-INF/license/") => MergeStrategy.discard
          // the LicenseAndNoticeMergeStrategy always picks the license/notice file from the current project
          case m @ ("META-INF/LICENSE" | "META-INF/NOTICE") => CustomMergeStrategy("LicenseAndNoticeMergeStrategy") { conflicts =>
            val entry = conflicts.head
            val projectLicenseFile = (Compile / resourceDirectory).value / entry.target
            val stream = () => new java.io.BufferedInputStream(new java.io.FileInputStream(projectLicenseFile))
            Right(Vector(JarEntry(entry.target, stream)))
          }
          case PathList(ps@_*) if Assembly.isLicenseFile(ps.last) => MergeStrategy.discard
          // Drop all proto files that are not needed as artifacts of the build.
          case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") => MergeStrategy.discard
          case m if m.toLowerCase(Locale.ROOT).startsWith("meta-inf/native-image") => MergeStrategy.discard
          // Drop netty jnilib
          case m if m.toLowerCase(Locale.ROOT).endsWith(".jnilib") => MergeStrategy.discard
          // rename netty native lib
          case "META-INF/native/libnetty_transport_native_epoll_x86_64.so" => CustomMergeStrategy.rename(_ => "META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_x86_64.so")
          case "META-INF/native/libnetty_transport_native_epoll_aarch_64.so" => CustomMergeStrategy.rename(_ => "META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_aarch_64.so")
          case _ => MergeStrategy.first
        },
        // The assembly jar replaces the regular binary package of this project.
        Compile / packageBin := assembly.value,
        pomPostProcess := removeDependenciesTransformer
      )
  }

  /** All MapReduce projects. */
  def modules: Seq[Project] = {
    Seq(mrClient, mrIt, mrGroup, mrClientShade)
  }

  // for test only, don't use this group for any other projects
  // NOTE: sbt's `project` macro derives the default id and base directory from the name of
  // the enclosing val (`withId` only replaces the id), so this val should not be renamed.
  lazy val mrGroup = (project withId "celeborn-mr-group").aggregate(mrClient, mrIt)

  val copyDeps = TaskKey[Unit]("copyDeps", "Copies needed dependencies to the build directory.")

  // Destination of `copyDeps`: the `mapreduce_lib` directory under the compile cross-target.
  val destPath = (Compile / crossTarget) {
    _ / "mapreduce_lib"
  }

  /**
   * Settings that define `copyDeps` and hook it in front of test compilation.
   *
   * `copyDeps` copies every regular file on the compile dependency classpath into
   * `destPath`, creating the directory if needed and overwriting files of the same name.
   */
  lazy val copyDepsSettings = Seq(
    copyDeps := {
      val dest = destPath.value
      if (!dest.isDirectory() && !dest.mkdirs()) {
        throw new java.io.IOException("Failed to create jars directory.")
      }

      (Compile / dependencyClasspath).value.map(_.data)
        .filter { jar => jar.isFile() }
        .foreach { jar =>
          val destJar = new File(dest, jar.getName())
          // REPLACE_EXISTING overwrites a stale copy in one step instead of a separate
          // delete whose result used to be ignored.
          Files.copy(
            jar.toPath(),
            destJar.toPath(),
            java.nio.file.StandardCopyOption.REPLACE_EXISTING)
        }
    },
    (Test / compile) := {
      copyDeps.value
      (Test / compile).value
    }
  )
}
/**
 * sbt projects and tasks for the OpenAPI REST client.
 *
 * Two helper projects run the OpenAPI generator for the master and worker specs into
 * directories under `target`; the `celeborn-openapi-client` project offers `generate` (copy
 * the generated sources into the source tree) and `check` (verify that the source tree
 * matches freshly generated code), and publishes a shaded assembly jar.
 */
object CelebornOpenApi {
  // Location of the OpenAPI specs; the generator templates live in its `templates` subdirectory.
  val openApiSpecDir = "openapi/openapi-client/src/main/openapi3"
  // Generator output directories of the two helper projects.
  val openApiMasterInternalOutputDir = "openapi/openapi-client/target/master/generated-sources/java"
  val openApiWorkerInternalOutputDir = "openapi/openapi-client/target/worker/generated-sources/java"
  // Source directory of the client project that `generate` overwrites and `check` verifies.
  val openApiClientOutputDir = "openapi/openapi-client/src/main/java"

  val generate = TaskKey[Unit]("generate", "generate openapi client code")
  val check = TaskKey[Unit]("check", "check the openapi spec and generated code")

  // Generator options shared by the master and the worker helper projects.
  val commonOpenApiClientGenerateSettings = Seq(
    openApiGeneratorName := "java",
    openApiGenerateApiTests := SettingDisabled,
    openApiGenerateModelTests := SettingDisabled,
    openApiModelPackage := "org.apache.celeborn.rest.v1.model",
    openApiAdditionalProperties := Map(
      "dateLibrary" -> "java8",
      "useGzipFeature" -> "true",
      "library" -> "apache-httpclient",
      "hideGenerationTimestamp" -> "true",
      "supportUrlQuery" -> "false",
      "annotationLibrary" -> "none",
      "templateDir" -> s"$openApiSpecDir/templates",
      "useEnumCaseInsensitive" -> "true"
    )
  )

  // Helper project that generates the master REST client from `master_rest_v1.yaml`.
  lazy val openApiClientMasterGenerate = Project("celeborn-openapi-client-master-generate", file("openapi/openapi-client/target/master"))
    .enablePlugins(OpenApiGeneratorPlugin)
    .settings(
      commonSettings,
      openApiInputSpec := (file(openApiSpecDir) / "master_rest_v1.yaml").toString,
      openApiOutputDir := openApiMasterInternalOutputDir,
      openApiApiPackage := "org.apache.celeborn.rest.v1.master",
      openApiInvokerPackage := "org.apache.celeborn.rest.v1.master.invoker",
      commonOpenApiClientGenerateSettings
    )

  // Helper project that generates the worker REST client from `worker_rest_v1.yaml`.
  lazy val openApiClientWorkerGenerate = Project("celeborn-openapi-client-worker-generate", file("openapi/openapi-client/target/worker"))
    .enablePlugins(OpenApiGeneratorPlugin)
    .settings(
      commonSettings,
      openApiInputSpec := (file(openApiSpecDir) / "worker_rest_v1.yaml").toString,
      openApiOutputDir := openApiWorkerInternalOutputDir,
      openApiApiPackage := "org.apache.celeborn.rest.v1.worker",
      openApiInvokerPackage := "org.apache.celeborn.rest.v1.worker.invoker",
      commonOpenApiClientGenerateSettings
    )

  // The published client project, including the `generate` and `check` tasks.
  lazy val openApiClient = Project("celeborn-openapi-client", file("openapi/openapi-client"))
    .settings (
      commonSettings,
      releaseSettings,
      libraryDependencies ++= Seq(
        Dependencies.jacksonAnnotations,
        Dependencies.jacksonCore,
        Dependencies.jacksonDatabind,
        Dependencies.jacksonDataTypeJsr310,
        Dependencies.jacksonJaxrsJsonProvider,
        Dependencies.findbugsJsr305,
        Dependencies.jakartaAnnotationApi,
        Dependencies.httpClient5,
        Dependencies.httpCore5,
        Dependencies.httpCore5H2,
        Dependencies.openApiToolsJacksonBindNullable,
        Dependencies.slf4jApi
      ),
      // Runs both generators, then replaces the client source directory with their output.
      generate := {
        (openApiClientMasterGenerate / Compile / openApiGenerate).value
        (openApiClientWorkerGenerate / Compile / openApiGenerate).value
        val log = streams.value.log

        log.info("Cleaning up openapi generate output directory: " + openApiClientOutputDir)
        val dstDir = file(openApiClientOutputDir)
        IO.delete(dstDir)

        val masterSrcDir = file(openApiMasterInternalOutputDir) / "src" / "main" / "java"
        log.info(s"Copying openapi generated master sources from $masterSrcDir to $dstDir")
        IO.copyDirectory(masterSrcDir, dstDir)

        val workerSrcDir = file(openApiWorkerInternalOutputDir) / "src" / "main" / "java"
        log.info(s"Copying openapi generated worker sources from $workerSrcDir to $dstDir")
        IO.copyDirectory(workerSrcDir, dstDir)
      },
      // Runs both generators and fails if the client source directory is out of sync with them.
      check := {
        (openApiClientMasterGenerate / Compile / openApiGenerate).value
        (openApiClientWorkerGenerate / Compile / openApiGenerate).value
        val log = streams.value.log

        val internalMasterSrcDir = file(openApiMasterInternalOutputDir) / "src" / "main" / "java"
        val internalWorkerSrcDir = file(openApiWorkerInternalOutputDir) / "src" / "main" / "java"
        val openApiSrcDir = file(openApiClientOutputDir)

        // Paths of all `*.java` files below `dir`, relative to `dir`.
        def getRelativePaths(dir: File): Set[String] = {
          (dir ** "*.java").get.map(_.relativeTo(dir).get.getPath).toSet
        }

        // Step 1: every checked-in source file must still be produced by one of the generators.
        val internalSrcPaths = getRelativePaths(internalMasterSrcDir) ++ getRelativePaths(internalWorkerSrcDir)
        val openApiSrcPaths = getRelativePaths(openApiSrcDir)
        val notGeneratedSrcPaths = openApiSrcPaths -- internalSrcPaths
        if (notGeneratedSrcPaths.nonEmpty) {
          sys.error(s"Files ${notGeneratedSrcPaths.mkString(", ")} not generated by openapi generator anymore, seems outdated.")
        }

        // Step 2: every generated file must exist in `dstDir` with identical content.
        def diffDirSrcFiles(srcDir: File, dstDir: File): Unit = {
          val srcFiles = (srcDir ** "*.java").get
          srcFiles.foreach { srcFile =>
            val relativePath = srcFile.relativeTo(srcDir).get.getPath
            val dstFile = dstDir / relativePath
            if (!dstFile.exists()) {
              sys.error(s"File $relativePath does not exist in the openapi client code directory")
            } else {
              val srcContent = IO.read(srcFile, UTF_8)
              val dstContent = IO.read(dstFile, UTF_8)
              if (srcContent != dstContent) {
                sys.error(s"File $relativePath differs, please re-generate the code.")
              }
            }
          }
        }

        diffDirSrcFiles(internalMasterSrcDir, openApiSrcDir)
        diffDirSrcFiles(internalWorkerSrcDir, openApiSrcDir)

        log.info("The openapi spec and code are consistent.")
      },
      (assembly / test) := { },
      (assembly / assemblyJarName) := {
        s"${moduleName.value}-${version.value}.${artifact.value.extension}"
      },
      (assembly / logLevel) := Level.Info,
      // Exclude `scala-library` from assembly.
      (assembly / assemblyPackageScala / assembleArtifact) := false,
      // Every classpath jar is excluded unless its file name starts with one of the prefixes below.
      (assembly / assemblyExcludedJars) := {
        val cp = (assembly / fullClasspath).value
        cp filter { v =>
          val name = v.data.getName
          !(name.startsWith("celeborn-") ||
            name.startsWith("jackson-annotations-") ||
            name.startsWith("jackson-core-") ||
            name.startsWith("jackson-databind-") ||
            name.startsWith("jackson-datatype-jsr310-") ||
            name.startsWith("jackson-jaxrs-json-provider-") ||
            name.startsWith("jsr305-") ||
            name.startsWith("jakarta.annotation-api-") ||
            name.startsWith("httpclient5-") ||
            name.startsWith("httpcore5-") ||
            // Listed explicitly, although the `httpcore5-` prefix above already matches it.
            name.startsWith("httpcore5-h2-") ||
            name.startsWith("jackson-databind-nullable-") ||
            name.startsWith("slf4j-api-"))
        }
      },
      (assembly / assemblyShadeRules) := Seq(
        ShadeRule.rename("org.openapitools.**" -> "org.apache.celeborn.shaded.org.openapitools.@1").inAll,
        ShadeRule.rename("javax.annotation.**" -> "org.apache.celeborn.shaded.javax.annotation.@1").inAll,
        ShadeRule.rename("com.fasterxml.jackson.**" -> "org.apache.celeborn.shaded.com.fasterxml.jackson.@1").inAll,
        ShadeRule.rename("jakarta.validation.**" -> "org.apache.celeborn.shaded.jakarta.validation.@1").inAll,
        ShadeRule.rename("javax.validation.**" -> "org.apache.celeborn.shaded.javax.validation.@1").inAll,
        ShadeRule.rename("javax.ws.rs.ext.**" -> "org.apache.celeborn.shaded.javax.ws.rs.ext.@1").inAll,
        ShadeRule.rename("org.apache.hc.**" -> "org.apache.celeborn.shaded.org.apache.hc.@1").inAll,
        ShadeRule.rename("org.slf4j.**" -> "org.apache.celeborn.shaded.org.slf4j.@1").inAll
      ),
      // The cases are order-sensitive: the first matching one decides the strategy.
      (assembly / assemblyMergeStrategy) := {
        case m if m.toLowerCase(Locale.ROOT).endsWith("license") => MergeStrategy.discard
        case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") => MergeStrategy.discard
        case m if m.toLowerCase(Locale.ROOT).endsWith("meta-inf/dependencies") => MergeStrategy.discard
        case m if m.toLowerCase(Locale.ROOT).endsWith("module-info.class") => MergeStrategy.discard
        case m if m.toLowerCase(Locale.ROOT).endsWith("mozilla/public-suffix-list.txt") => MergeStrategy.discard
        case m if m.toLowerCase(Locale.ROOT).endsWith("notice") => MergeStrategy.discard
        case PathList(ps@_*) if Assembly.isLicenseFile(ps.last) => MergeStrategy.discard
        case _ => MergeStrategy.first
      },
      // The assembly jar replaces the regular binary package of this project.
      Compile / packageBin := assembly.value,
      pomPostProcess := removeDependenciesTransformer,
      Compile / doc := {
        // skip due to doc generation failure for openapi modules, see CELEBORN-1477
        target.value / "none"
      }
    )
}
/** Project definitions for the Celeborn web module. */
object WebProjects {

  /** The `celeborn-web` project rooted at `web/`, configured with `commonSettings` only. */
  def web: Project =
    Project("celeborn-web", file("web")).settings(commonSettings)

  /** All projects defined by this object. */
  def modules: Seq[Project] = Seq(web)
}
////////////////////////////////////////////////////////
// Tez Client //
////////////////////////////////////////////////////////
object TezClientProjects {
/**
 * The `celeborn-client-tez` project located at `client-tez/tez`.
 *
 * It depends on the Celeborn common and client projects, pulls in the Tez, Hadoop and
 * slf4j libraries listed below plus the shared unit-test dependencies, and overrides
 * the commons-compress dependency with `Dependencies.commonsCompress`.
 */
def tezClient: Project = {
  // Libraries added on top of the project dependencies.
  val tezLibraries = Seq(
    Dependencies.tezCommon,
    Dependencies.tezRuntimeLibrary,
    Dependencies.tezRuntimeInternals,
    Dependencies.tezDag,
    Dependencies.tezApi,
    Dependencies.hadoopCommon,
    Dependencies.slf4jApi
  )

  Project("celeborn-client-tez", file("client-tez/tez"))
    .dependsOn(CelebornCommon.common, CelebornClient.client)
    .settings(
      commonSettings,
      libraryDependencies ++= tezLibraries ++ commonUnitTestDependencies,
      dependencyOverrides += Dependencies.commonsCompress
    )
}
/**
 * The `celeborn-tez-it` project located at `tests/tez-it`.
 *
 * Every upstream project is wired in with both its compile and its test classpath,
 * and `copyDepsSettings` is applied in addition to `commonSettings`.
 */
def tezIt: Project = {
  // ref: https://www.scala-sbt.org/1.x/docs/Multi-Project.html#Classpath+dependencies
  val compileAndTest = "test->test;compile->compile"

  Project("celeborn-tez-it", file("tests/tez-it"))
    .dependsOn(
      CelebornCommon.common % compileAndTest,
      CelebornClient.client % compileAndTest,
      CelebornMaster.master % compileAndTest,
      CelebornWorker.worker % compileAndTest,
      tezClient % compileAndTest
    )
    .settings(
      commonSettings,
      copyDepsSettings,
      libraryDependencies ++= commonUnitTestDependencies
    )
}
/**
 * The `celeborn-client-tez-shaded` project located at `client-tez/tez-shaded`.
 *
 * Builds an assembly jar from `tezClient` and a fixed allow-list of third-party jars,
 * relocates the listed third-party packages under `org.apache.celeborn.shaded`, and
 * publishes that assembly as the project's `Compile / packageBin` artifact.
 */
def tezClientShade: Project = {
  // Jar-name prefixes of the classpath entries that stay in the assembly;
  // every classpath jar not matching one of them is excluded.
  val bundledJarPrefixes = Seq(
    "celeborn-",
    "protobuf-java-",
    "guava-",
    "failureaccess-",
    "netty-",
    "commons-lang3-",
    "RoaringBitmap-",
    "lz4-java-",
    "zstd-jni-",
    "metrics-core-",
    "scala-library-"
  )

  // Package relocations applied to all jars of the assembly.
  val relocations = Seq(
    ShadeRule.rename("com.google.protobuf.**" -> "org.apache.celeborn.shaded.com.google.protobuf.@1").inAll,
    ShadeRule.rename("com.google.common.**" -> "org.apache.celeborn.shaded.com.google.common.@1").inAll,
    ShadeRule.rename("io.netty.**" -> "org.apache.celeborn.shaded.io.netty.@1").inAll,
    ShadeRule.rename("org.apache.commons.**" -> "org.apache.celeborn.shaded.org.apache.commons.@1").inAll,
    ShadeRule.rename("org.roaringbitmap.**" -> "org.apache.celeborn.shaded.org.roaringbitmap.@1").inAll,
    ShadeRule.rename("io.dropwizard.metrics.**" -> "org.apache.celeborn.shaded.io.dropwizard.metrics.@1").inAll,
    ShadeRule.rename("com.codahale.metrics.**" -> "org.apache.celeborn.shaded.com.codahale.metrics.@1").inAll,
    ShadeRule.rename("com.github.luben.**" -> "org.apache.celeborn.shaded.com.github.luben.@1").inAll
  )

  Project("celeborn-client-tez-shaded", file("client-tez/tez-shaded"))
    .dependsOn(tezClient)
    .disablePlugins(AddMetaInfLicenseFiles)
    .settings(
      commonSettings,
      releaseSettings,
      // Name the shaded jar the same way the maven build does.
      (assembly / assemblyJarName) := {
        val ext = artifact.value.extension
        s"${moduleName.value}_${scalaBinaryVersion.value}-${version.value}.$ext"
      },
      // Do not run tests as part of `assembly`.
      (assembly / test) := {},
      (assembly / logLevel) := Level.Info,
      // Include `scala-library` in the assembly.
      (assembly / assemblyPackageScala / assembleArtifact) := true,
      (assembly / assemblyExcludedJars) := {
        (assembly / fullClasspath).value.filterNot { entry =>
          val jarName = entry.data.getName
          bundledJarPrefixes.exists(prefix => jarName.startsWith(prefix))
        }
      },
      (assembly / assemblyShadeRules) := relocations,
      // Cases are tried top to bottom; the first match decides how an entry is merged.
      (assembly / assemblyMergeStrategy) := {
        case path if path.toLowerCase(Locale.ROOT).endsWith("manifest.mf") =>
          MergeStrategy.discard
        // For netty-3.x.y.Final.jar
        case path if path.startsWith("META-INF/license/") =>
          MergeStrategy.discard
        // The LicenseAndNoticeMergeStrategy always picks the license/notice file
        // from the current project's resource directory.
        case "META-INF/LICENSE" | "META-INF/NOTICE" =>
          CustomMergeStrategy("LicenseAndNoticeMergeStrategy") { conflicting =>
            val first = conflicting.head
            val ownFile = (Compile / resourceDirectory).value / first.target
            val open = () => new java.io.BufferedInputStream(new java.io.FileInputStream(ownFile))
            Right(Vector(JarEntry(first.target, open)))
          }
        case PathList(segments@_*) if Assembly.isLicenseFile(segments.last) =>
          MergeStrategy.discard
        // Drop all proto files that are not needed as artifacts of the build.
        case path if path.toLowerCase(Locale.ROOT).endsWith(".proto") =>
          MergeStrategy.discard
        case path if path.toLowerCase(Locale.ROOT).startsWith("meta-inf/native-image") =>
          MergeStrategy.discard
        // Drop netty jnilib
        case path if path.toLowerCase(Locale.ROOT).endsWith(".jnilib") =>
          MergeStrategy.discard
        // Rename the netty native libs to carry the shaded prefix.
        case "META-INF/native/libnetty_transport_native_epoll_x86_64.so" =>
          CustomMergeStrategy.rename(_ => "META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_x86_64.so")
        case "META-INF/native/libnetty_transport_native_epoll_aarch_64.so" =>
          CustomMergeStrategy.rename(_ => "META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_aarch_64.so")
        case _ =>
          MergeStrategy.first
      },
      // The assembly jar is the published binary artifact.
      Compile / packageBin := assembly.value,
      pomPostProcess := removeDependenciesTransformer
    )
}
/** All Tez projects defined by this object, including the test-only aggregate `tezGroup`. */
def modules: Seq[Project] =
  Seq(tezClient, tezIt, tezGroup, tezClientShade)
// Test-only aggregate of `tezClient` and `tezIt`; don't use this group for any other projects.
lazy val tezGroup = project.withId("celeborn-tez-group").aggregate(tezClient, tezIt)
/** Task key for copying the dependency jars into the directory given by `destPath`. */
val copyDeps =
  TaskKey[Unit](
    "copyDeps",
    "Copies needed dependencies to the build directory.")
/**
 * Target directory of the `copyDeps` task: `mapreduce_lib` under `Compile / crossTarget`.
 */
// NOTE(review): the directory is named `mapreduce_lib` although this is the Tez build;
// confirm whether that name is intentional before changing it.
val destPath = Def.setting {
  (Compile / crossTarget).value / "mapreduce_lib"
}
/**
 * Settings that implement `copyDeps` and make it a dependency of `Test / compile`.
 *
 * `copyDeps` creates the `destPath` directory if needed and copies every regular file
 * on `Compile / dependencyClasspath` into it, overwriting a file of the same name.
 * It throws `java.io.IOException` when the destination directory cannot be created.
 */
lazy val copyDepsSettings = Seq(
  copyDeps := {
    val dest = destPath.value
    if (!dest.isDirectory() && !dest.mkdirs()) {
      throw new java.io.IOException("Failed to create jars directory.")
    }
    (Compile / dependencyClasspath).value
      .map(_.data)
      // Class directories and missing entries are skipped; only jar files are copied.
      .filter(_.isFile())
      .foreach { jar =>
        // REPLACE_EXISTING overwrites a stale copy in one step instead of the former
        // isFile/delete/copy sequence, whose `delete()` result was silently ignored.
        Files.copy(
          jar.toPath(),
          new File(dest, jar.getName()).toPath(),
          java.nio.file.StandardCopyOption.REPLACE_EXISTING)
      }
  },
  // Referencing `copyDeps.value` makes the copy task a dependency of test compilation.
  (Test / compile) := {
    copyDeps.value
    (Test / compile).value
  }
)
}