def handleAliases()

in gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/engine/GerritAnalyticsTransformations.scala [152:172]


    /** Overrides the `author` and `organization` columns of `df` with values from an optional
      * aliases table, matched on e-mail address.
      *
      * `df` is taken from the enclosing scope (presumably the DataFrame wrapped by the enclosing
      * class -- confirm against the surrounding definition).
      *
      * When an aliases table is supplied it is left-outer joined to `df` on `email`, so every row
      * of `df` is retained whether or not an alias matches. For each row:
      *  - `organization` becomes the lower-cased alias organization when that value is present and
      *    not the empty string; otherwise the row keeps its original `organization`;
      *  - `author` becomes the alias author when it is non-null; otherwise the original `author`.
      *
      * The helper `*_alias` columns are removed from the result.
      *
      * @param aliasesDF optional aliases table; the code reads its `email`, `author` and
      *                  `organization` columns
      * @param spark     implicit session required by the signature; not referenced in this body
      * @return `df` with aliases applied, or `df` itself when `aliasesDF` is `None`
      */
    def handleAliases(aliasesDF: Option[DataFrame])(implicit spark: SparkSession): DataFrame =
      aliasesDF match {
        case None => df

        case Some(aliases) =>
          // Rename the alias columns up front so they cannot collide with the identically named
          // columns of `df` once the two tables are joined.
          val renames = Seq(
            "email"        -> "email_alias",
            "author"       -> "author_alias",
            "organization" -> "organization_alias"
          )
          val suffixedAliases = renames.foldLeft(aliases) {
            case (acc, (original, suffixed)) => acc.withColumnRenamed(original, suffixed)
          }

          // Left outer join: rows of `df` without a matching alias get nulls in the alias columns.
          val joined =
            df.join(suffixedAliases, df("email") === suffixedAliases("email_alias"), "left_outer")

          // A null or empty alias organization does not satisfy the predicate, so those rows fall
          // back to the organization already present in `df`.
          val aliasOrganization = suffixedAliases("organization_alias")
          val withOrganization = joined.withColumn(
            "organization",
            when(aliasOrganization =!= "", lower(aliasOrganization)).otherwise(df("organization"))
          )

          // Prefer the alias author whenever one was matched.
          val withAuthor = withOrganization.withColumn(
            "author",
            coalesce(suffixedAliases("author_alias"), df("author"))
          )

          withAuthor.drop(renames.map(_._2): _*)
      }