def someIdentity[T]()

in backend/app/extraction/MetadataEnrichment.scala [36:96]


  /**
    * Lifts a value into an `Option` without inspecting it.
    *
    * Handy as the `transform` argument of `extractFields` when the raw string
    * should be kept as-is.
    *
    * @param v the value to wrap
    * @tparam T type of the wrapped value
    * @return always `Some(v)`; no null check is performed
    */
  def someIdentity[T](v: T): Option[T] = {
    // Deliberately `Some` rather than `Option(...)`: the value is never dropped.
    Some(v)
  }
  /**
    * Parses a string as an `Int`, turning any parse failure into `None`.
    *
    * @param c the text to parse
    * @return `Some(n)` when `c.toInt` succeeds, `None` when it throws
    */
  def safeIntParse(c: String): Option[Int] =
    Try(c.toInt).fold(_ => Option.empty[Int], parsed => Some(parsed))

  /**
    * Builds an `EnrichedMetadata` from raw key/value metadata.
    *
    * Each constructor argument is resolved independently via `extractFields`,
    * using the matching key list and a transform: `someIdentity` for values kept
    * as strings, `isoDateToLong` for the two date fields and `safeIntParse` for
    * the two counts. An argument is `None` when no value is found or its
    * transform rejects the value.
    *
    * @param metadata raw metadata, mapping each key to all of its values
    * @return the enriched view of `metadata`
    */
  def enrich(metadata: Map[String, Seq[String]]): EnrichedMetadata = {
    // Evaluated in the same order as the constructor's positional arguments.
    val title = extractFields(metadata, titleKeys)(someIdentity)
    val author = extractFields(metadata, authorKeys)(someIdentity)
    val createdAt = extractFields(metadata, createdAtKeys)(isoDateToLong)
    val lastModified = extractFields(metadata, lastModifiedKeys)(isoDateToLong)
    val createdWith = extractFields(metadata, createdWithKeys)(someIdentity)
    val pageCount = extractFields(metadata, pageCountKeys)(safeIntParse)
    val wordCount = extractFields(metadata, wordCountKeys)(safeIntParse)

    EnrichedMetadata(title, author, createdAt, lastModified, createdWith, pageCount, wordCount)
  }

  // Probably these lists of keys could be simplified now we're on Tika v2.
  // But I've left the old ones in for backwards compatibility,
  // and because I'm not sure how to test this.
  // https://cwiki.apache.org/confluence/display/TIKA/Migrating+to+Tika+2.0.0#MigratingtoTika2.0.0-Removedduplicate/triplicatekeys
  /** Metadata keys that `enrich` searches for a document title. */
  val titleKeys: List[String] =
    List("pdf:docinfo:title", "title", "dc:title")

  /** Metadata keys that `enrich` searches for a document author/creator. */
  val authorKeys: List[String] =
    List("pdf:docinfo:author", "Author", "dc:creator", "creator")

  /** Metadata keys that `enrich` searches for a creation date (parsed with `isoDateToLong`). */
  val createdAtKeys: List[String] =
    List("meta:creation-date", "Creation-Date", "pdf:docinfo:created", "dcterms:created")

  /** Metadata keys that `enrich` searches for a last-modified date (parsed with `isoDateToLong`). */
  val lastModifiedKeys: List[String] =
    List("Last-Modified", "Last-Save-Date", "dcterms:modified")

  /** Metadata keys that `enrich` searches for the producer / creator tool of a document. */
  val createdWithKeys: List[String] =
    List("pdf:docinfo:producer", "xmp:CreatorTool")

  /** Metadata keys that `enrich` searches for a page count (parsed with `safeIntParse`). */
  val pageCountKeys: List[String] =
    List("xmpTPg:NPages", "meta:page-count", "Page-Count")

  /** Metadata keys that `enrich` searches for a word count (parsed with `safeIntParse`). */
  val wordCountKeys: List[String] =
    List("meta:word-count", "Word-Count")

  /**
    * Picks the most frequently occurring value among the given metadata keys and
    * runs it through `transform`.
    *
    * The values of every entry whose key is listed in `keys` are pooled together
    * and the value that appears most often wins. If several values are equally
    * frequent, the winner is whichever one `maxBy` meets first in the grouped
    * map, so it does not follow the order of `keys`.
    *
    * A missing field is the normal case, so emptiness is checked explicitly
    * rather than by catching the exception `maxBy` throws on an empty
    * collection. Unexpected errors therefore propagate instead of being silently
    * turned into `None`.
    *
    * @param metadata  raw metadata, mapping each key to all of its values
    * @param keys      the metadata keys that may carry the wanted field
    * @param transform converts the chosen raw value; may reject it with `None`
    * @tparam T type of the extracted field
    * @return the transformed most-common value, or `None` when no listed key has
    *         any value or `transform` returns `None`
    */
  private def extractFields[T](metadata: Map[String, Seq[String]], keys: Seq[String])(transform: String => Option[T]): Option[T] = {
    val candidates = metadata.filter { case (key, _) => keys.contains(key) }.values.flatten
    val occurrences = candidates.groupBy(identity)

    if (occurrences.isEmpty) None
    else transform(occurrences.maxBy { case (_, sameValues) => sameValues.size }._1)
  }