app/services/DuplicateFinderService.scala (67 lines of code) (raw):
package services
import org.slf4j.LoggerFactory
import akka.stream.{ClosedShape, Materializer}
import akka.stream.scaladsl.{GraphDSL, RunnableGraph, Sink}
import com.om.mxs.client.japi.SearchTerm
import helpers.UserInfoCache
import models.{CachedEntry, ExistingArchiveContentCache}
import streamcomponents.OMFastSearchSource
import javax.inject.{Inject, Singleton}
import scala.concurrent.ExecutionContext.Implicits.global
/**
  * Service that pulls a listing of the content of a vault and reports which paths occur more than once.
  *
  * @param userInfoCache cache used to look up the connection information for a given vault ID
  * @param mat           implicitly provided Akka Streams materializer, used to run the search stream
  */
@Singleton
class DuplicateFinderService @Inject()(userInfoCache:UserInfoCache)(implicit mat:Materializer){
  // Imported locally so that the explicit return types below do not depend on the file-level import list.
  import scala.collection.immutable
  import scala.concurrent.Future

  private val logger = LoggerFactory.getLogger(getClass)

  /**
    * Streams every entry found in the given vault into memory as a sequence of [[models.CachedEntry]].
    *
    * @param vaultId ID of the vault to scan
    * @return a Future that completes with all of the entries found. The Future fails with a
    *         `NoSuchElementException` if `userInfoCache` holds no information for `vaultId`
    *         (previously this was thrown synchronously from `.head`, bypassing the caller's Future error handling).
    */
  def loadExistingContent(vaultId: String): Future[immutable.Seq[CachedEntry]] = {
    // The metadata fields requested from the search for each result.
    val interestingFields = Array(
      "MXFS_PATH",
      "MXFS_FILENAME",
      "GNM_ASSET_FOLDER",
      "GNM_TYPE",
      "GNM_PROJECT_ID"
    )

    // NOT(oid == ""); as the name suggests this is intended to match every object in the vault.
    val catchAllSearchTerm = SearchTerm.createNOTTerm(SearchTerm.createSimpleTerm("oid", ""))

    userInfoCache.infoForVaultId(vaultId).headOption match {
      case None =>
        val problem = s"No vault information is available for vault ID $vaultId"
        logger.error(problem)
        Future.failed(new NoSuchElementException(problem))

      case Some(vaultInfo) =>
        val finalSink = Sink.seq[CachedEntry]

        val graph = GraphDSL.create(finalSink) { implicit builder =>
          sink =>
            import akka.stream.scaladsl.GraphDSL.Implicits._

            val src = builder.add(new OMFastSearchSource(vaultInfo, Array(catchAllSearchTerm), interestingFields, atOnce = 100))

            src.out.map(elem => {
              // Looks up a single string metadata value, yielding None if the element has no
              // attributes at all or the specific field is not present.
              def stringAttribute(fieldName: String) = elem.attributes.flatMap(_.stringValues.get(fieldName))

              val ent = CachedEntry(
                elem.oid,
                stringAttribute("MXFS_PATH").getOrElse("(no path)"),
                stringAttribute("MXFS_FILENAME").getOrElse("(no filename)"),
                stringAttribute("GNM_ASSET_FOLDER"),
                stringAttribute("GNM_TYPE"),
                stringAttribute("GNM_PROJECT_ID"),
                "" // final CachedEntry field is always left blank here
              )
              logger.debug(s"Got entry $ent")
              ent
            }) ~> sink
            ClosedShape
        }

        RunnableGraph.fromGraph(graph).run()
    }
  }

  /**
    * Describes one duplicated path.
    *
    * @param mxfsPath        the path that occurs more than once
    * @param duplicateNumber the count reported for this path by the content cache
    * @param duplicatesData  every cached entry that the content cache holds for this path
    */
  case class FullDuplicateData(mxfsPath:String, duplicateNumber:Int, duplicatesData:Seq[CachedEntry])

  /**
    * Summary of a whole duplicate scan.
    *
    * @param dupes_count the duplicate count reported by the content cache
    * @param item_count  total number of entries that were loaded from the vault
    * @param duplicates  one record per duplicated path
    */
  case class AllDuplicateData(dupes_count:Int, item_count:Int, duplicates:Seq[FullDuplicateData])

  /**
    * Loads the full content listing of the given vault and summarises the duplicated paths in it.
    *
    * @param vaultId ID of the vault to scan
    * @return a Future that completes with the duplicate summary, or fails if the content could not be loaded
    */
  def getDuplicateData(vaultId: String): Future[AllDuplicateData] =
    loadExistingContent(vaultId).map { results =>
      val contentCache = new ExistingArchiveContentCache(results)
      val dupeCount = contentCache.dupesCount

      if (dupeCount > 0) {
        logger.warn(s"There are $dupeCount duplicated files in the archive")
      } else {
        logger.info("No duplicates found.")
      }

      val duplicatesArray = contentCache.dupedPaths.map { case (path, copies) =>
        logger.debug(s"$path: $copies copies")
        FullDuplicateData(path, copies, contentCache.getAllForPath(path))
      }.toSeq

      logger.info(s"Got existing ${results.length} items in the vault")
      // Reuse the count obtained above rather than asking the cache for it a second time.
      AllDuplicateData(dupes_count = dupeCount, item_count = results.length, duplicates = duplicatesArray)
    }
}