backend/app/services/index/SearchContext.scala (115 lines of code) (raw):
package services.index
import com.sksamuel.elastic4s.ElasticDsl._
import com.sksamuel.elastic4s.requests.searches.queries.compound.BoolQuery
import com.sksamuel.elastic4s.requests.searches.queries.{Query}
import model.annotations.{WorkspaceEntry, WorkspaceLeaf}
import model.frontend.{TreeEntry, TreeLeaf, TreeNode}
import model.index.SearchParameters
import services.annotations.Annotations
import services.users.UserManagement
import utils.attempt.{Attempt, ClientFailure, NotFoundFailure}
import scala.concurrent.ExecutionContext
/**
 * The scope a search is restricted to. `SearchContext.buildFilters` pattern matches on the concrete case to
 * decide which visibility filters are added to the query.
 */
sealed trait SearchContext

/**
 * Search across everything the user is allowed to see.
 *
 * @param visibleCollections collection URIs the user may search (populated from
 *                           `getVisibleCollectionUrisForUser` in `SearchContext.build`)
 * @param visibleWorkspaces  ids of the workspaces the user may search
 */
case class DefaultSearchContext(
  visibleCollections: Set[String],
  visibleWorkspaces: List[String]
) extends SearchContext

/**
 * Search restricted to an explicit list of blobs.
 *
 * @param blobUris URIs of the blobs to search; matched against a document's `_id` and its parent blobs when the
 *                 filter is built
 */
case class WorkspaceFolderSearchContext(
  blobUris: List[String]
) extends SearchContext
/**
 * Pairs a workspace id with the id of an entry inside that workspace.
 *
 * NOTE(review): not referenced elsewhere in this file; presumably carries the request parameters that select a
 * workspace-folder search — confirm against the callers.
 *
 * @param workspaceId       id of the workspace
 * @param workspaceFolderId id of the folder within the workspace
 */
case class WorkspaceSearchContextParams(
  workspaceId: String,
  workspaceFolderId: String
)
object SearchContext {
/**
 * Builds the default search context for a user.
 *
 * The visible collection URIs are looked up first; the workspace metadata lookup is only started once that
 * lookup has produced a value.
 *
 * @param username    the user the search is being run for
 * @param users       source of the collection URIs visible to the user
 * @param annotations source of the workspace metadata for the user
 * @return a [[DefaultSearchContext]] holding the visible collection URIs and the ids of the returned workspaces
 */
def build(username: String, users: UserManagement, annotations: Annotations)(implicit ec: ExecutionContext): Attempt[DefaultSearchContext] =
  users.getVisibleCollectionUrisForUser(username).flatMap { collectionUris =>
    annotations.getAllWorkspacesMetadata(username).map { workspaces =>
      DefaultSearchContext(collectionUris, workspaces.map(_.id))
    }
  }
/**
 * Expands a workspace folder into the URIs of all blobs beneath it.
 *
 * @param username          the user whose view of the workspace is loaded
 * @param workspaceId       the workspace to load
 * @param workspaceFolderId id of the entry within the workspace to expand
 * @param annotations       source of the workspace contents
 * @return the blob URIs found under the folder; a `NotFoundFailure` when no entry has the given id, or a
 *         `ClientFailure` when the id refers to a leaf rather than a folder
 */
def buildBlobFiltersForWorkspaceFolder(username: String, workspaceId: String, workspaceFolderId: String, annotations: Annotations)(implicit ec: ExecutionContext): Attempt[List[String]] = {
  annotations.getWorkspaceContents(username, workspaceId).flatMap { workspaceRoot =>
    val requestedEntry = TreeEntry.findNodeById(workspaceRoot, workspaceFolderId)

    requestedEntry match {
      case None =>
        Attempt.Left(NotFoundFailure(s"$workspaceFolderId not found in workspace $workspaceId"))

      case Some(_: TreeLeaf[WorkspaceEntry]) =>
        // Only folders can be expanded into a blob list.
        Attempt.Left(ClientFailure(s"$workspaceFolderId is a leaf, not a node in workspace $workspaceId"))

      case Some(folderNode: TreeNode[WorkspaceEntry]) =>
        Attempt.Right(findBlobsInWorkspaceFolder(folderNode))
    }
  }
}
/**
 * Combines the created-at, visibility and mime-type filters into a single bool query in which every clause
 * must match.
 *
 * @param parameters the search parameters supplying the date range, mime filters and any user-chosen
 *                   ingestion/workspace filters
 * @param context    decides how visibility is restricted: by visible collections/workspaces or by an explicit
 *                   list of blob URIs
 * @return a `must` query over, in order, the created-at filter (if any), the visibility filters and the mime filter
 */
def buildFilters(parameters: SearchParameters, context: SearchContext): BoolQuery = {
  val dateClauses = buildCreatedAtFilter(parameters.start, parameters.end).toList
  val mimeClauses = buildMimeFilter(parameters)

  val visibilityClauses = context match {
    case WorkspaceFolderSearchContext(uris) =>
      buildWorkspaceBlobFilter(uris).toList
    case defaultContext: DefaultSearchContext =>
      buildIngestionAndWorkspaceFilters(parameters, defaultContext)
  }

  must(dateClauses ++ visibilityClauses ++ mimeClauses)
}
/**
 * Collects the URI of every blob at or beneath the given workspace tree entry.
 *
 * Leaves contribute the `uri` of the [[WorkspaceLeaf]] they wrap; nodes contribute the blobs of each of their
 * children, visited in child order. Duplicates are kept.
 *
 * @param folder the tree entry to expand
 * @return the blob URIs in depth-first, child order
 * @throws IllegalStateException if a `TreeLeaf` wraps data that is not a `WorkspaceLeaf`
 */
def findBlobsInWorkspaceFolder(folder: TreeEntry[WorkspaceEntry]): List[String] = {
  folder match {
    case leaf: TreeLeaf[WorkspaceEntry] => leaf.data match {
      case workspaceLeaf: WorkspaceLeaf => List(workspaceLeaf.uri)
      case _ => throw new IllegalStateException(s"Unexpected WorkspaceNode wrapped by TreeLeaf ${leaf.id}")
    }
    case node: TreeNode[WorkspaceEntry] =>
      // Build the result in a single pass. Folding with `acc ++ ...` re-copies the accumulated list for every
      // child, which is quadratic for wide folders. Going through the iterator keeps order and duplicates
      // regardless of the concrete collection type of `children`.
      node.children.iterator.flatMap(child => findBlobsInWorkspaceFolder(child)).toList
  }
}
/**
 * Builds a nested query over the workspaces field that matches on the nested workspace id sub-field.
 *
 * @param workspaceId the workspace id to match
 */
private def buildWorkspaceFilter(workspaceId: String) = {
  val workspaceIdPath = s"${IndexFields.workspacesField}.${IndexFields.workspaces.workspaceId}"

  nestedQuery(IndexFields.workspacesField, termQuery(workspaceIdPath, workspaceId))
}
/**
 * Builds the visibility filters for a default (non workspace-folder) search.
 *
 * By this point the parameters have already been checked for permissions, so the user's visible collections and
 * workspaces only need to be applied when the user has not refined the search with filters of their own.
 *
 * @param parameters the search parameters, including any ingestion/workspace filters chosen by the user
 * @param context    the collections and workspaces visible to the user
 * @return the clauses to be ANDed together by the caller's `must`
 * @throws IllegalStateException if the user can see neither a collection nor a workspace
 */
private def buildIngestionAndWorkspaceFilters(parameters: SearchParameters, context: DefaultSearchContext) = {
  val canSeeSomething = context.visibleCollections.nonEmpty || context.visibleWorkspaces.nonEmpty
  if (!canSeeSomething) {
    // Fail-safe only: the controller should have checked permissions and returned an empty result set already.
    throw new IllegalStateException("No visible collections or workspaces")
  }

  val userSuppliedFilters = parameters.ingestionFilters.nonEmpty || parameters.workspaceFilters.nonEmpty
  if (userSuppliedFilters) {
    // Return just what was asked for. The two clauses are ANDed by the `must` in the calling code, e.g.
    //   (collection == 'Panama' OR collection == 'Paradise') AND (workspace == 'Shared With Barry')
    // NOTE(review): unlike the branch below, no trailing '/' is appended here, so each ingestion filter is
    // matched as a raw prefix — confirm this is intended.
    val requestedIngestions = parameters.ingestionFilters.map(prefixQuery(IndexFields.ingestionRaw, _))
    val requestedWorkspaces = parameters.workspaceFilters.map(buildWorkspaceFilter)

    List(
      should(requestedIngestions),
      should(requestedWorkspaces)
    )
  } else {
    // Nothing was asked for, so return results from any collection or workspace the user can see, e.g.
    //   (collection == 'Panama' OR collection == 'Paradise') OR (workspace == 'Shared With Barry')
    val collectionClauses = context.visibleCollections.map { uri => prefixQuery(IndexFields.ingestionRaw, s"$uri/") }
    val workspaceClauses = context.visibleWorkspaces.map(buildWorkspaceFilter)

    List(
      should(collectionClauses ++ workspaceClauses)
    )
  }
}
/**
 * Builds the mime-type filter: a single `should` clause holding one prefix query per requested mime filter,
 * so a document needs to match any one of them.
 *
 * @param parameters the search parameters supplying `mimeFilters`
 * @return a one-element list so the result can be concatenated with the other filter lists
 */
private def buildMimeFilter(parameters: SearchParameters) = {
  val mimeField = "metadata." + IndexFields.metadata.mimeTypesRaw
  val mimeClauses = parameters.mimeFilters.map(prefixQuery(mimeField, _))

  List(should(mimeClauses))
}
/**
 * Builds a range query on the created-at field covering the half-open interval `[start, end)`.
 *
 * @param maybeStart inclusive lower bound, if any
 * @param maybeEnd   exclusive upper bound, if any
 * @return the range query restricted by whichever bounds are present, or `None` when neither is given
 */
private def buildCreatedAtFilter(maybeStart: Option[Long], maybeEnd: Option[Long]): Option[Query] = {
  if (maybeStart.isEmpty && maybeEnd.isEmpty) {
    None
  } else {
    val unbounded = rangeQuery(IndexFields.createdAt)
    // Apply the lower bound first and the upper bound second, each only when supplied.
    val lowerBounded = maybeStart.fold(unbounded)(start => unbounded.gte(start))
    val bounded = maybeEnd.fold(lowerBounded)(end => lowerBounded.lt(end))

    Some(bounded)
  }
}
/**
 * Builds the filter that restricts a search to the given blobs and anything that has one of them as a parent.
 *
 * @param blobUris the blob URIs obtained by expanding a workspace folder into the blobs below it
 * @return `None` when there are no blobs, otherwise a `should` query with two term clauses per blob
 */
private def buildWorkspaceBlobFilter(blobUris: List[String]): Option[BoolQuery] = blobUris match {
  case Nil =>
    None

  case uris =>
    val clauses = for {
      uri <- uris
      clause <- List(
        // The blob itself.
        termQuery("_id", uri),
        // Anything that is a child of the blob. Children don't appear in the workspace tree, but people should
        // be able to access them even if they don't have access to the underlying dataset.
        termQuery(IndexFields.parentBlobs, uri)
      )
    } yield clause

    // Results should come from any of the blobs, not only those common to all of them, so the clauses are
    // combined with OR (should) rather than AND (must).
    Some(should(clauses))
}
}