media-api/app/lib/elasticsearch/QueryBuilder.scala (102 lines of code) (raw):
package lib.elasticsearch
import com.gu.mediaservice.lib.ImageFields
import com.gu.mediaservice.lib.elasticsearch.IndexSettings
import com.gu.mediaservice.lib.formatting.printDateTime
import com.gu.mediaservice.lib.logging.GridLogging
import com.gu.mediaservice.model.Agency
import com.sksamuel.elastic4s.ElasticDsl
import com.sksamuel.elastic4s.ElasticDsl._
import com.sksamuel.elastic4s.requests.common.Operator
import com.sksamuel.elastic4s.requests.searches.queries.Query
import com.sksamuel.elastic4s.requests.searches.queries.matches.{MultiMatchQuery, MultiMatchQueryBuilderType}
import lib.querysyntax._
import lib.MediaApiConfig
/**
  * Turns parsed search `Condition`s into elastic4s queries.
  *
  * `matchFields` is the field list searched when a condition targets `AnyField`.
  * `overQuotaAgencies` and `config` are handed on to `IsQueryFilter` for `is:` conditions;
  * `config` also supplies the field aliases and the fuzzy-search settings read below.
  */
class QueryBuilder(matchFields: Seq[String], overQuotaAgencies: () => List[Agency], config: MediaApiConfig) extends ImageFields with GridLogging {

  /**
    * Maps a field name to the path used in queries.
    *
    * The first entry of `config.fieldAliasConfigs` whose `alias` equals `field` wins and its
    * `elasticsearchPath` is returned; when no alias matches, the result of `getFieldPath(field)`
    * (defined outside this file) is used instead.
    */
  def resolveFieldPath(field: String): String =
    config.fieldAliasConfigs
      .find(_.alias == field)
      .map(_.elasticsearchPath)
      .getOrElse(getFieldPath(field))

  // For some sad reason, there was no helpful alias for this in the ES library
  private def multiMatchPhraseQuery(value: String, fields: Seq[String]): MultiMatchQuery = {
    val acrossFields = ElasticDsl.multiMatchQuery(value).fields(fields)
    acrossFields.matchType(MultiMatchQueryBuilderType.PHRASE)
  }

  /**
    * Multi-field query for free words, always with the AND operator.
    * With fuzzy search switched on in config it is a BEST_FIELDS query carrying the configured
    * fuzziness, max expansions and prefix length; otherwise it is a CROSS_FIELDS query.
    */
  private def multiMatchWordQuery(value: String, fields: Seq[String]): MultiMatchQuery = {
    val allWordsRequired = ElasticDsl.multiMatchQuery(value).fields(fields).operator(Operator.AND)

    if (!config.fuzzySearchEnabled) {
      allWordsRequired.matchType(MultiMatchQueryBuilderType.CROSS_FIELDS)
    } else {
      allWordsRequired
        .matchType(MultiMatchQueryBuilderType.BEST_FIELDS)
        .fuzziness(config.fuzzySearchEditDistance)
        .maxExpansions(config.fuzzyMaxExpansions)
        .prefixLength(config.fuzzySearchPrefixLength)
    }
  }

  /**
    * Builds the multi-field query for a `Words` or `Phrase` value.
    * Any other kind of value is rejected with `InvalidQuery`.
    */
  private def makeMultiQuery(value: Value, fields: Seq[String]): MultiMatchQuery = value match {
    case Phrase(text) => multiMatchPhraseQuery(text, fields)
    case Words(text) => multiMatchWordQuery(text, fields)
    // That's OK, we only do date queries on a single field at a time
    case e => throw InvalidQuery(s"Cannot do multiQuery on $e")
  }

  /**
    * Query for a value matched against exactly one named field.
    * The field path is resolved only inside the branch that needs it, so an unsupported value
    * is rejected with `InvalidQuery` without resolving the path.
    */
  private def queryOnSingleField(field: String, value: Value): Query = {
    def path: String = resolveFieldPath(field)

    value match {
      // Force AND operator else it will only require *any* of the words, not *all*
      case Words(text) =>
        matchQuery(path, text).operator(Operator.AND)
      // This exact phrase is searched for as "syndication" instead of literally
      case Phrase("Added to Photo Sales") =>
        matchPhraseQuery(path, "syndication")
      case Phrase(text) =>
        matchPhraseQuery(path, text)
      case DateRange(start, end) =>
        rangeQuery(path).gte(printDateTime(start)).lte(printDateTime(end))
      case e =>
        throw InvalidQuery(s"Cannot do single field query on $e")
    }
  }

  /**
    * Query for an `IsField` condition.
    * When the value is not an `IsValue`, or `IsQueryFilter` yields no filter for it, the fact is
    * logged at info level and a match-none query is returned rather than failing the search.
    */
  private def queryForIsCondition(condition: Match): Query = {
    def logAndMatchNothing(): Query = {
      logger.info(s"Cannot perform IS query on ${condition.value}")
      matchNoneQuery()
    }

    condition.value match {
      case IsValue(value) =>
        IsQueryFilter.apply(value, overQuotaAgencies, config) match {
          case Some(isQuery) => isQuery.query
          case _ => logAndMatchNothing()
        }
      case _ => logAndMatchNothing()
    }
  }

  /**
    * Translates one `Match` into a query, dispatching on the kind of field it targets.
    * Throws `InvalidQuery` when the value kind is not supported for that field kind.
    */
  private def makeQueryBit(condition: Match): Query = condition.field match {
    case AnyField =>
      makeMultiQuery(condition.value, matchFields)

    case MultipleField(fields) =>
      makeMultiQuery(condition.value, fields)

    case SingleField(field) =>
      queryOnSingleField(field, condition.value)

    case HierarchyField =>
      condition.value match {
        case Phrase(value) => termQuery(resolveFieldPath("pathHierarchy"), value)
        case _ => throw InvalidQuery("Cannot accept non-Phrase value for HierarchyField Match")
      }

    case HasField =>
      condition.value match {
        case HasValue(value) => boolQuery().filter(existsQuery(resolveFieldPath(value)))
        case _ => throw InvalidQuery(s"Cannot perform has field on ${condition.value}")
      }

    case IsField =>
      queryForIsCondition(condition)
  }

  /**
    * Builds the overall query for a list of conditions.
    *
    * An empty list gives a match-all query. Otherwise a bool query is assembled: every `Match`
    * is added as a `must` clause, every `Negation` as a `not` clause, and `Nested` conditions are
    * grouped by parent field, each group becoming one nested query (itself a bool query of `must`
    * clauses) added as a further `must` clause.
    *
    * Throws `InvalidQuery` if a nested group's parent is not a `SingleField`, or if any
    * individual condition cannot be translated.
    */
  def makeQuery(conditions: List[Condition]) =
    if (conditions.isEmpty) {
      matchAllQuery()
    } else {
      // Top-level clauses first; Nested conditions fall through untouched here
      val topLevel = conditions.foldLeft(boolQuery()) { (acc, condition) =>
        condition match {
          case Negation(negated) => acc.withNot(makeQueryBit(negated))
          case positive @ Match(_, _) => acc.withMust(makeQueryBit(positive))
          case _ => acc
        }
      }

      val nestedConditions = conditions.collect { case n: Nested => n }

      // One nested query per parent field, wrapping all of that parent's conditions
      val perParent = nestedConditions
        .groupBy(_.parentField)
        .toList
        .map {
          case (parent: SingleField, group) =>
            val inner = group.foldLeft(boolQuery()) {
              case (acc, Nested(_, f, v)) => acc.withMust(makeQueryBit(Match(f, v)))
              case (acc, _) => acc
            }
            nestedQuery(parent.name, inner)
          case _ =>
            throw InvalidQuery("Can only accept SingleField for Nested Query parent")
        }

      perParent.foldLeft(topLevel) { (acc, nestedClause) => acc.withMust(nestedClause) }
    }
}