media-api/app/lib/querysyntax/QuerySyntax.scala (216 lines of code) (raw):

package lib.querysyntax import com.gu.mediaservice.lib.ImageFields import com.gu.mediaservice.model.{Jpeg, MimeType, Png, Tiff} import org.joda.time.DateTime import org.parboiled2._ case class InvalidQuery(message: String) extends Exception(message) class QuerySyntax(val input: ParserInput) extends Parser with ImageFields { val beginningOfTime = new DateTime(0L) def today = DateTime.now.withTimeAtStartOfDay def tomorrow = today.plusDays(1) def yesterday = today.minusDays(1) def Query = rule { Expression ~ EOI } def Expression = rule { zeroOrMore(Term) separatedBy Whitespace } def Term = rule { NestedFilter | NegatedFilter | Filter } def NegatedFilter = rule { '-' ~ Filter ~> Negation } def NestedFilter = rule { NestedMatch ~> Nested | NestedDateMatch } def Filter = rule { HasMatch ~> Match | IsMatch ~> Match | DateConstraintMatch | DateRangeMatch ~> Match | AtMatch | FileTypeMatch ~> Match | CollectionRule | ScopedMatch ~> Match | HashMatch | AnyMatch } def HasMatch = rule { HasMatchField ~ ':' ~ HasMatchValue } def HasMatchField = rule { capture(HasFieldName) ~> (_ => HasField) } def HasFieldName = rule { "has" } def HasMatchValue = rule { String ~> HasValue } def IsMatch = rule { IsMatchField ~ ':' ~ IsMatchValue } def IsMatchField = rule { capture(IsFieldName) ~> (_ => IsField) } def IsFieldName = rule { "is" } def IsMatchValue = rule { String ~> IsValue } def NestedMatch = rule { ParentField ~ "@" ~ NestedField ~ ':' ~ ExactMatchValue } def NestedDateMatch = rule { ParentField ~ "@" ~ DateConstraintMatch ~> ( (parentField: Field, dateMatch: Match) => { Nested(parentField, dateMatch.field, dateMatch.value) } )} def DateConstraintMatch = rule { DateConstraint ~ DateMatch ~> ( (constraint: String, dateMatch: Match) => { val dateRange = dateMatch.value match { case Date(d) => constraint match { case ">" => DateRange(d, tomorrow) case "<" => DateRange(beginningOfTime, d) } case _ => throw new InvalidQuery("No date for date constraint!") } Match(dateMatch.field, dateRange) } )} def DateConstraint = rule { capture(AllowedDateConstraints) } def AllowedDateConstraints = rule { "<" | ">" } def ScopedMatch = rule { MatchField ~ ':' ~ MatchValue } def HashMatch = rule { '#' ~ MatchValue ~> ( label => Match( SingleField(getFieldPath("labels")), label ) )} def CollectionRule = rule { ("~" | "collection:") ~ ExactMatchValue ~> ( collection => Match( HierarchyField, Phrase(collection.string.toLowerCase) ) )} def ParentField = rule { capture(AllowedParentFieldName) ~> resolveNamedField _ } def NestedField = rule { capture(AllowedNestedFieldName) ~> resolveNamedField _ } def MatchField = rule { (capture(AllowedFieldName) | QuotedString | StringWithoutColon) ~> resolveNamedField _ } def AllowedParentFieldName = rule { "usages" } def AllowedNestedFieldName = rule { "status" | "platform" | "section" | "publication" | "orderedBy" | "reference" } // BEWARE! Ordering of this list matters. // If a word is a prefix of another, it must come last. // e.g foo must come _after_ food. // This might be an issue with the parboiled library. // TODO: fix this! def AllowedFieldName = rule { "illustrator" | "uploader" | "location" | "city" | "state" | "country" | "in" | "bylineTitle" | "byline" | "by" | "photographer" | "description" | "credit" | "copyright" | "source" | "category" | "subject" | "suppliersReference" | "supplier" | "specialInstructions" | "title" | "keyword" | "label" | "croppedBy" | "filename" | "photoshoot" | "leasedBy" | "person" | "imageType" } def resolveNamedField(name: String): Field = (name match { case "illustrator" => "credit" case "uploader" => "uploadedBy" case "label" => "labels" case "subject" => "subjects" case "location" => "subLocation" case "by" | "photographer" => "byline" case "keyword" => "keywords" case "person" => "peopleInImage" case fieldName => fieldName }) match { case "publication" => MultipleField(List("publicationName", "publicationCode")) case "section" => MultipleField(List("sectionId","sectionCode")) case "reference" => MultipleField(List("references.uri", "references.name").map(usagesField)) case "in" => MultipleField(List("subLocation", "city", "state", "country").map(getFieldPath)) case field => SingleField(getFieldPath(field)) } def AnyMatch = rule { MatchValue ~> (v => Match(AnyField, v)) } def ExactMatchValue = rule { QuotedString ~> Phrase | String ~> Phrase } // Note: order matters, check for quoted string first def MatchValue = rule { QuotedString ~> Phrase | String ~> Words } def String = rule { capture(Chars) } def StringWithoutColon = rule { capture(NotColon) } def DateMatch = rule { MatchDateField ~ ':' ~ MatchDateValue ~> ((field, date) => Match(field, Date(date))) } def DateRangeMatch = rule { MatchDateField ~ ':' ~ MatchDateRangeValue } def FileTypeMatch = rule { MatchMimeTypeField ~ ':' ~ MatchMimeTypeValue } def AtMatch = rule { '@' ~ MatchDateRangeValue ~> (range => Match(SingleField(getFieldPath("uploadTime")), range)) } def MatchDateField = rule { capture(AllowedDateFieldName) ~> resolveDateField _ } def MatchMimeTypeField = rule { capture("fileType") ~> resolveMimeTypeField _ } def resolveMimeTypeField(name: String): Field = name match { case "fileType" => SingleField(getFieldPath("mimeType")) } def resolveDateField(name: String): Field = name match { case "date" | "uploaded" => SingleField("uploadTime") case "taken" => SingleField("dateTaken") case "added" => SingleField("dateAdded") } def AllowedDateFieldName = rule { "date" | "uploaded" | "taken" | "added" } def MatchDateValue = rule { (QuotedString | String) ~> normaliseDateExpr _ ~> parseDate _ ~> (d => { test(d.isDefined) ~ push(d.get) }) } def MatchDateRangeValue = rule { (QuotedString | String) ~> normaliseDateExpr _ ~> parseDateRange _ ~> (d => { test(d.isDefined) ~ push(d.get) }) } def MatchMimeTypeValue = rule { capture(AllowedFileTypesValues) ~> parseMimeType _ } def AllowedFileTypesValues = rule { "tiff" | "tif" | "jpg" | "jpeg" | "png" } def translateMimeType(expr: String): MimeType = expr match { case s if s.equals("tif") || s.equals("tiff") => Tiff case s if s.equals("jpg") || s.equals("jpeg") => Jpeg case s if s.equals("png") => Png } def parseMimeType(expr: String): Value = Words(translateMimeType(expr).toString) def normaliseDateExpr(expr: String): String = expr.replaceAll("\\.", " ") val todayParser = DateAliasParser("today", today, tomorrow) val yesterdayParser = DateAliasParser("yesterday", yesterday, today) val humanDateParser = DateFormatParser("dd MMMMM YYYY") val slashDateParser = DateFormatParser("d/M/YYYY") val paddedslashDateParser = DateFormatParser("dd/MM/YYYY") val isoDateParser = DateFormatParser("YYYY-MM-dd") val humanMonthParser = DateFormatParser("MMMMM YYYY", Some(_.plusMonths(1))) val yearParser = DateFormatParser("YYYY", Some(_.plusYears(1))) val dateParsers: List[DateParser] = List( todayParser, yesterdayParser, humanDateParser, slashDateParser, paddedslashDateParser, isoDateParser, humanMonthParser, yearParser ) def parseDate(expr: String): Option[DateTime] = { dateParsers.foldLeft[Option[DateTime]](None) { case (res, parser) => res orElse parser.parseDate(expr) } } def parseDateRange(expr: String): Option[DateRange] = { dateParsers.foldLeft[Option[DateRange]](None) { case (res, parser) => res orElse parser.parseRange(expr) } } // Quoted strings def SingleQuote = "'" def DoubleQuote = "\"" def QuotedString = rule { SingleQuote ~ capture(NotSingleQuote) ~ SingleQuote | DoubleQuote ~ capture(NotDoubleQuote) ~ DoubleQuote } // TODO: unless escaped? def NotSingleQuote = rule { oneOrMore(noneOf(SingleQuote)) } def NotDoubleQuote = rule { oneOrMore(noneOf(DoubleQuote)) } def Whitespace = rule { oneOrMore(' ') } def Chars = rule { oneOrMore(visibleChars) } def NotColon = rule { oneOrMore(charsMinusColon) } // Note: this is a somewhat arbitrarily list of common Unicode ranges that we // expect people to want to use (e.g. Latin1 accented characters, curly quotes, etc). // This is likely not exhaustive and will need reviewing in the future. val latin1SupplementSubset = CharPredicate('\u00a1' to '\u00ff') val latin1ExtendedA = CharPredicate('\u0100' to '\u017f') val latin1ExtendedB = CharPredicate('\u0180' to '\u024f') val generalPunctuation = CharPredicate('\u2010' to '\u203d') val latin1ExtendedAdditional = CharPredicate('\u1e00' to '\u1eff') val extraVisibleCharacters = latin1SupplementSubset ++ latin1ExtendedA ++ latin1ExtendedB ++ generalPunctuation val visibleChars = CharPredicate.Visible ++ extraVisibleCharacters val charsMinusColon = visibleChars -- ':' } // TODO: // - is archived, has exports, has picdarUrn