def insertBlob()

in backend/app/services/manifest/Neo4jManifest.scala [314:401]


  /**
    * Records a blob (and the file that points at it) in the graph and queues extractor work for it.
    *
    * A single Cypher statement is run through `tx` which:
    *  - matches the existing `Resource` whose uri is `file.parentUri`;
    *  - merges the `File`, `Blob` and `MimeType` nodes and the `PARENT` / `TYPE_OF` relationships between them;
    *  - for every extractor, merges an `Extractor` node and then merges a `TODO` relationship from it to the blob,
    *    unless a `PROCESSED` relationship with the same ingestion, languages, parentBlobs (and, when a workspace
    *    is supplied, the same workspace properties) already exists. `cost`, `priority` and `attempts = 0` are only
    *    set when the `TODO` relationship is created.
    *
    * Afterwards `markResourceAsExpandable` and `markParentFileAsExpandableIfBlobIsExpandable` are called with the
    * same `tx`; whatever they return is not inspected here.
    *
    * @param tx          runner used to execute the statement and passed on to the two follow-up helpers
    * @param file        supplies the file uri, the parent uri and the size stored on the blob node
    * @param uri         uri of the blob
    * @param parentBlobs uris stored (as their string values) in the `parentBlobs` property of the relationships
    * @param mimeType    mime type merged as a `MimeType` node and passed to each extractor's `cost`
    * @param ingestion   value of the `ingestion` property of the relationships
    * @param languages   value of the `languages` property of the relationships
    * @param extractors  extractors to queue; each one is sent to the query as a parameter map
    * @param workspace   when defined, its workspaceId / workspaceNodeId / blobAddedToWorkspace are added as
    *                    properties of the relationships and the TODO priority is multiplied by 100
    * @return always `Right(())`; nothing is caught in this method, so any exception thrown by `tx.run` or by
    *         the helpers propagates to the caller
    */
  def insertBlob(tx: StatementRunner, file: IngestionFile, uri: Uri, parentBlobs: List[Uri], mimeType: MimeType,
                 ingestion: String, languages: List[String], extractors: Iterable[Extractor], workspace: Option[WorkspaceItemContext]): Either[Failure, Unit] = {
    // Converts one extractor into the map that the query UNWINDs as `extractorParam`.
    // `extractorPriority` is the extractor's own priority (stored on the Extractor node) whereas `priority` is
    // what ends up on the TODO relationship.
    def toParameterMap(e: Extractor): java.util.Map[String, Object] = {
      Map[String, Object](
        "name" -> e.name,
        "indexing" -> Boolean.box(e.indexing),
        "extractorPriority" -> Int.box(e.priority),
        // NOTE(review): the x100 presumably pushes workspace uploads ahead of other work - confirm.
        "priority" -> Int.box(if (workspace.nonEmpty) e.priority * 100 else e.priority),
        "cost" -> Long.box(e.cost(mimeType, file.size)),
        "external" -> Boolean.box(e.external)
      ).asJava
    }

    // Extra relationship properties, spliced into both the PROCESSED check and the TODO merge below.
    // Empty when there is no workspace, so the property maps stay syntactically valid in both cases.
    val maybeWorkspaceProperties =
      if (workspace.isDefined) {
        """
          |,
          |workspaceId: {workspaceId},
          |workspaceNodeId: {workspaceNodeId},
          |workspaceBlobUri: {workspaceBlobUri}
          |""".stripMargin
      } else {
        ""
      }

    // The statement result is deliberately not bound to a name: nothing below reads it.
    tx.run(
      s"""
        |MATCH (parent:Resource {uri: {parentUri}})
        |
        |MERGE (file:File:Resource {uri: {fileUri}})
        |MERGE (blob:Blob:Resource {uri: {blobUri}, size: {size}})
        |MERGE (mimeType:MimeType {mimeType: {mimeType}})
        |
        |MERGE (parent)<-[:PARENT]-(file)
        |MERGE (file)<-[:PARENT]-(blob)
        |MERGE (blob)-[:TYPE_OF]-(mimeType)
        |
        |WITH {extractorParamsArray} as extractors
        |UNWIND extractors as extractorParam
        |  MERGE (extractor :Extractor {name: extractorParam.name, indexing: extractorParam.indexing, priority: extractorParam.extractorPriority, external: extractorParam.external})
        |    WITH extractor, extractorParam.cost as cost, extractorParam.priority as priority
        |
        |  MATCH (unprocessedBlob: Blob:Resource {uri: {blobUri}})
        |    WHERE
        |      NOT (unprocessedBlob)<-[:PROCESSED {
        |        ingestion: {ingestion},
        |        languages: {languages},
        |        parentBlobs: {parentBlobs}
        |        ${maybeWorkspaceProperties}
        |      } ]-(extractor)
        |
        |  MERGE (unprocessedBlob)<-[todo:TODO {
        |    ingestion: {ingestion},
        |    languages: {languages},
        |    parentBlobs: {parentBlobs}
        |    ${maybeWorkspaceProperties}
        |  }]-(extractor)
        |    ON CREATE SET todo.cost = cost,
        |                  todo.priority = priority,
        |                  todo.attempts = 0
      """.stripMargin,
      parameters(
        "parentUri", file.parentUri.value,
        "fileUri", file.uri.value,
        "blobUri", uri.value,
        // Boxed explicitly (as in toParameterMap) rather than cast with asInstanceOf.
        "size", Long.box(file.size),
        "mimeType", mimeType.mimeType,
        "ingestion", ingestion,
        "extractorParamsArray", extractors.map(toParameterMap).toArray,
        "languages", languages.asJava,
        "parentBlobs", parentBlobs.map(_.value).toArray,
        // The workspace parameters are always supplied; they are null when there is no workspace, in which
        // case the query text does not reference them.
        "workspaceId", workspace.map(_.workspaceId).orNull,
        "workspaceNodeId", workspace.map(_.workspaceNodeId).orNull,
        "workspaceBlobUri", workspace.map(_.blobAddedToWorkspace).orNull
      )
    )

    // This operation will have an effect when we've just added a CHILD to a blob.
    // We look for a parent blob (and its parent file) and mark them as expandable.
    markResourceAsExpandable(tx, file.parentUri)

    // This operation will have an effect when we've just added a new PARENT to an existing expandable blob.
    // When you add a new file parent to an expandable blob, you need to mark that parent isExpandable,
    // because if the blob's already there, we won't be re-processing all its children,
    // so the markResourceAsExpandable step won't do what we want.
    // Note that this is a no-op if we've just added the blob for the first time,
    // since we have yet to delve into its children and mark the blob as expandable if it has them.
    markParentFileAsExpandableIfBlobIsExpandable(tx, blobUri = uri)

    Right(())
  }