in backend/app/services/manifest/Neo4jManifest.scala [314:401]
/**
  * Records a file and its blob in the graph and queues extractor work for the blob.
  *
  * A single Cypher statement:
  *  - matches the existing `Resource` whose uri is `file.parentUri`;
  *  - merges the `File`, `Blob` (keyed on uri and size) and `MimeType` nodes, plus the
  *    `PARENT` / `TYPE_OF` relationships between them;
  *  - for every supplied extractor, merges the `Extractor` node and, unless a matching
  *    `PROCESSED` relationship from that extractor to the blob already exists, merges a
  *    `TODO` relationship. Newly created `TODO` relationships get `cost`, `priority`
  *    and `attempts = 0`.
  *
  * `PROCESSED` / `TODO` relationships are matched on `ingestion`, `languages` and
  * `parentBlobs`, plus the three workspace properties when `workspace` is defined.
  *
  * After the statement, the parent resource (and, where applicable, the blob's parent
  * file) are flagged as expandable via the two helper calls at the end of the method.
  *
  * @param tx          runner the Cypher statement is executed on
  * @param file        file being ingested; supplies the file uri, parent uri and size
  * @param uri         uri of the blob
  * @param parentBlobs uris stored in the `parentBlobs` relationship property
  * @param mimeType    mime type of the blob; also passed to each extractor's `cost`
  * @param ingestion   value stored in the `ingestion` relationship property
  * @param languages   values stored in the `languages` relationship property
  * @param extractors  extractors to queue against the blob
  * @param workspace   optional workspace context; when present its ids are added to the
  *                    relationship properties and the TODO priority is multiplied by 100
  * @return always `Right(())`; this method never builds a `Left` itself, so anything
  *         thrown by `tx.run` or the helpers propagates to the caller
  */
def insertBlob(tx: StatementRunner, file: IngestionFile, uri: Uri, parentBlobs: List[Uri], mimeType: MimeType,
               ingestion: String, languages: List[String], extractors: Iterable[Extractor], workspace: Option[WorkspaceItemContext]): Either[Failure, Unit] = {
  // Converts one extractor into the map consumed by the UNWIND clause of the query.
  // "extractorPriority" is the extractor's own priority (stored on the Extractor node);
  // "priority" is the value stored on the TODO relationship and is multiplied by 100
  // when the blob is being added in a workspace context.
  def toParameterMap(e: Extractor): java.util.Map[String, Object] = {
    val todoPriority = if (workspace.nonEmpty) e.priority * 100 else e.priority

    Map[String, Object](
      "name" -> e.name,
      "indexing" -> Boolean.box(e.indexing),
      "extractorPriority" -> Int.box(e.priority),
      "priority" -> Int.box(todoPriority),
      "cost" -> Long.box(e.cost(mimeType, file.size)),
      "external" -> Boolean.box(e.external)
    ).asJava
  }

  // Extra relationship properties, spliced into both the PROCESSED and the TODO
  // patterns below. Empty when there is no workspace context.
  val maybeWorkspaceProperties = workspace.map { _ =>
    """
      |,
      |workspaceId: {workspaceId},
      |workspaceNodeId: {workspaceNodeId},
      |workspaceBlobUri: {workspaceBlobUri}
      |""".stripMargin
  }.getOrElse("")

  // The statement result is deliberately not bound: nothing in this method reads it.
  tx.run(
    s"""
      |MATCH (parent:Resource {uri: {parentUri}})
      |
      |MERGE (file:File:Resource {uri: {fileUri}})
      |MERGE (blob:Blob:Resource {uri: {blobUri}, size: {size}})
      |MERGE (mimeType:MimeType {mimeType: {mimeType}})
      |
      |MERGE (parent)<-[:PARENT]-(file)
      |MERGE (file)<-[:PARENT]-(blob)
      |MERGE (blob)-[:TYPE_OF]-(mimeType)
      |
      |WITH {extractorParamsArray} as extractors
      |UNWIND extractors as extractorParam
      | MERGE (extractor :Extractor {name: extractorParam.name, indexing: extractorParam.indexing, priority: extractorParam.extractorPriority, external: extractorParam.external})
      | WITH extractor, extractorParam.cost as cost, extractorParam.priority as priority
      |
      | MATCH (unprocessedBlob: Blob:Resource {uri: {blobUri}})
      | WHERE
      | NOT (unprocessedBlob)<-[:PROCESSED {
      | ingestion: {ingestion},
      | languages: {languages},
      | parentBlobs: {parentBlobs}
      | ${maybeWorkspaceProperties}
      | } ]-(extractor)
      |
      | MERGE (unprocessedBlob)<-[todo:TODO {
      | ingestion: {ingestion},
      | languages: {languages},
      | parentBlobs: {parentBlobs}
      | ${maybeWorkspaceProperties}
      | }]-(extractor)
      | ON CREATE SET todo.cost = cost,
      | todo.priority = priority,
      | todo.attempts = 0
      |""".stripMargin,
    parameters(
      "parentUri", file.parentUri.value,
      "fileUri", file.uri.value,
      "blobUri", uri.value,
      // Boxed explicitly (rather than cast) so the driver receives a java.lang.Long.
      "size", Long.box(file.size),
      "mimeType", mimeType.mimeType,
      "ingestion", ingestion,
      "extractorParamsArray", extractors.map(toParameterMap).toArray,
      "languages", languages.asJava,
      "parentBlobs", parentBlobs.map(_.value).toArray,
      // The workspace parameters are always supplied; they are null when there is no
      // workspace, in which case the query text does not reference them.
      "workspaceId", workspace.map(_.workspaceId).orNull,
      "workspaceNodeId", workspace.map(_.workspaceNodeId).orNull,
      "workspaceBlobUri", workspace.map(_.blobAddedToWorkspace).orNull
    )
  )

  // NOTE(review): whatever the two helper calls below return is discarded here. If they
  // can report failure through their return value, confirm that ignoring it is intended.

  // This operation will have an effect when we've just added a CHILD to a blob.
  // We look for a parent blob (and its parent file) and mark them as expandable.
  markResourceAsExpandable(tx, file.parentUri)

  // This operation will have an effect when we've just added a new PARENT to an existing expandable blob.
  // When you add a new file parent to an expandable blob, you need to mark that parent as expandable,
  // because if the blob's already there, we won't be re-processing all its children,
  // so the markResourceAsExpandable step won't do what we want.
  // Note that this is a no-op if we've just added the blob for the first time,
  // since we have yet to delve into its children and mark the blob as expandable if it has them.
  markParentFileAsExpandableIfBlobIsExpandable(tx, blobUri = uri)

  Right(())
}