frontend/src/js/components/IngestionEvents/IngestionEvents.tsx (531 lines of code) (raw):
import React, { ReactNode, useEffect, useState } from "react"
import authFetch from "../../util/auth/authFetch"
import {
EuiFlexItem,
EuiBasicTable,
EuiToolTip,
EuiText,
EuiButtonIcon,
EuiScreenReaderOnly,
EuiSpacer,
EuiIconTip,
EuiBadge,
EuiFlexGroup,
EuiInMemoryTable,
EuiBasicTableColumn,
EuiLoadingSpinner,
EuiCodeBlock,
Criteria,
} from "@elastic/eui"
import "@elastic/eui/dist/eui_theme_light.css"
import hdate from "human-date"
import { WorkspaceMetadata } from "../../types/Workspaces"
import moment from "moment"
import _ from "lodash"
import {
BlobStatus,
ExtractorStatus,
IngestionTable,
Status,
extractorStatusColors,
IngestionEventStatus,
} from "./types"
import styles from "./IngestionEvents.module.css"
/**
 * Overall ingestion progress of a single blob. Used as the key into the
 * per-state icon and label lookup tables in this module.
 */
type BlobProgress =
    | "complete"
    | "completeWithErrors"
    | "inProgress"
    | "infiniteLoop"
// Icon (with an explanatory tooltip) shown for each blob progress state.
// "inProgress" uses a spinner wrapped in a tooltip rather than a static icon.
const blobStatusIcons = {
    complete: (
        <EuiIconTip type="checkInCircleFilled" content="Ingestion complete" />
    ),
    completeWithErrors: (
        <EuiIconTip type="alert" content="Ingestion complete with some errors" />
    ),
    inProgress: (
        <EuiToolTip content="Ingestion in progress">
            <EuiLoadingSpinner />
        </EuiToolTip>
    ),
    infiniteLoop: (
        <EuiIconTip type="alert" content="Ingestion failing in infinite loop" />
    ),
}
// moment.js format pattern used for compact timestamps in this module:
// two-digit day of month, abbreviated month name, then 24-hour time with seconds.
const SHORT_READABLE_DATE = "DD MMM HH:mm:ss"
/** Looks up the badge colour registered for a status in `extractorStatusColors`. */
const statusToColor = (status: Status) => {
    const badgeColor = extractorStatusColors[status]
    return badgeColor
}
/**
 * Returns the extractor statuses that contain at least one "Failure" update,
 * regardless of any updates recorded before or after it.
 */
const getFailedStatuses = (statuses: ExtractorStatus[]) => {
    const hasFailure = (extractor: ExtractorStatus) =>
        extractor.statusUpdates.some((update) => update.status === "Failure")
    return statuses.filter((extractor) => hasFailure(extractor))
}
/**
 * Keeps only the blobs that have a failed extractor or are flagged as being
 * stuck in an infinite ingestion loop.
 */
const getFailedBlobs = (blobs: BlobStatus[]) =>
    blobs.filter(
        (blob) =>
            getFailedStatuses(blob.extractorStatuses).length > 0 ||
            blob.infiniteLoop
    )
/**
 * Derives the overall progress of a blob from its extractor statuses.
 *
 * Precedence: infinite loop, then any extractor failure, then any extractor
 * that has not finished, otherwise complete. An extractor counts as finished
 * once it has an update that is "Failure", "Success" or has no status at all;
 * an extractor with no updates is therefore still in progress.
 */
const getBlobStatus = (statuses: ExtractorStatus[], infiniteLoop: boolean): BlobProgress => {
    if (infiniteLoop) {
        return "infiniteLoop"
    }
    if (getFailedStatuses(statuses).length > 0) {
        return "completeWithErrors"
    }
    const hasFinished = (extractor: ExtractorStatus) =>
        extractor.statusUpdates.some(
            (update) =>
                !update.status ||
                update.status === "Failure" ||
                update.status === "Success"
        )
    return statuses.every((extractor) => hasFinished(extractor))
        ? "complete"
        : "inProgress"
}
/**
 * True when any single extractor of the blob has logged more than one
 * "Started" update, i.e. the blob went through that extractor repeatedly.
 */
const blobIngestedMultipleTimes = (status: BlobStatus) =>
    status.extractorStatuses.some((extractor) => {
        let startCount = 0
        for (const update of extractor.statusUpdates) {
            if (update.status === "Started") {
                startCount += 1
            }
        }
        return startCount > 1
    })
/**
 * Renders every status update of an extractor as a bulleted
 * "<timestamp> <status>" list, optionally preceded by a bold title.
 *
 * Returns the plain string "No events so far" when the extractor has no
 * status updates.
 *
 * The wrapper is a <div> because a <ul> is not allowed inside a <p> (React
 * warns about the invalid nesting).
 */
const extractorStatusList = (status: ExtractorStatus, title?: string) => {
    if (status.statusUpdates.length === 0) {
        return "No events so far"
    }

    return (
        <div>
            {title && (
                <>
                    <b>{title}</b> <br />
                </>
            )}
            <ul>
                {status.statusUpdates.map((update, index) => {
                    const label = `${moment(update.eventTime).format(
                        SHORT_READABLE_DATE
                    )} ${update.status}`
                    // The index keeps keys unique when two updates share the same second and status
                    return <li key={`${index}-${label}`}>{label}</li>
                })}
            </ul>
        </div>
    )
}
// Keep only what follows the last "/" of each path (the file name) and put one name per line
const pathsToFileNames = (paths: string[]) => {
    const fileNames = paths.map((path) => path.slice(path.lastIndexOf("/") + 1))
    return fileNames.join("\n")
}
// Human-readable label for each blob progress state. It is indexed with the
// value returned by getBlobStatus to fill the table's "Status" column.
const blobStatusText = {
complete: "Complete",
completeWithErrors: "Complete with errors",
inProgress: "In progress",
infiniteLoop: "Infinite Loop"
}
/**
 * Narrow, untitled table column showing a single icon that summarises the
 * ingestion state of a blob.
 */
const statusIconColumn = {
    field: "extractorStatuses",
    name: "",
    // The width is used as a CSS value, so it needs a unit; this matches the "40px" expander column
    width: "40px",
    render: (statuses: ExtractorStatus[], row: BlobStatus) => {
        if (row.infiniteLoop) {
            return blobStatusIcons["infiniteLoop"]
        }
        const totalErrors = row.errors.length
        const extractorStatus = getBlobStatus(statuses, row.infiniteLoop)
        // if extractors have finished but there are other non-extractor related errors, show an error icon
        const combinedStatus =
            extractorStatus === "complete" && totalErrors > 0
                ? "completeWithErrors"
                : extractorStatus
        return blobStatusIcons[combinedStatus]
    },
}
// Base column definitions for the ingestion events table: file name(s), first
// event time, run time, overall status and a per-extractor status breakdown.
const columns: Array<EuiBasicTableColumn<BlobStatus>> = [
    {
        field: "paths",
        name: "Filename(s)",
        sortable: true,
        truncateText: true,
        render: pathsToFileNames,
    },
    {
        field: "ingestStart",
        name: "First event time",
        sortable: true,
        render: (ingestStart: Date) =>
            moment(ingestStart).format(SHORT_READABLE_DATE),
    },
    {
        // Computed column: time elapsed between the first and the most recent event
        name: "Ingestion run time",
        render: (row: BlobStatus) => (
            <>
                {moment
                    .duration(
                        moment(row.mostRecentEvent).diff(
                            moment(row.ingestStart)
                        )
                    )
                    .humanize()}{" "}
                {blobIngestedMultipleTimes(row) && (
                    <EuiIconTip
                        aria-label="Info"
                        size="m"
                        type="iInCircle"
                        color="primary"
                        content={
                            "This file has been ingested more than once so ingestion run time may not be accurate."
                        }
                    />
                )}
            </>
        ),
    },
    {
        field: "extractorStatuses",
        name: "Status",
        render: (statuses: ExtractorStatus[], row: BlobStatus) => {
            return blobStatusText[getBlobStatus(statuses, row.infiniteLoop)]
        },
    },
    {
        field: "extractorStatuses",
        name: "Extractors",
        render: (statuses: ExtractorStatus[]) => {
            return statuses.length > 0 ? (
                <ul>
                    {statuses.map((status) => {
                        // Updates are expected in chronological order, so the last one is the latest
                        const mostRecent =
                            status.statusUpdates.length > 0
                                ? status.statusUpdates[
                                      status.statusUpdates.length - 1
                                  ]
                                : undefined
                        return (
                            <li key={status.extractorType}>
                                <EuiFlexGroup>
                                    <EuiFlexItem>
                                        {status.extractorType.replace(
                                            "Extractor",
                                            ""
                                        )}
                                    </EuiFlexItem>
                                    <EuiFlexItem grow={false}>
                                        {mostRecent?.status ? (
                                            <EuiToolTip
                                                content={extractorStatusList(
                                                    status,
                                                    `All ${status.extractorType} events`
                                                )}
                                            >
                                                <EuiBadge
                                                    color={statusToColor(
                                                        mostRecent.status
                                                    )}
                                                >
                                                    {mostRecent.status} (
                                                    {moment(
                                                        mostRecent.eventTime
                                                    ).format("HH:mm:ss")}
                                                    )
                                                </EuiBadge>
                                            </EuiToolTip>
                                        ) : (
                                            <>No updates</>
                                        )}
                                    </EuiFlexItem>
                                </EuiFlexGroup>
                            </li>
                        )
                    })}
                </ul>
            ) : (
                <></>
            )
        },
        // The width is used as a CSS value, so it needs a unit ("300" alone is not a valid CSS width)
        width: "300px",
    },
]
/**
 * Converts one raw blob status object from the ingestion events API response
 * into a `BlobStatus`:
 *  - falsy paths become "unknown-filename"
 *  - timestamps become `Date` objects
 *  - the comma separated `mimeTypes` string (if present) becomes an array
 *  - each extractor status is reduced to `extractorType` (with "Extractor"
 *    removed) and its status updates, sorted by event time
 */
const parseBlobStatus = (status: any): BlobStatus => {
    const toDate = (raw: any) => new Date(raw)

    // Updates lacking a timestamp or a status are dropped; the rest are ordered chronologically
    const parseStatusUpdates = (updates: any) =>
        _.sortBy(
            updates
                .filter(
                    (u: any) =>
                        u.eventTime !== undefined && u.status !== undefined
                )
                .map((u: any) => ({ ...u, eventTime: toDate(u.eventTime) })),
            (update: any) => update.eventTime
        )

    const paths = status.paths.map((p: any) => p || "unknown-filename")
    const ingestStart = toDate(status.ingestStart)
    const mostRecentEvent = toDate(status.mostRecentEvent)
    const mimeTypes = status.mimeTypes?.split(",")
    const eventStatuses = status.eventStatuses.map((es: any) => ({
        ...es,
        eventTime: toDate(es.eventTime),
    }))
    const extractorStatuses = status.extractorStatuses.map((s: any) => ({
        extractorType: s.extractorType.replace("Extractor", ""),
        statusUpdates: parseStatusUpdates(s.statusUpdates),
    }))

    return {
        ...status,
        paths,
        ingestStart,
        mostRecentEvent,
        mimeTypes,
        eventStatuses,
        extractorStatuses,
    }
}
/** Builds the table row id for a blob status: "<ingestId>-<blobId>". */
const blobStatusId = (blobStatus: BlobStatus) => {
    const { ingestId, blobId } = blobStatus.metadata
    return `${ingestId}-${blobId}`
}
/**
 * Renders the detail panel shown when a table row is expanded.
 *
 * It lists the file paths and ingest start time, then either a notice that
 * the blob is failing in an infinite loop, or a table of the events prior to
 * extraction followed by a summary and full event list for each extractor.
 * Any errors recorded against the blob are listed at the bottom.
 */
const renderExpandedRow = (blobStatus: BlobStatus) => {
    // Named differently from the module-level `columns` to avoid shadowing it
    const eventColumns: Array<EuiBasicTableColumn<IngestionEventStatus>> = [
        {
            field: "eventTime",
            name: "Event time",
            render: (time: Date) => moment(time).format(SHORT_READABLE_DATE),
        },
        {
            field: "eventType",
            name: "Event",
        },
        {
            field: "eventStatus",
            name: "Status",
            render: (status: Status) => {
                return (
                    <EuiBadge color={statusToColor(status)}>{status}</EuiBadge>
                )
            },
        },
    ]

    return (
        <EuiText>
            <h3>{pathsToFileNames(blobStatus.paths)}</h3>
            <p>
                Full file path(s) : {blobStatus.paths.join(", ")}. Ingestion
                started on {hdate.prettyPrint(blobStatus.ingestStart)}
            </p>
            {blobStatus.infiniteLoop && (
                <>
                    <h4>Ingestion failing in infinite loop</h4>
                    <p>blob id {blobStatus.metadata.blobId}</p>
                </>
            )}
            {!blobStatus.infiniteLoop && (
                <>
                    <h4>All ingestion events prior to extraction</h4>
                    <EuiBasicTable
                        tableCaption="Ingestion events prior to extraction"
                        // Sort on the numeric timestamp: unlike toISOString() it does not throw on an invalid date
                        items={_.sortBy(blobStatus.eventStatuses, (s) =>
                            s.eventTime.getTime()
                        )}
                        columns={eventColumns}
                    />
                    <h4>Extraction events</h4>
                    {blobStatus.mimeTypes &&
                        `This file is of type ${blobStatus.mimeTypes.join(
                            ","
                        )}.`}{" "}
                    Giant has run the following extractors on the file:
                    <div className={styles.expandedRowExtractorStatus}>
                        {blobStatus.extractorStatuses.map(
                            (extractorStatus, extractorIndex) => {
                                const numErrors =
                                    extractorStatus.statusUpdates.filter(
                                        (su) => su.status === "Failure"
                                    ).length
                                const numStarted =
                                    extractorStatus.statusUpdates.filter(
                                        (su) => su.status === "Started"
                                    ).length
                                const mostRecent =
                                    extractorStatus.statusUpdates.length > 0
                                        ? extractorStatus.statusUpdates[
                                              extractorStatus.statusUpdates
                                                  .length - 1
                                          ]
                                        : undefined
                                const eventList =
                                    extractorStatusList(extractorStatus)
                                return (
                                    <React.Fragment
                                        key={`${extractorIndex}-${extractorStatus.extractorType}`}
                                    >
                                        <h4>{extractorStatus.extractorType}</h4>
                                        <p>
                                            The extractor{" "}
                                            {extractorStatus.extractorType} has been
                                            started {numStarted} times. There have
                                            been {numErrors} errors.
                                            <br />
                                            {mostRecent ? (
                                                <>
                                                    The most recent status event is
                                                    '{mostRecent.status}' which
                                                    happened on{" "}
                                                    {hdate.prettyPrint(
                                                        mostRecent.eventTime,
                                                        { showTime: true }
                                                    )}
                                                </>
                                            ) : (
                                                ""
                                            )}{" "}
                                            <br /> <br />
                                            All {extractorStatus.extractorType}{" "}
                                            events:
                                        </p>
                                        {/* The list is block-level markup, so it is rendered after the paragraph rather than inside it */}
                                        {typeof eventList === "string" ? (
                                            <p>{eventList}</p>
                                        ) : (
                                            eventList
                                        )}
                                    </React.Fragment>
                                )
                            }
                        )}
                    </div>
                </>
            )}
            {blobStatus.errors.length > 0 && (
                <>
                    <h4>Errors encountered processing this file</h4>
                    {blobStatus.errors.map((error, errorIndex) => (
                        <div key={`${errorIndex}-${error.eventType}`}>
                            <h5>{error.eventType}</h5>
                            {error.errors.map((e, messageIndex) => (
                                <EuiCodeBlock key={messageIndex}>
                                    {e.message}
                                </EuiCodeBlock>
                            ))}
                        </div>
                    ))}
                </>
            )}
        </EuiText>
    )
}
/**
 * Paginated, sortable table of blob statuses with expandable rows.
 *
 * The default page size is 10 when the view is broken down by workspace and
 * 100 otherwise; the available page size options follow the same switch.
 */
function IngestionEventsTable({
    blobs,
    columnsWithExpandingRow,
    itemIdToExpandedRowMap,
    breakdownByWorkspace,
}: {
    blobs: BlobStatus[]
    columnsWithExpandingRow: Array<EuiBasicTableColumn<BlobStatus>>
    itemIdToExpandedRowMap: Record<string, ReactNode>
    breakdownByWorkspace: boolean
}) {
    const defaultPageSize = breakdownByWorkspace ? 10 : 100
    const largerPageSizes = breakdownByWorkspace ? [100, 250] : [250, 500]

    const [pageIndex, setPageIndex] = useState(0)
    const [pageSize, setPageSize] = useState(defaultPageSize)

    // Mirror the page the user navigated to (and the chosen page size) into local state
    const handleTableChange = (criteria: Criteria<BlobStatus>) => {
        const requestedPage = criteria.page
        if (!requestedPage) {
            return
        }
        setPageIndex(requestedPage.index)
        setPageSize(requestedPage.size)
    }

    return (
        <EuiInMemoryTable
            tableCaption="ingestion events"
            items={blobs}
            itemId={blobStatusId}
            loading={blobs === undefined}
            columns={columnsWithExpandingRow}
            sorting={true}
            itemIdToExpandedRowMap={itemIdToExpandedRowMap}
            pagination={{
                pageIndex,
                pageSize,
                pageSizeOptions: [defaultPageSize, ...largerPageSizes],
            }}
            onTableChange={handleTableChange}
        />
    )
}
/**
 * Fetches the ingestion events for a collection (optionally narrowed to one
 * ingest) and renders them as one table per workspace, or as a single table
 * for the whole collection/ingest.
 *
 * @param collectionId         collection whose ingestion events are fetched
 * @param ingestId             optional ingest to narrow to; undefined or "all" means every ingest
 * @param workspaces           workspaces to render a table for when `breakdownByWorkspace` is set
 * @param breakdownByWorkspace render one table per workspace instead of a single table
 * @param showErrorsOnly       only list blobs with a failed extractor or an infinite loop
 */
export function IngestionEvents({
    collectionId,
    ingestId,
    workspaces,
    breakdownByWorkspace,
    showErrorsOnly,
}: {
    collectionId: string
    ingestId?: string
    workspaces: WorkspaceMetadata[]
    breakdownByWorkspace: boolean
    showErrorsOnly: boolean
}) {
    const [blobs, updateBlobs] = useState<BlobStatus[] | undefined>(undefined)
    const [tableData, setTableData] = useState<IngestionTable[]>([])

    const ingestIdSuffix = ingestId && ingestId !== "all" ? `/${ingestId}` : ""

    // Expanding rows logic - we use itemIdToExpandedRowMap to keep track of which rows have been expanded
    const [itemIdToExpandedRowMap, setItemIdToExpandedRowMap] = useState<
        Record<string, ReactNode>
    >({})

    // Functional updates derive the next map from the latest state rather
    // than from the value captured when this render's closures were created.
    const openRow = (blobStatus: BlobStatus) => {
        const id = blobStatusId(blobStatus)
        const expandedRow = renderExpandedRow(blobStatus)
        setItemIdToExpandedRowMap((current) => ({
            ...current,
            [id]: expandedRow,
        }))
    }

    const closeRow = (blobStatus: BlobStatus) => {
        const id = blobStatusId(blobStatus)
        setItemIdToExpandedRowMap((current) => {
            const next = { ...current }
            delete next[id]
            return next
        })
    }

    // The workspace name column is redundant when each table already belongs to one workspace
    const columnsWithWorkspace = breakdownByWorkspace
        ? columns
        : columns.concat({
              field: "workspaceName",
              sortable: true,
              name: "Workspace name",
          })

    const columnsWithExpandingRow: Array<EuiBasicTableColumn<BlobStatus>> = [
        ...columnsWithWorkspace,
        statusIconColumn,
        {
            align: "right",
            width: "40px",
            isExpander: true,
            name: (
                <EuiScreenReaderOnly>
                    <span>Expand rows</span>
                </EuiScreenReaderOnly>
            ),
            render: (row: BlobStatus) => {
                const isExpanded = Boolean(
                    itemIdToExpandedRowMap[blobStatusId(row)]
                )
                return (
                    <EuiButtonIcon
                        onClick={() =>
                            isExpanded ? closeRow(row) : openRow(row)
                        }
                        aria-label={isExpanded ? "Collapse" : "Expand"}
                        iconType={isExpanded ? "arrowDown" : "arrowRight"}
                    />
                )
            },
        },
    ]

    useEffect(() => {
        // Set by the cleanup below so a response for a superseded request
        // (or one arriving after unmount) is ignored instead of overwriting state.
        let ignore = false

        authFetch(`/api/ingestion-events/${collectionId}${ingestIdSuffix}`)
            .then((resp) => resp.json())
            .then((json) => {
                if (ignore) {
                    return
                }
                const blobStatuses: BlobStatus[] = json.map(parseBlobStatus)
                updateBlobs(blobStatuses)
            })
            .catch((error) => {
                // Report the failure rather than leaving an unhandled promise rejection
                if (!ignore) {
                    console.error("Failed to load ingestion events", error)
                }
            })

        return () => {
            ignore = true
        }
    }, [collectionId, ingestId, updateBlobs, ingestIdSuffix])

    const getWorkspaceBlobs = (
        allBlobs: BlobStatus[],
        workspaceName: string,
        errorsOnly: boolean | undefined
    ) => {
        const workspaceBlobs = allBlobs.filter(
            (b) => b.workspaceName === workspaceName
        )
        if (errorsOnly) return getFailedBlobs(workspaceBlobs)
        return workspaceBlobs
    }

    // Rebuild the list of tables whenever the fetched blobs or the display options change
    useEffect(() => {
        if (blobs) {
            if (breakdownByWorkspace) {
                setTableData(
                    workspaces.map((w: WorkspaceMetadata) => ({
                        title: `Workspace: ${w.name}`,
                        blobs: getWorkspaceBlobs(blobs, w.name, showErrorsOnly),
                    }))
                )
            } else {
                setTableData([
                    {
                        title: `${collectionId}${ingestIdSuffix}`,
                        blobs: showErrorsOnly ? getFailedBlobs(blobs) : blobs,
                    },
                ])
            }
        } else {
            setTableData([])
        }
    }, [
        breakdownByWorkspace,
        blobs,
        workspaces,
        ingestIdSuffix,
        collectionId,
        showErrorsOnly,
        setItemIdToExpandedRowMap,
    ])

    return (
        <>
            {tableData.map((t: IngestionTable) => (
                <div key={t.title}>
                    <EuiSpacer size={"m"} />
                    <h1>{t.title}</h1>
                    <IngestionEventsTable
                        blobs={t.blobs}
                        columnsWithExpandingRow={columnsWithExpandingRow}
                        itemIdToExpandedRowMap={itemIdToExpandedRowMap}
                        breakdownByWorkspace={breakdownByWorkspace}
                    />
                </div>
            ))}
        </>
    )
}