cloudrun-malware-scanner/scanner.ts (316 lines of code) (raw):
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import {logger} from './logger';
import {Config, BucketDefs} from './config.js';
import * as gcs from '@google-cloud/storage';
import * as ClamdClient from 'clamdjs';
import * as metrics from './metrics';
/**
 * Shape of the `./metrics` module. The Scanner constructor accepts any value
 * of this type as its metrics client.
 */
type MetricsClient = typeof metrics;

/** Host address used for every clamd client call in this file. */
const CLAMD_HOST = '127.0.0.1';

/** TCP port used for every clamd client call in this file. */
const CLAMD_PORT = 3310;

/** Timeout for a single scan: 10 minutes, expressed in milliseconds. */
const CLAMD_TIMEOUT = 10 * 60 * 1000;
// MAX_FILE_SIZE is the largest object size (in bytes) that will be sent to
// the ClamAV daemon; larger objects are ignored without being scanned.
//
// ClamAV itself has internal limits, which apply both to the total file
// size, and to the size of compressed files inside file containers.
// These are set in the clamd.conf file by bootstrap.sh
//
// Note: scanning a file of this size can take 5 minutes, so ensure the scan
// timeout is large enough.
//
// The value is 500,000,000 bytes, i.e. 500 MB (decimal), which is about
// 477 MiB.
const MAX_FILE_SIZE = 500 * 1000 * 1000;
/**
 * Description of the storage object that a scan request refers to.
 *
 * Scanner.validateStorageObject() rejects requests where any of these fields
 * is null or undefined.
 */
export interface StorageObjectData {
/** Object name; also tested against the configured file exclusion regexps. */
name: string;
/** Bucket holding the object; must match an `unscanned` bucket in the config. */
bucket: string;
/**
 * Reported object size. May arrive as a string or a number; it is converted
 * with parseInt(String(size)) before use. Zero is a valid value.
 */
size: string | number;
}
/**
 * Result returned by Scanner.handleGcsObject().
 */
export interface ScanResponse {
/**
 * Outcome of the request. The values produced in this file are 'ignored',
 * 'clean', 'infected' and 'error'.
 */
status: string;
/**
 * Detail for the outcome: a reason code or text for ignored/error results,
 * 'scan_success' for clean files, or the scan result line for infected files.
 */
message: string;
/** clamd version string; only set on 'clean' and 'infected' responses. */
clam_version?: string;
}
/**
 * Scans Cloud Storage objects for malware by streaming them to a clamd
 * daemon, then moves each scanned object to the clean or quarantined bucket
 * configured for the bucket it was uploaded to.
 *
 * All collaborators (config, clamd client module, storage client and metrics
 * client) are passed to the constructor.
 */
export class Scanner {
  config: Config;
  clamdClient: typeof ClamdClient;
  storageClient: gcs.Storage;
  metricsClient: MetricsClient;
  clamdScanStream: ClamdClient.ScanStreamFunc;

  /**
   * @param config Scanner configuration (buckets, exclusion regexps,
   *     quarantine extension lists, zero-length file handling).
   * @param clamdClient clamd client module used for version/ping/scan calls.
   * @param storageClient Cloud Storage client used to read and move objects.
   * @param metricsClient Metrics writer used to record scan outcomes.
   */
  constructor(
    config: Config,
    clamdClient: typeof ClamdClient,
    storageClient: gcs.Storage,
    metricsClient: MetricsClient,
  ) {
    this.config = config;
    this.clamdClient = clamdClient;
    this.storageClient = storageClient;
    this.metricsClient = metricsClient;
    // Create the stream scanner once; it is reused for every scan request.
    this.clamdScanStream = clamdClient.createScanner(
      CLAMD_HOST,
      CLAMD_PORT,
    ).scanStream;
  }

  /**
   * Queries clamd for its version string.
   *
   * @returns The version reply with all NUL characters removed.
   */
  async getClamVersion(): Promise<string> {
    const version = await this.clamdClient.version(CLAMD_HOST, CLAMD_PORT);
    // Remove every NUL character from the reply, not only the first one.
    return version.split('\x00').join('');
  }

  /**
   * Checks that clamd responds to a ping.
   *
   * @throws Error if the ping returns a falsy value; errors thrown by the
   *     client itself propagate unchanged.
   */
  async pingClamD(): Promise<void> {
    const alive = await this.clamdClient.ping(CLAMD_HOST, CLAMD_PORT);
    if (!alive) {
      // ping can return false, or throw...
      throw new Error('clamd PING failed');
    }
  }

  /**
   * Verifies that a request carries all the fields needed to locate and scan
   * an object.
   *
   * @param storageObject The object description taken from the request.
   * @throws Error if the object, its name, its bucket or its size is missing.
   */
  validateStorageObject(storageObject: StorageObjectData): void {
    if (storageObject == null) {
      throw new Error('No storage object in request');
    }
    if (storageObject.name == null) {
      throw new Error('file name not specified in request');
    }
    if (storageObject.bucket == null) {
      throw new Error('bucket name not specified in request');
    }
    // A size of 0 is valid, so only reject null/undefined explicitly.
    if (storageObject.size === null || storageObject.size === undefined) {
      throw new Error('object size not specified in request');
    }
  }

  /**
   * Processes one scan request.
   *
   * Steps, in order:
   *  1. Validate the request and find the bucket definition whose `unscanned`
   *     bucket matches; invalid requests are logged, counted as failed and
   *     ignored.
   *  2. Ignore zero-length files (when configured), files larger than
   *     MAX_FILE_SIZE, and files whose name matches an exclusion regexp.
   *  3. Ignore objects that no longer exist or whose stored size differs from
   *     the size in the request.
   *  4. Stream the object to clamd, then apply the extension allow/deny lists
   *     to files clamd considers clean.
   *  5. Record metrics and move the object to the clean or quarantined bucket.
   *
   * @param storageObject The object description taken from the request.
   * @returns The outcome: status 'ignored', 'clean', 'infected', or 'error'
   *     (the latter for 403/404 API errors, which are logged but not thrown).
   * @throws Any other error raised while processing, after logging it and
   *     writing a scan-failed metric for the bucket.
   */
  async handleGcsObject(
    storageObject: StorageObjectData,
  ): Promise<ScanResponse> {
    try {
      let bucketDefs: BucketDefs | undefined;
      try {
        this.validateStorageObject(storageObject);
        bucketDefs = this.config.buckets.find(
          (candidate) => candidate.unscanned === storageObject.bucket,
        );
        if (bucketDefs == null) {
          throw new Error(
            `Request has bucket name ${storageObject.bucket} which is not an unscanned bucket in config`,
          );
        }
      } catch (e) {
        logger.error(`Ignoring request: ${e as Error}`);
        this.metricsClient.writeScanFailed();
        return {message: 'ignoring invalid request', status: 'ignored'};
      }

      const objectUri = `gs://${storageObject.bucket}/${storageObject.name}`;
      // Sizes are decimal; an explicit radix avoids hex parsing of "0x...".
      const fileSize = parseInt(String(storageObject.size), 10);

      // Check for zero length file:
      if (fileSize === 0 && this.config.ignoreZeroLengthFiles) {
        this.logIgnoredScan(
          storageObject,
          fileSize,
          'zero length file',
          `Scan status for ${objectUri}: IGNORED (zero length file)`,
        );
        this.metricsClient.writeScanIgnored(
          bucketDefs.unscanned,
          bucketDefs.clean,
          fileSize,
          'ZERO_LENGTH_FILE',
        );
        return {status: 'ignored', message: 'zero_length_file'};
      }

      // Check if the file is too big to process
      if (fileSize > MAX_FILE_SIZE) {
        this.logIgnoredScan(
          storageObject,
          fileSize,
          'file too large',
          `Scan status for ${objectUri}: IGNORED (file too large at ${fileSize} bytes)`,
        );
        this.metricsClient.writeScanIgnored(
          bucketDefs.unscanned,
          bucketDefs.clean,
          fileSize,
          'FILE_TOO_LARGE',
        );
        return {status: 'ignored', message: 'file_too_large'};
      }

      // Check if filename is excluded:
      for (const regexp of this.config.fileExclusionRegexps) {
        if (regexp.test(storageObject.name)) {
          this.logIgnoredScan(
            storageObject,
            fileSize,
            'file excluded by regex',
            `Scan status for ${objectUri}: IGNORED (matched regex: ${regexp.toString()})`,
          );
          this.metricsClient.writeScanIgnored(
            bucketDefs.unscanned,
            bucketDefs.clean,
            fileSize,
            'REGEXP_MATCH',
            regexp.toString(),
          );
          return {status: 'ignored', message: 'exclusion_regexp_match'};
        }
      }

      // Validate file exists
      const gcsFile = this.storageClient
        .bucket(storageObject.bucket)
        .file(storageObject.name);
      // File.exists() returns a FileExistsResponse, which is a list with a
      // single value.
      const [fileExists] = await gcsFile.exists();
      if (!fileExists) {
        // Warn in logs, but return successful to client.
        logger.warn(
          `Ignoring no longer existing file: ${gcsFile.cloudStorageURI.href}`,
        );
        return {status: 'ignored', message: 'file deleted'};
      }

      // A size mismatch between the request and the stored metadata is
      // treated as an incomplete upload and the request is ignored.
      const [metadata] = await gcsFile.getMetadata();
      const metadataSize = parseInt(String(metadata.size), 10);
      if (fileSize !== metadataSize) {
        this.logIgnoredScan(
          storageObject,
          fileSize,
          'file size mismatch',
          `Scan status for ${gcsFile.cloudStorageURI.href}: IGNORED (File size mismatch (reported: ${fileSize}, metadata: ${metadataSize}). File upload may not be complete).`,
        );
        this.metricsClient.writeScanIgnored(
          bucketDefs.unscanned,
          bucketDefs.clean,
          fileSize,
          'FILE_SIZE_MISMATCH',
        );
        return {status: 'ignored', message: 'file_size_mismatch'};
      }

      const clamdVersion = await this.getClamVersion();
      logger.info(
        `Scan request for ${gcsFile.cloudStorageURI.href}, (${fileSize} bytes) scanning with clam ${clamdVersion}`,
      );

      const startTime = Date.now();
      const readStream = gcsFile.createReadStream();
      let result;
      try {
        result = await this.clamdScanStream(readStream, CLAMD_TIMEOUT);
      } finally {
        // Ensure stream is destroyed in all situations to prevent any
        // resource leaks.
        readStream.destroy();
      }
      const scanDuration = Date.now() - startTime;

      let isClean = this.clamdClient.isCleanReply(result);
      const {fileExtensionAllowList, fileExtensionDenyList} =
        this.config.quarantine;

      // The extension lists are only consulted for files clamd reports as
      // clean; a non-empty allow list blocks any extension not listed in it.
      if (
        isClean &&
        fileExtensionAllowList.length > 0 &&
        !this.isExtensionInList(storageObject.name, fileExtensionAllowList)
      ) {
        // mimic ClamAV's response format
        result = 'stream: Config.AllowList.Blocked FOUND';
        isClean = false;
      }
      if (
        isClean &&
        fileExtensionDenyList.length > 0 &&
        this.isExtensionInList(storageObject.name, fileExtensionDenyList)
      ) {
        // mimic ClamAV's response format
        result = 'stream: Config.DenyList.Blocked FOUND';
        isClean = false;
      }

      if (isClean) {
        logger.info(
          {
            scanStatus: {
              bucket: storageObject.bucket,
              file: storageObject.name,
              fileSize,
              scanDuration,
              status: 'clean',
              clamdVersion,
              result,
            },
          },
          `Scan status for ${gcsFile.cloudStorageURI.href}: CLEAN (${fileSize} bytes in ${scanDuration} ms)`,
        );
        this.metricsClient.writeScanClean(
          bucketDefs.unscanned,
          bucketDefs.clean,
          fileSize,
          scanDuration,
          clamdVersion,
        );
        await this.moveProcessedFile(gcsFile, bucketDefs.clean);
        return {
          status: 'clean',
          clam_version: clamdVersion,
          message: 'scan_success',
        };
      } else {
        logger.warn(
          {
            scanStatus: {
              bucket: storageObject.bucket,
              file: storageObject.name,
              fileSize,
              scanDuration,
              status: 'infected',
              clamdVersion,
              result,
            },
          },
          `Scan status for ${gcsFile.cloudStorageURI.href}: INFECTED ${result} (${fileSize} bytes in ${scanDuration} ms)`,
        );
        this.metricsClient.writeScanInfected(
          bucketDefs.unscanned,
          bucketDefs.quarantined,
          fileSize,
          scanDuration,
          clamdVersion,
        );
        await this.moveProcessedFile(gcsFile, bucketDefs.quarantined);
        return {
          message: result,
          status: 'infected',
          clam_version: clamdVersion,
        };
      }
    } catch (e) {
      logger.error(
        {err: e},
        `Exception when processing gs://${storageObject.bucket}/${storageObject.name}: ${e as Error}`,
      );
      // Check for an API error code
      const errcode = (e as gcs.ApiError).code;
      if (errcode && [403, 404].includes(errcode)) {
        // Permission denied/file not found can be raised by the stream reading
        // and by the object move. They cannot be retried, so respond
        // with success, but log the error.
        return {status: 'error', message: 'Error when reading file, ignoring'};
      }
      this.metricsClient.writeScanFailed(storageObject.bucket);
      throw e;
    }
  }

  /**
   * Writes the structured "ignored" scan-status log entry shared by all the
   * early-exit paths of handleGcsObject().
   */
  private logIgnoredScan(
    storageObject: StorageObjectData,
    fileSize: number,
    result: string,
    message: string,
  ): void {
    logger.info(
      {
        scanStatus: {
          bucket: storageObject.bucket,
          file: storageObject.name,
          fileSize,
          status: 'ignored',
          result,
        },
      },
      message,
    );
  }

  /**
   * Does filename have an extension that is included in extensionList?
   *
   * An empty string in the list matches files without an extension, i.e.
   * whose final path segment (the text after the last '/') contains no '.'.
   * Any other entry matches when the lower-cased file name ends with it;
   * entries themselves are compared as given.
   *
   * @param fileName Object name, possibly containing '/' separated folders.
   * @param extensionList Extensions to match against.
   * @returns true if any entry in the list matches the file name.
   */
  isExtensionInList(fileName: string, extensionList: string[]): boolean {
    // toLowerCase() is locale-independent, so the result does not depend on
    // the locale of the host.
    const lowerCaseName = fileName.toLowerCase();
    // Only the last path segment decides whether there is an extension;
    // dots in folder names must not count.
    const baseName = fileName.slice(fileName.lastIndexOf('/') + 1);
    const hasExtension = baseName.includes('.');

    return extensionList.some((extension) =>
      extension.length === 0
        ? !hasExtension
        : lowerCaseName.endsWith(extension),
    );
  }

  /**
   * Moves a processed object to another bucket, keeping its name, and logs
   * the move.
   *
   * @param srcfile The object to move.
   * @param destinationBucketName Name of the bucket to move it to.
   */
  async moveProcessedFile(
    srcfile: gcs.File,
    destinationBucketName: string,
  ): Promise<void> {
    const destinationBucket = this.storageClient.bucket(destinationBucketName);
    await srcfile.move(destinationBucket);
    logger.info(
      `Successfully moved file ${srcfile.cloudStorageURI.href} to gs://${destinationBucketName}/${srcfile.name}`,
    );
  }
}