cloudrun-malware-scanner/scanner.ts (316 lines of code) (raw):

/*
 * Copyright 2024 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import {logger} from './logger';
import {Config, BucketDefs} from './config.js';
import * as gcs from '@google-cloud/storage';
import * as ClamdClient from 'clamdjs';
import * as metrics from './metrics';

// Type of the metrics module namespace, so that the module (or a stand-in
// with the same shape) can be passed to the Scanner constructor.
type MetricsClient = typeof metrics;

// Host and port used for every clamd call made from this file
// (version, ping and stream scanning).
const CLAMD_HOST = '127.0.0.1';
const CLAMD_PORT = 3310;

// 10 min timeout for scanning (value is in milliseconds).
const CLAMD_TIMEOUT = 600000;

// Note: MAX_FILE_SIZE limits the size of files which are sent to the
// ClamAV Daemon.
//
// ClamAV itself has internal limits, which apply both to the total file
// size, and to the size of compressed files inside file containers.
// These are set in the clamd.conf file by bootstrap.sh
//
// Note scanning a 500MiB file can take 5 minutes, so ensure timeout is
// large enough.
const MAX_FILE_SIZE = 500000000; // 500 MB (500,000,000 bytes, approx. 477 MiB)

/**
 * Description of the Cloud Storage object that a scan request refers to.
 *
 * `size` may be given either as a number or as a decimal string; it is
 * converted with `parseInt` before use.
 */
export interface StorageObjectData {
  name: string;
  bucket: string;
  size: string | number;
}

/**
 * Result returned by {@link Scanner.handleGcsObject}.
 *
 * `status` is one of `'clean'`, `'infected'`, `'ignored'` or `'error'`.
 * `clam_version` is only set when the object was actually scanned.
 */
export interface ScanResponse {
  status: string;
  message: string;
  clam_version?: string;
}

/**
 * Scans Cloud Storage objects with a clamd daemon and moves them to the
 * configured "clean" or "quarantined" bucket depending on the outcome.
 */
export class Scanner {
  config: Config;
  clamdClient: typeof ClamdClient;
  storageClient: gcs.Storage;
  metricsClient: MetricsClient;
  clamdScanStream: ClamdClient.ScanStreamFunc;

  /**
   * @param config Scanner configuration (bucket definitions, exclusion
   *     regexps, extension allow/deny lists, zero-length handling).
   * @param clamdClient The clamdjs module (or a compatible stand-in).
   * @param storageClient Cloud Storage client used to read and move objects.
   * @param metricsClient Sink for scan metrics.
   */
  constructor(
    config: Config,
    clamdClient: typeof ClamdClient,
    storageClient: gcs.Storage,
    metricsClient: MetricsClient,
  ) {
    this.config = config;
    this.clamdClient = clamdClient;
    this.storageClient = storageClient;
    this.metricsClient = metricsClient;
    this.clamdScanStream = clamdClient.createScanner(
      CLAMD_HOST,
      CLAMD_PORT,
    ).scanStream;
  }

  /**
   * Queries clamd for its version string.
   *
   * @returns The version reported by clamd with all NUL characters removed.
   */
  async getClamVersion(): Promise<string> {
    const rawVersion = await this.clamdClient.version(CLAMD_HOST, CLAMD_PORT);
    // Strip every NUL, not only the first one, so that the value is always
    // safe to embed in log lines and metric labels.
    return rawVersion.split('\x00').join('');
  }

  /**
   * Checks that clamd is responding.
   *
   * @throws Error when the ping reports failure; errors raised by the client
   *     itself are propagated unchanged.
   */
  async pingClamD(): Promise<void> {
    if (!(await this.clamdClient.ping(CLAMD_HOST, CLAMD_PORT))) {
      // ping can return false, or throw...
      throw new Error('clamd PING failed');
    }
  }

  /**
   * Verifies that the request carries all fields needed to locate and scan
   * an object.
   *
   * @throws Error when the object, its name, its bucket or its size is
   *     missing (null or undefined).
   */
  validateStorageObject(storageObject: StorageObjectData): void {
    if (storageObject == null) {
      throw new Error('No storage object in request');
    }
    if (storageObject.name == null) {
      throw new Error('file name not specified in request');
    }
    if (storageObject.bucket == null) {
      throw new Error('bucket name not specified in request');
    }
    // file.size can be 0, which is falsy, so compare against null/undefined
    // explicitly rather than relying on truthiness.
    if (storageObject.size === null || storageObject.size === undefined) {
      throw new Error('object size not specified in request');
    }
  }

  /**
   * Processes one scan request end to end.
   *
   * The request is ignored (no scan, object left in place) when it is
   * invalid, refers to a bucket that is not configured as "unscanned", is a
   * zero-length file and those are configured to be ignored, is larger than
   * MAX_FILE_SIZE, matches an exclusion regexp, no longer exists, or when
   * the reported size differs from the object's metadata size.
   *
   * Otherwise the object is streamed to clamd. Clean objects are then
   * additionally checked against the extension allow/deny lists; the object
   * is finally moved to the clean or quarantined bucket.
   *
   * @param storageObject Object to scan.
   * @returns The outcome of the request.
   * @throws Rethrows any processing error, except errors carrying an API
   *     code of 403 or 404, which are reported as `{status: 'error'}`.
   */
  async handleGcsObject(
    storageObject: StorageObjectData,
  ): Promise<ScanResponse> {
    try {
      let bucketDefs: BucketDefs | undefined;
      try {
        this.validateStorageObject(storageObject);
        bucketDefs = this.config.buckets.find(
          (candidate) => candidate.unscanned === storageObject.bucket,
        );
        if (bucketDefs == null) {
          throw new Error(
            `Request has bucket name ${storageObject.bucket} which is not an unscanned bucket in config`,
          );
        }
      } catch (e) {
        // Invalid requests can never succeed, so report them as ignored
        // instead of raising an error to the caller.
        logger.error(`Ignoring request: ${e as Error}`);
        this.metricsClient.writeScanFailed();
        return {message: 'ignoring invalid request', status: 'ignored'};
      }

      // Check for zero length file:
      const fileSize = parseInt(String(storageObject.size), 10);
      if (fileSize === 0 && this.config.ignoreZeroLengthFiles) {
        logger.info(
          {
            scanStatus: {
              bucket: storageObject.bucket,
              file: storageObject.name,
              fileSize,
              status: 'ignored',
              result: 'zero length file',
            },
          },
          `Scan status for gs://${storageObject.bucket}/${storageObject.name}: IGNORED (zero length file)`,
        );
        this.metricsClient.writeScanIgnored(
          bucketDefs.unscanned,
          bucketDefs.clean,
          fileSize,
          'ZERO_LENGTH_FILE',
        );
        return {status: 'ignored', message: 'zero_length_file'};
      }

      // Check if the file is too big to process
      if (fileSize > MAX_FILE_SIZE) {
        logger.info(
          {
            scanStatus: {
              bucket: storageObject.bucket,
              file: storageObject.name,
              fileSize,
              status: 'ignored',
              result: 'file too large',
            },
          },
          `Scan status for gs://${storageObject.bucket}/${storageObject.name}: IGNORED (file too large at ${fileSize} bytes)`,
        );
        this.metricsClient.writeScanIgnored(
          bucketDefs.unscanned,
          bucketDefs.clean,
          fileSize,
          'FILE_TOO_LARGE',
        );
        return {status: 'ignored', message: 'file_too_large'};
      }

      // Check if filename is excluded:
      for (const regexp of this.config.fileExclusionRegexps) {
        // A regexp with the global or sticky flag keeps its match position
        // between calls; reset it so every request is tested from the start.
        regexp.lastIndex = 0;
        if (regexp.test(storageObject.name)) {
          logger.info(
            {
              scanStatus: {
                bucket: storageObject.bucket,
                file: storageObject.name,
                fileSize,
                status: 'ignored',
                result: 'file excluded by regex',
              },
            },
            `Scan status for gs://${storageObject.bucket}/${storageObject.name}: IGNORED (matched regex: ${regexp.toString()})`,
          );
          this.metricsClient.writeScanIgnored(
            bucketDefs.unscanned,
            bucketDefs.clean,
            fileSize,
            'REGEXP_MATCH',
            regexp.toString(),
          );
          return {status: 'ignored', message: 'exclusion_regexp_match'};
        }
      }

      // Validate file exists
      const gcsFile = this.storageClient
        .bucket(storageObject.bucket)
        .file(storageObject.name);
      // File.exists() returns a FileExistsResponse, which is a list with a
      // single value.
      if (!(await gcsFile.exists())[0]) {
        // Warn in logs, but return successful to client.
        logger.warn(
          `Ignoring no longer existing file: ${gcsFile.cloudStorageURI.href}`,
        );
        return {status: 'ignored', message: 'file deleted'};
      }

      // Compare the size reported in the request with the stored object's
      // metadata; a difference suggests the upload has not finished.
      const [metadata] = await gcsFile.getMetadata();
      const metadataSize = parseInt(String(metadata.size), 10);
      if (fileSize !== metadataSize) {
        logger.info(
          {
            scanStatus: {
              bucket: storageObject.bucket,
              file: storageObject.name,
              fileSize,
              status: 'ignored',
              result: 'file size mismatch',
            },
          },
          `Scan status for ${gcsFile.cloudStorageURI.href}: IGNORED (File size mismatch (reported: ${fileSize}, metadata: ${metadataSize}). File upload may not be complete).`,
        );
        this.metricsClient.writeScanIgnored(
          bucketDefs.unscanned,
          bucketDefs.clean,
          fileSize,
          'FILE_SIZE_MISMATCH',
        );
        return {status: 'ignored', message: 'file_size_mismatch'};
      }

      const clamdVersion = await this.getClamVersion();
      logger.info(
        `Scan request for ${gcsFile.cloudStorageURI.href}, (${fileSize} bytes) scanning with clam ${clamdVersion}`,
      );
      const startTime = Date.now();
      const readStream = gcsFile.createReadStream();
      let result;
      try {
        result = await this.clamdScanStream(readStream, CLAMD_TIMEOUT);
      } finally {
        // Ensure stream is destroyed in all situations to prevent any
        // resource leaks.
        readStream.destroy();
      }
      const scanDuration = Date.now() - startTime;

      let isClean = this.clamdClient.isCleanReply(result);

      // A non-empty allow list blocks every file whose extension is not on
      // the list, even when clamd considers the content clean.
      const allowList = this.config.quarantine.fileExtensionAllowList;
      if (
        isClean &&
        allowList.length > 0 &&
        !this.isExtensionInList(storageObject.name, allowList)
      ) {
        // mimic ClamAV's response format
        result = 'stream: Config.AllowList.Blocked FOUND';
        isClean = false;
      }

      // A non-empty deny list blocks every file whose extension is on it.
      const denyList = this.config.quarantine.fileExtensionDenyList;
      if (
        isClean &&
        denyList.length > 0 &&
        this.isExtensionInList(storageObject.name, denyList)
      ) {
        // mimic ClamAV's response format
        result = 'stream: Config.DenyList.Blocked FOUND';
        isClean = false;
      }

      if (isClean) {
        logger.info(
          {
            scanStatus: {
              bucket: storageObject.bucket,
              file: storageObject.name,
              fileSize,
              scanDuration,
              status: 'clean',
              clamdVersion,
              result,
            },
          },
          `Scan status for ${gcsFile.cloudStorageURI.href}: CLEAN (${fileSize} bytes in ${scanDuration} ms)`,
        );
        this.metricsClient.writeScanClean(
          bucketDefs.unscanned,
          bucketDefs.clean,
          fileSize,
          scanDuration,
          clamdVersion,
        );

        await this.moveProcessedFile(gcsFile, bucketDefs.clean);
        return {
          status: 'clean',
          clam_version: clamdVersion,
          message: 'scan_success',
        };
      } else {
        logger.warn(
          {
            scanStatus: {
              bucket: storageObject.bucket,
              file: storageObject.name,
              fileSize,
              scanDuration,
              status: 'infected',
              clamdVersion,
              result,
            },
          },
          `Scan status for ${gcsFile.cloudStorageURI.href}: INFECTED ${result} (${fileSize} bytes in ${scanDuration} ms)`,
        );
        this.metricsClient.writeScanInfected(
          bucketDefs.unscanned,
          bucketDefs.quarantined,
          fileSize,
          scanDuration,
          clamdVersion,
        );

        await this.moveProcessedFile(gcsFile, bucketDefs.quarantined);
        return {
          message: result,
          status: 'infected',
          clam_version: clamdVersion,
        };
      }
    } catch (e) {
      // Optional chaining keeps this handler from throwing a TypeError of
      // its own should it ever be reached without a storage object.
      logger.error(
        {err: e},
        `Exception when processing gs://${storageObject?.bucket}/${storageObject?.name}: ${e as Error}`,
      );

      // Check for an API error code
      const errcode = (e as gcs.ApiError).code;
      if (errcode && [403, 404].includes(errcode)) {
        // Permission denied/file not found can be raised by the stream reading
        // and by the object move. They cannot be retried, so respond
        // with success, but log the error.
        return {status: 'error', message: 'Error when reading file, ignoring'};
      }
      this.metricsClient.writeScanFailed(storageObject?.bucket);
      throw e;
    }
  }

  /**
   * Does filename have an extension that is included in extensionList?
   *
   * Matching is a case-insensitive suffix comparison. An empty-string entry
   * matches objects whose final path segment (the part after the last `/`)
   * contains no `.`, i.e. files without an extension; dots in folder names
   * are not taken into account.
   *
   * @param fileName Full object name, possibly containing `/` separators.
   * @param extensionList Extensions to test against; `''` means "no
   *     extension".
   * @returns true when any entry of extensionList matches fileName.
   */
  isExtensionInList(fileName: string, extensionList: string[]): boolean {
    // toLowerCase (not toLocaleLowerCase) so that matching does not depend
    // on the host locale.
    const lowerCaseName = fileName.toLowerCase();
    const baseName = lowerCaseName.substring(
      lowerCaseName.lastIndexOf('/') + 1,
    );
    return extensionList.some((extension) =>
      extension.length === 0
        ? !baseName.includes('.')
        : lowerCaseName.endsWith(extension.toLowerCase()),
    );
  }

  /**
   * Moves a processed object into another bucket, keeping its name, and
   * logs the move.
   *
   * @param srcfile Object to move.
   * @param destinationBucketName Name of the bucket to move the object to.
   */
  async moveProcessedFile(
    srcfile: gcs.File,
    destinationBucketName: string,
  ): Promise<void> {
    const destinationBucket = this.storageClient.bucket(destinationBucketName);
    await srcfile.move(destinationBucket);
    logger.info(
      `Successfully moved file ${srcfile.cloudStorageURI.href} to gs://${destinationBucketName}/${srcfile.name}`,
    );
  }
}