in source/api/services/package/lib/content-package.js [719:790]
// Creates the AWS Glue crawler for this package, or updates it when one already exists.
//
// Flow:
//   1. getManifestImportedDatasetsList yields the crawler filter ({exclude, include}).
//   2. glue.getCrawler looks up the crawler named glueNames.crawler.
//   3. If a crawler is returned, its current settings are preserved and only the S3
//      targets are rebuilt (updateCrawler); otherwise a new crawler is created with
//      the defaults below (createCrawler).
//
// The outcome is reported through cb(err, result):
//   - filter lookup failure: cb(err, null) with the upstream error unchanged
//   - Glue create/update failure: cb({code: 502, message}, null)
//   - success: cb(null, {code: 200, message})
getManifestImportedDatasetsList(packageId, defaultTarget, function(err, crawlerFilter) {
    if (err) {
        return cb(err, null);
    }

    const glue = new AWS.Glue();
    const glueNames = getGlueNames(packageName, packageId);

    glue.getCrawler({Name: glueNames.crawler}, function(getErr, getData) {
        // A failed lookup falls through to createCrawler below. "Not found" is the
        // expected reason; any other failure (throttling, permissions, ...) is
        // logged so it is not silently lost when the create attempt fails later.
        if (getErr && getErr.code !== 'EntityNotFoundException') {
            console.log(getErr);
        }

        // The default target carries the exclusions; extra include targets are
        // appended. A missing include list must not add an `undefined` target.
        const s3Targets = [{Path: defaultTarget, Exclusions: crawlerFilter.exclude}]
            .concat(crawlerFilter.include || []);

        // Settings used for a brand-new crawler.
        const crawlerData = {
            DatabaseName: glueNames.database,
            Name: glueNames.crawler,
            Role: process.env.CRAWLER_ROLE_ARN,
            Targets: {S3Targets: s3Targets},
            Description: 'Glue crawler that creates tables based on S3 DataLake resources',
            Schedule: 'cron(0 0 * * ? *)',
            Configuration: '{ "Version": 1.0, "CrawlerOutput": { "Partitions": { "AddOrUpdateBehavior": "InheritFromTable" } } }',
            SchemaChangePolicy: {
                DeleteBehavior: 'DELETE_FROM_DATABASE',
                UpdateBehavior: 'UPDATE_IN_DATABASE'
            },
            TablePrefix: glueNames.tablePrefix
        };

        const existing = getData ? getData.Crawler : undefined;
        if (existing === undefined || existing === null) {
            return glue.createCrawler(crawlerData, function(createErr) {
                if (createErr) {
                    console.log(createErr);
                    return cb({code: 502, message: "Failed to create AWS Glue crawler. Check account limits and if the name of the package is supported by AWS Glue."}, null);
                }
                // NOTE(review): the message interpolates the database name, not the crawler name - confirm this is intended.
                return cb(null, {code: 200, message: `AWS Glue crawler ${glueNames.database} created.`});
            });
        }

        // Keep whatever is already configured on the existing crawler; only the
        // targets computed above replace the current ones.
        const preservedFields = ['DatabaseName', 'Name', 'Role', 'Description', 'Configuration', 'SchemaChangePolicy', 'TablePrefix'];
        preservedFields.forEach(function(field) {
            if (existing[field] !== undefined) {
                crawlerData[field] = existing[field];
            }
        });
        // getCrawler returns the schedule as an object, while updateCrawler takes
        // the cron expression string.
        if (existing.Schedule && existing.Schedule.ScheduleExpression !== undefined) {
            crawlerData.Schedule = existing.Schedule.ScheduleExpression;
        }

        return glue.updateCrawler(crawlerData, function(updateErr) {
            if (updateErr) {
                console.log(updateErr);
                return cb({code: 502, message: "Failed to update AWS Glue crawler. Check if the crawler is not running, the account limits and if the crawler was not deleted while running this request."}, null);
            }
            // NOTE(review): the message interpolates the database name, not the crawler name - confirm this is intended.
            return cb(null, {code: 200, message: `AWS Glue crawler ${glueNames.database} updated.`});
        });
    });
});