getManifestImportedDatasetsList()

in source/api/services/package/lib/content-package.js [719:790]


                // Resolve the crawler filter for the package, then create the package's AWS Glue
                // crawler or update it in place when one already exists. The outcome is reported
                // through `cb(err, result)`: on success `result` is `{code: 200, message}`, on a
                // Glue failure `err` is `{code: 502, message}`.
                getManifestImportedDatasetsList(packageId, defaultTarget, function(err, crawlerFilter) {
                    if (err) {
                        return cb(err, null);
                    }

                    const glue = new AWS.Glue();
                    const glueNames = getGlueNames(packageName, packageId);

                    glue.getCrawler({Name: glueNames.crawler}, function(getErr, existing) {
                        // A missing crawler is the expected "create" path. Any other lookup failure
                        // must not fall through to createCrawler: the crawler may well exist, and the
                        // caller would then get a misleading "failed to create" error.
                        const isNotFound = getErr &&
                            (getErr.code === 'EntityNotFoundException' || getErr.name === 'EntityNotFoundException');
                        if (getErr && !isNotFound) {
                            console.log(getErr);
                            return cb({code: 502, message: "Failed to retrieve AWS Glue crawler. Check the account limits and the permissions of the service role."}, null);
                        }

                        // Default definition, used as-is when the crawler does not exist yet.
                        const crawlerData = {
                            DatabaseName: glueNames.database,
                            Name: glueNames.crawler,
                            Role: process.env.CRAWLER_ROLE_ARN,
                            Targets: {S3Targets: [{Path: defaultTarget}]},
                            Description: 'Glue crawler that creates tables based on S3 DataLake resources',
                            Schedule: 'cron(0 0 * * ? *)',
                            Configuration: '{ "Version": 1.0, "CrawlerOutput": { "Partitions": { "AddOrUpdateBehavior": "InheritFromTable" } } }',
                            SchemaChangePolicy: {
                              DeleteBehavior: 'DELETE_FROM_DATABASE',
                              UpdateBehavior: 'UPDATE_IN_DATABASE'
                            },
                            TablePrefix: glueNames.tablePrefix
                        };
                        // Targets are always rebuilt from the filter: exclusions apply to the default
                        // target, and the filter's include entries are appended as extra S3 targets.
                        // NOTE(review): assumes crawlerFilter carries `exclude` and `include` - confirm
                        // against getManifestImportedDatasetsList.
                        crawlerData.Targets.S3Targets[0].Exclusions = crawlerFilter.exclude;
                        crawlerData.Targets.S3Targets = crawlerData.Targets.S3Targets.concat(crawlerFilter.include);

                        if (existing && existing.Crawler !== undefined) {
                            const current = existing.Crawler;

                            // Keep whatever is already configured on the existing crawler; only the
                            // targets computed above are replaced.
                            const preservedFields = [
                                'DatabaseName',
                                'Name',
                                'Role',
                                'Description',
                                'Configuration',
                                'SchemaChangePolicy',
                                'TablePrefix'
                            ];
                            preservedFields.forEach(function(field) {
                                if (current[field] !== undefined) {
                                    crawlerData[field] = current[field];
                                }
                            });
                            // getCrawler returns the schedule as an object, while updateCrawler takes
                            // the cron expression string.
                            if (current.Schedule !== undefined && current.Schedule.ScheduleExpression !== undefined) {
                                crawlerData.Schedule = current.Schedule.ScheduleExpression;
                            }

                            glue.updateCrawler(crawlerData, function(updateErr) {
                                if (updateErr) {
                                    console.log(updateErr);
                                    return cb({code: 502, message: "Failed to update AWS Glue crawler. Check if the is not crawler running, the account limits and if the crawler wasn not deleted while running this request."}, null);
                                }

                                return cb(null, {code: 200, message: `AWS Glue crawler ${glueNames.database} updated.`});
                            });

                        } else {
                            glue.createCrawler(crawlerData, function(createErr) {
                                if (createErr) {
                                    console.log(createErr);
                                    return cb({code: 502, message: "Failed to create AWS Glue crawler. Check account limits and if the name of the package is supported by AWS Glue."}, null);
                                }

                                return cb(null, {code: 200, message: `AWS Glue crawler ${glueNames.database} created.`});
                            });
                        }
                    });
                });