private void deleteUnmarkedEntities()

in metacat-main/src/main/java/com/netflix/metacat/main/services/search/ElasticSearchCatalogTraversalAction.java [98:201]


    /**
     * Cleans up databases and tables that the search index still holds but that were not marked by the
     * current traversal.
     *
     * <p>The index is refreshed, then databases are processed, then tables. The selection of "unmarked"
     * documents is delegated to {@code elasticSearchUtil.getQualifiedNamesByMarkerByNames}. An entity is only
     * deleted when the owning service reports that it does not exist; entities whose existence check throws
     * are skipped.
     *
     * @param context traversal context supplying the qualified names, the excluded qualified names and the
     *                start instant used to query for unmarked documents
     */
    private void deleteUnmarkedEntities(final CatalogTraversal.Context context) {
        log.info("Start: Delete unmarked entities");
        // Refresh the index before querying it for unmarked documents.
        elasticSearchUtil.refresh();
        // Internal request context attached to the soft-deletes and to the table delete events.
        final MetacatRequestContext requestContext = MetacatRequestContext.builder()
            .userName("admin")
            .clientAppName("metacat-refresh")
            .apiUri("esRefresh")
            .scheme("internal")
            .build();

        deleteUnmarkedDatabases(context, requestContext);
        deleteUnmarkedTables(context, requestContext);
        log.info("End: Delete unmarked entities");
    }

    /**
     * Deletes the definition metadata of, and soft-deletes from the index, every unmarked database that the
     * database service reports as non-existent.
     *
     * <p>Nothing is deleted when the number of unmarked databases exceeds
     * {@code config.getElasticSearchThresholdUnmarkedDatabasesDelete()}; a counter is incremented instead.
     *
     * @param context        traversal context used to query for unmarked databases
     * @param requestContext request context passed to the index soft-delete
     */
    private void deleteUnmarkedDatabases(final CatalogTraversal.Context context,
                                         final MetacatRequestContext requestContext) {
        final List<DatabaseDto> unmarkedDatabaseDtos = elasticSearchUtil
            .getQualifiedNamesByMarkerByNames("database", context.getQNames(), context.getStartInstant(),
                context.getExcludeQNames(),
                DatabaseDto.class);
        if (unmarkedDatabaseDtos.isEmpty()) {
            return;
        }
        if (unmarkedDatabaseDtos.size() > config.getElasticSearchThresholdUnmarkedDatabasesDelete()) {
            // Too many candidates: skip deletion entirely and only record that the threshold was hit.
            log.info("Count of unmarked databases({}) is more than the threshold {}", unmarkedDatabaseDtos.size(),
                config.getElasticSearchThresholdUnmarkedDatabasesDelete());
            registry.counter(
                registry.createId(Metrics.CounterElasticSearchUnmarkedDatabaseThreshholdReached.getMetricName()))
                .increment();
            return;
        }

        log.info("Traversal Done: Start: Delete unmarked databases({})", unmarkedDatabaseDtos.size());
        final List<String> unmarkedDatabaseNames = Lists.newArrayList();
        final List<DatabaseDto> deleteDatabaseDtos = Lists.newArrayList();
        // Explicit loop rather than a stream filter: the pass both records names and selects candidates.
        for (final DatabaseDto databaseDto : unmarkedDatabaseDtos) {
            try {
                unmarkedDatabaseNames.add(databaseDto.getName().toString());
                if (!databaseService.exists(databaseDto.getName())) {
                    deleteDatabaseDtos.add(databaseDto);
                }
            } catch (Exception e) {
                // Best effort: a failed existence check means "do not delete". The throwable is passed as
                // the trailing argument so the stack trace is kept in the log.
                log.warn("Ignoring exception during deleteUnmarkedEntities for {}. Message: {}",
                    databaseDto.getName(), e.getMessage(), e);
            }
        }
        log.info("Unmarked databases({}): {}", unmarkedDatabaseNames.size(), unmarkedDatabaseNames);
        log.info("Deleting databases({})", deleteDatabaseDtos.size());
        if (!deleteDatabaseDtos.isEmpty()) {
            final List<QualifiedName> deleteDatabaseQualifiedNames = deleteDatabaseDtos.stream()
                .map(DatabaseDto::getName)
                .collect(Collectors.toList());
            final List<String> deleteDatabaseNames = deleteDatabaseQualifiedNames.stream()
                .map(QualifiedName::toString)
                .collect(Collectors.toList());
            log.info("Deleting databases({}): {}", deleteDatabaseNames.size(), deleteDatabaseNames);
            userMetadataService.deleteDefinitionMetadata(deleteDatabaseQualifiedNames);
            elasticSearchUtil.softDelete("database", deleteDatabaseNames, requestContext);
        }
        log.info("End: Delete unmarked databases({})", unmarkedDatabaseDtos.size());
    }

    /**
     * Deletes the metadata and tags of every unmarked table that the table service reports as non-existent,
     * and posts a {@code MetacatDeleteTablePostEvent} for each of them.
     *
     * <p>Nothing is deleted when the number of unmarked tables exceeds
     * {@code config.getElasticSearchThresholdUnmarkedTablesDelete()}; a counter is incremented instead.
     *
     * @param context        traversal context used to query for unmarked tables
     * @param requestContext request context attached to the posted delete events
     */
    private void deleteUnmarkedTables(final CatalogTraversal.Context context,
                                      final MetacatRequestContext requestContext) {
        final List<TableDto> unmarkedTableDtos = elasticSearchUtil
            .getQualifiedNamesByMarkerByNames("table",
                context.getQNames(), context.getStartInstant(), context.getExcludeQNames(), TableDto.class);
        if (unmarkedTableDtos.isEmpty()) {
            return;
        }
        if (unmarkedTableDtos.size() > config.getElasticSearchThresholdUnmarkedTablesDelete()) {
            // Too many candidates: skip deletion entirely and only record that the threshold was hit.
            log.info("Count of unmarked tables({}) is more than the threshold {}", unmarkedTableDtos.size(),
                config.getElasticSearchThresholdUnmarkedTablesDelete());
            registry.counter(
                registry.createId(Metrics.CounterElasticSearchUnmarkedTableThreshholdReached.getMetricName()))
                .increment();
            return;
        }

        log.info("Start: Delete unmarked tables({})", unmarkedTableDtos.size());
        final List<String> unmarkedTableNames = Lists.newArrayList();
        final List<TableDto> deleteTableDtos = Lists.newArrayList();
        // Explicit loop rather than a stream filter: the pass both records names and selects candidates.
        for (final TableDto tableDto : unmarkedTableDtos) {
            try {
                unmarkedTableNames.add(tableDto.getName().toString());
                if (!tableService.exists(tableDto.getName())) {
                    deleteTableDtos.add(tableDto);
                }
            } catch (Exception e) {
                // Best effort: a failed existence check means "do not delete". The throwable is passed as
                // the trailing argument so the stack trace is kept in the log.
                log.warn("Ignoring exception during deleteUnmarkedEntities for {}. Message: {}",
                    tableDto.getName(), e.getMessage(), e);
            }
        }
        log.info("Unmarked tables({}): {}", unmarkedTableNames.size(), unmarkedTableNames);
        log.info("Deleting tables({})", deleteTableDtos.size());
        if (!deleteTableDtos.isEmpty()) {
            final List<String> deleteTableNames = deleteTableDtos.stream()
                .map(dto -> dto.getName().toString())
                .collect(Collectors.toList());
            log.info("Deleting tables({}): {}", deleteTableNames.size(), deleteTableNames);
            userMetadataService.deleteMetadata("admin", Lists.newArrayList(deleteTableDtos));

            // Publish event. Elasticsearch event handler will take care of updating the index already
            // TODO: Re-evaluate events vs. direct calls for these types of situations like in Genie
            // NOTE(review): an exception from tagService.delete or eventBus.post aborts the remaining
            // tables after their metadata was already deleted above -- confirm this is acceptable.
            for (final TableDto tableDto : deleteTableDtos) {
                tagService.delete(tableDto.getName(), false);
                this.eventBus.post(
                    new MetacatDeleteTablePostEvent(tableDto.getName(), requestContext, this, tableDto)
                );
            }
        }
        log.info("Traversal Done: End: Delete unmarked tables({})", unmarkedTableDtos.size());
    }