public List cleanupAndGetValidFiles()

in hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFileManager.java [98:207]


    public List<LSMComponentFileReferences> cleanupAndGetValidFiles() throws HyracksDataException, IndexException {
        List<LSMComponentFileReferences> validFiles = new ArrayList<LSMComponentFileReferences>();
        ArrayList<ComparableFileName> allDictBTreeFiles = new ArrayList<ComparableFileName>();
        ArrayList<ComparableFileName> allInvListsFiles = new ArrayList<ComparableFileName>();
        ArrayList<ComparableFileName> allDeletedKeysBTreeFiles = new ArrayList<ComparableFileName>();
        ArrayList<ComparableFileName> allBloomFilterFiles = new ArrayList<ComparableFileName>();

        // Gather files.
        cleanupAndGetValidFilesInternal(deletedKeysBTreeFilter, btreeFactory, allDeletedKeysBTreeFiles);
        HashSet<String> deletedKeysBTreeFilesSet = new HashSet<String>();
        for (ComparableFileName cmpFileName : allDeletedKeysBTreeFiles) {
            int index = cmpFileName.fileName.lastIndexOf(SPLIT_STRING);
            deletedKeysBTreeFilesSet.add(cmpFileName.fileName.substring(0, index));
        }

        // TODO: do we really need to validate the inverted lists files or is validating the dict. BTrees is enough?
        validateFiles(deletedKeysBTreeFilesSet, allInvListsFiles, invListFilter, null);
        validateFiles(deletedKeysBTreeFilesSet, allDictBTreeFiles, dictBTreeFilter, btreeFactory);
        validateFiles(deletedKeysBTreeFilesSet, allBloomFilterFiles, bloomFilterFilter, null);

        // Sanity check.
        if (allDictBTreeFiles.size() != allInvListsFiles.size()
                || allDictBTreeFiles.size() != allDeletedKeysBTreeFiles.size()
                || allDictBTreeFiles.size() != allBloomFilterFiles.size()) {
            throw new HyracksDataException(
                    "Unequal number of valid Dictionary BTree, Inverted Lists, Deleted BTree, and Bloom Filter files found. Aborting cleanup.");
        }

        // Trivial cases.
        if (allDictBTreeFiles.isEmpty() || allInvListsFiles.isEmpty() || allDeletedKeysBTreeFiles.isEmpty()
                || allBloomFilterFiles.isEmpty()) {
            return validFiles;
        }

        if (allDictBTreeFiles.size() == 1 && allInvListsFiles.size() == 1 && allDeletedKeysBTreeFiles.size() == 1
                && allBloomFilterFiles.size() == 1) {
            validFiles.add(new LSMComponentFileReferences(allDictBTreeFiles.get(0).fileRef, allDeletedKeysBTreeFiles
                    .get(0).fileRef, allBloomFilterFiles.get(0).fileRef));
            return validFiles;
        }

        // Sorts files names from earliest to latest timestamp.
        Collections.sort(allDeletedKeysBTreeFiles);
        Collections.sort(allDictBTreeFiles);
        Collections.sort(allBloomFilterFiles);

        List<ComparableFileName> validComparableDictBTreeFiles = new ArrayList<ComparableFileName>();
        ComparableFileName lastDictBTree = allDictBTreeFiles.get(0);
        validComparableDictBTreeFiles.add(lastDictBTree);

        List<ComparableFileName> validComparableDeletedKeysBTreeFiles = new ArrayList<ComparableFileName>();
        ComparableFileName lastDeletedKeysBTree = allDeletedKeysBTreeFiles.get(0);
        validComparableDeletedKeysBTreeFiles.add(lastDeletedKeysBTree);

        List<ComparableFileName> validComparableBloomFilterFiles = new ArrayList<ComparableFileName>();
        ComparableFileName lastBloomFilter = allBloomFilterFiles.get(0);
        validComparableBloomFilterFiles.add(lastBloomFilter);

        for (int i = 1; i < allDictBTreeFiles.size(); i++) {
            ComparableFileName currentDeletedKeysBTree = allDeletedKeysBTreeFiles.get(i);
            ComparableFileName CurrentDictBTree = allDictBTreeFiles.get(i);
            ComparableFileName currentBloomFilter = allBloomFilterFiles.get(i);
            // Current start timestamp is greater than last stop timestamp.
            if (currentDeletedKeysBTree.interval[0].compareTo(lastDeletedKeysBTree.interval[1]) > 0
                    && CurrentDictBTree.interval[0].compareTo(lastDictBTree.interval[1]) > 0
                    && currentBloomFilter.interval[0].compareTo(lastBloomFilter.interval[1]) > 0) {
                validComparableDictBTreeFiles.add(CurrentDictBTree);
                validComparableDeletedKeysBTreeFiles.add(currentDeletedKeysBTree);
                validComparableBloomFilterFiles.add(currentBloomFilter);
                lastDictBTree = CurrentDictBTree;
                lastDeletedKeysBTree = currentDeletedKeysBTree;
                lastBloomFilter = currentBloomFilter;
            } else if (currentDeletedKeysBTree.interval[0].compareTo(lastDeletedKeysBTree.interval[0]) >= 0
                    && currentDeletedKeysBTree.interval[1].compareTo(lastDeletedKeysBTree.interval[1]) <= 0
                    && CurrentDictBTree.interval[0].compareTo(lastDictBTree.interval[0]) >= 0
                    && CurrentDictBTree.interval[1].compareTo(lastDictBTree.interval[1]) <= 0
                    && currentBloomFilter.interval[0].compareTo(lastBloomFilter.interval[0]) >= 0
                    && currentBloomFilter.interval[1].compareTo(lastBloomFilter.interval[1]) <= 0) {
                // Invalid files are completely contained in last interval.
                File invalidDeletedBTreeFile = new File(currentDeletedKeysBTree.fullPath);
                invalidDeletedBTreeFile.delete();
                File invalidDictBTreeFile = new File(CurrentDictBTree.fullPath);
                invalidDictBTreeFile.delete();
                File invalidBloomFilterFile = new File(currentBloomFilter.fullPath);
                invalidBloomFilterFile.delete();
            } else {
                // This scenario should not be possible.
                throw new HyracksDataException("Found LSM files with overlapping but not contained timetamp intervals.");
            }
        }

        // Sort valid files in reverse lexicographical order, such that newer
        // files come first.
        Collections.sort(validComparableDictBTreeFiles, recencyCmp);
        Collections.sort(validComparableDeletedKeysBTreeFiles, recencyCmp);
        Collections.sort(validComparableBloomFilterFiles, recencyCmp);

        Iterator<ComparableFileName> dictBTreeFileIter = validComparableDictBTreeFiles.iterator();
        Iterator<ComparableFileName> deletedKeysBTreeIter = validComparableDeletedKeysBTreeFiles.iterator();
        Iterator<ComparableFileName> bloomFilterFileIter = validComparableBloomFilterFiles.iterator();
        while (dictBTreeFileIter.hasNext() && deletedKeysBTreeIter.hasNext()) {
            ComparableFileName cmpDictBTreeFile = dictBTreeFileIter.next();
            ComparableFileName cmpDeletedKeysBTreeFile = deletedKeysBTreeIter.next();
            ComparableFileName cmpBloomFilterFileName = bloomFilterFileIter.next();
            validFiles.add(new LSMComponentFileReferences(cmpDictBTreeFile.fileRef, cmpDeletedKeysBTreeFile.fileRef,
                    cmpBloomFilterFileName.fileRef));
        }

        return validFiles;
    }