in reftable/stack.c [1193:1557]
static int stack_compact_range(struct reftable_stack *st,
size_t first, size_t last,
struct reftable_log_expiry_config *expiry,
unsigned int flags)
{
struct reftable_buf tables_list_buf = REFTABLE_BUF_INIT;
struct reftable_buf new_table_name = REFTABLE_BUF_INIT;
struct reftable_buf new_table_path = REFTABLE_BUF_INIT;
struct reftable_buf table_name = REFTABLE_BUF_INIT;
struct reftable_flock tables_list_lock = REFTABLE_FLOCK_INIT;
struct reftable_flock *table_locks = NULL;
struct reftable_tmpfile new_table = REFTABLE_TMPFILE_INIT;
int is_empty_table = 0, err = 0;
size_t first_to_replace, last_to_replace;
size_t i, nlocks = 0;
char **names = NULL;
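/*
* There is nothing to compact if the range is empty, or if it spans
* only a single table and we have no log entries to expire.
*/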
if (first > last || (!expiry && first == last)) {
err = 0;
goto done;
}
st->stats.attempts++;
/*
* Hold the lock so that we can read "tables.list" and lock all tables
* which are part of the user-specified range.
*/
err = flock_acquire(&tables_list_lock, st->list_file, st->opts.lock_timeout_ms);
if (err < 0) {
if (errno == EEXIST)
err = REFTABLE_LOCK_ERROR;
else
err = REFTABLE_IO_ERROR;
goto done;
}
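/*
* Now that we hold the "tables.list" lock, verify that our in-memory
* view of the stack still matches what is on disk. If it doesn't, we
* must not compact based on stale data.
*/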
err = stack_uptodate(st);
if (err)
goto done;
/*
* Lock all tables in the user-provided range. This is the slice of our
* stack which we'll compact.
*
* Note that we lock tables in reverse order from last to first. The
* intent behind this is to allow a newer process to perform best-effort
* compaction of tables that it has added in the case where an older
* process is still busy compacting tables which are preexisting from
* the point of view of the newer process.
*/
REFTABLE_ALLOC_ARRAY(table_locks, last - first + 1);
if (!table_locks) {
err = REFTABLE_OUT_OF_MEMORY_ERROR;
goto done;
}
for (i = 0; i < last - first + 1; i++)
table_locks[i] = REFTABLE_FLOCK_INIT;
for (i = last + 1; i > first; i--) {
err = stack_filename(&table_name, st, reftable_table_name(st->tables[i - 1]));
if (err < 0)
goto done;
err = flock_acquire(&table_locks[nlocks], table_name.buf, 0);
if (err < 0) {
/*
* When the table is already locked we may do a
* best-effort compaction and compact only the tables
* that we have managed to lock so far. This of course
* requires that we have been able to lock at least two
* tables, otherwise there would be nothing to compact.
* If we have not, we return a lock error to our caller.
*/
if (errno == EEXIST && last - (i - 1) >= 2 &&
flags & STACK_COMPACT_RANGE_BEST_EFFORT) {
err = 0;
/*
* The subtraction is to offset the index, the
* addition is to only compact up to the table
* of the preceding iteration. They obviously
* cancel each other out, but spelling both
* steps out makes the intent clearer than the
* simplified expression would.
*/
first = (i - 1) + 1;
break;
} else if (errno == EEXIST) {
err = REFTABLE_LOCK_ERROR;
goto done;
} else {
err = REFTABLE_IO_ERROR;
goto done;
}
}
/*
* We need to close the lockfiles as we might otherwise easily
* run into file descriptor exhaustion when we compact a lot
* of tables.
*/
err = flock_close(&table_locks[nlocks++]);
if (err < 0) {
err = REFTABLE_IO_ERROR;
goto done;
}
}
/*
* We have locked all tables in our range and can thus release the
* "tables.list" lock while compacting the locked tables. This allows
* concurrent updates to the stack to proceed.
*/
err = flock_release(&tables_list_lock);
if (err < 0) {
err = REFTABLE_IO_ERROR;
goto done;
}
/*
* Compact the now-locked tables into a new table. Note that compacting
* these tables may result in an empty new table in case tombstones
* cancel out all refs in that range.
*/
err = stack_compact_locked(st, first, last, expiry, &new_table);
if (err < 0) {
if (err != REFTABLE_EMPTY_TABLE_ERROR)
goto done;
is_empty_table = 1;
}
/*
* Now that we have written the new, compacted table we need to re-lock
* "tables.list". We'll then replace the compacted range of tables with
* the new table.
*/
err = flock_acquire(&tables_list_lock, st->list_file, st->opts.lock_timeout_ms);
if (err < 0) {
if (errno == EEXIST)
err = REFTABLE_LOCK_ERROR;
else
err = REFTABLE_IO_ERROR;
goto done;
}
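/*
* Apply the configured default permissions to the lockfile so that the
* rewritten "tables.list" will carry them once the lock is committed.
*/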
if (st->opts.default_permissions) {
if (chmod(tables_list_lock.path,
st->opts.default_permissions) < 0) {
err = REFTABLE_IO_ERROR;
goto done;
}
}
/*
* As we have released the "tables.list" lock while compacting our slice
* of tables, a concurrently running process may have updated the stack
* in the meantime. In that case, we need to check
* whether the tables that we have just compacted still exist in the
* stack in the exact same order as we have compacted them.
*
* If they do exist, then it is fine to continue and replace those
* tables with our compacted version. If they don't, then we need to
* abort.
*/
err = stack_uptodate(st);
if (err < 0)
goto done;
if (err > 0) {
ssize_t new_offset = -1;
int fd;
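/*
* Re-read the modified "tables.list" so that we can locate our
* compacted tables in it.
*/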
fd = open(st->list_file, O_RDONLY);
if (fd < 0) {
err = REFTABLE_IO_ERROR;
goto done;
}
err = fd_read_lines(fd, &names);
close(fd);
if (err < 0)
goto done;
/*
* Search for the offset of the first table that we have
* compacted in the updated "tables.list" file.
*/
for (size_t i = 0; names[i]; i++) {
if (strcmp(names[i], st->tables[first]->name))
continue;
/*
* We have found the first entry. Verify that all the
* subsequent tables we have compacted still exist in
* the modified stack in the exact same order as we
* have compacted them.
*/
for (size_t j = 1; j < last - first + 1; j++) {
const char *old = first + j < st->merged->tables_len ?
st->tables[first + j]->name : NULL;
const char *new = names[i + j];
/*
* If some entries are missing or the tables have
* changed then we need to bail out. This shouldn't
* ever happen because we have locked the tables we
* are compacting.
*/
if (!old || !new || strcmp(old, new)) {
err = REFTABLE_OUTDATED_ERROR;
goto done;
}
}
new_offset = i;
break;
}
/*
* In case we didn't find our compacted tables in the stack we
* need to bail out. In theory, this should never happen
* because we locked the tables we are compacting.
*/
if (new_offset < 0) {
err = REFTABLE_OUTDATED_ERROR;
goto done;
}
/*
* We have found the new range that we want to replace, so
* let's update the range of tables that we want to replace.
*/
first_to_replace = new_offset;
last_to_replace = last + (new_offset - first);
} else {
/*
* `fd_read_lines()` uses a `NULL` sentinel to indicate that
* the array is at its end. As we use `free_names()` to free
* the array, we need to include this sentinel value here and
* thus have to allocate `tables_len + 1` entries.
*/
REFTABLE_CALLOC_ARRAY(names, st->merged->tables_len + 1);
if (!names) {
err = REFTABLE_OUT_OF_MEMORY_ERROR;
goto done;
}
for (size_t i = 0; i < st->merged->tables_len; i++) {
names[i] = reftable_strdup(st->tables[i]->name);
if (!names[i]) {
err = REFTABLE_OUT_OF_MEMORY_ERROR;
goto done;
}
}
first_to_replace = first;
last_to_replace = last;
}
/*
* If the resulting compacted table is not empty, then we need to move
* it into place now.
*/
if (!is_empty_table) {
err = format_name(&new_table_name, st->tables[first]->min_update_index,
st->tables[last]->max_update_index);
if (err < 0)
goto done;
err = reftable_buf_addstr(&new_table_name, ".ref");
if (err < 0)
goto done;
err = stack_filename(&new_table_path, st, new_table_name.buf);
if (err < 0)
goto done;
err = tmpfile_rename(&new_table, new_table_path.buf);
if (err < 0)
goto done;
}
/*
* Write the new "tables.list" contents with the compacted table we
* have just written. In case the compacted table became empty we
* simply skip writing it.
*/
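/*
* First, carry over all tables that precede the compacted range, then
* add the newly written table, unless it ended up empty.
*/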
for (i = 0; i < first_to_replace; i++) {
if ((err = reftable_buf_addstr(&tables_list_buf, names[i])) < 0 ||
(err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)
goto done;
}
if (!is_empty_table) {
if ((err = reftable_buf_addstr(&tables_list_buf, new_table_name.buf)) < 0 ||
(err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)
goto done;
}
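/* Finally, carry over all tables that follow the replaced range. */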
for (i = last_to_replace + 1; names[i]; i++) {
if ((err = reftable_buf_addstr(&tables_list_buf, names[i])) < 0 ||
(err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)
goto done;
}
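/*
* Write the new list to the lockfile and flush it to disk. If any of
* this fails we unlink the newly written table again so that we don't
* leave a stale file behind.
*/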
err = reftable_write_data(tables_list_lock.fd,
tables_list_buf.buf, tables_list_buf.len);
if (err < 0) {
err = REFTABLE_IO_ERROR;
unlink(new_table_path.buf);
goto done;
}
err = stack_fsync(&st->opts, tables_list_lock.fd);
if (err < 0) {
err = REFTABLE_IO_ERROR;
unlink(new_table_path.buf);
goto done;
}
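/*
* Committing the lock replaces "tables.list" with the contents we have
* just written, making the compacted table visible to other processes.
*/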
err = flock_commit(&tables_list_lock);
if (err < 0) {
err = REFTABLE_IO_ERROR;
unlink(new_table_path.buf);
goto done;
}
/*
* Reload the stack before deleting the compacted tables. On Windows we
* can only delete the files after we have closed them, so the reload
* needs to happen first.
*/
err = reftable_stack_reload_maybe_reuse(st, first < last);
if (err < 0)
goto done;
/*
* Delete the old tables. They may still be in use by concurrent
* readers, so it is expected that unlinking tables may fail.
*/
for (i = 0; i < nlocks; i++) {
struct reftable_flock *table_lock = &table_locks[i];
reftable_buf_reset(&table_name);
err = reftable_buf_add(&table_name, table_lock->path,
strlen(table_lock->path) - strlen(".lock"));
if (err)
continue;
unlink(table_name.buf);
}
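/*
* Common exit path: release any locks we still hold, delete leftover
* temporary files and free all allocated resources.
*/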
done:
flock_release(&tables_list_lock);
for (i = 0; table_locks && i < nlocks; i++)
flock_release(&table_locks[i]);
reftable_free(table_locks);
tmpfile_delete(&new_table);
reftable_buf_release(&new_table_name);
reftable_buf_release(&new_table_path);
reftable_buf_release(&tables_list_buf);
reftable_buf_release(&table_name);
free_names(names);
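/* Locking conflicts are counted as failed compaction attempts. */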
if (err == REFTABLE_LOCK_ERROR)
st->stats.failures++;
return err;
}