in mcrouter/ConfigApi.cpp [171:247]
void ConfigApi::configThreadRun() {
mcrouterSetThisThreadName(opts_, "mcrcfg");
if (opts_.constantly_reload_configs) {
while (!finish_) {
LOG(INFO) << "Reload config due to constantly_reload_configs";
callbacks_.notify();
{
std::unique_lock<std::mutex> lk(finishMutex_);
finishCV_.wait_for(lk, std::chrono::milliseconds(10), [this] {
return finish_.load();
});
}
}
return;
}
while (!finish_) {
bool hasUpdate = false;
try {
hasUpdate = checkFileUpdate();
} catch (const std::exception& e) {
MC_LOG_FAILURE(
opts_,
memcache::failure::Category::kOther,
"Check for config update failed: {}",
e.what());
} catch (...) {
MC_LOG_FAILURE(
opts_,
memcache::failure::Category::kOther,
"Check for config update failed with unknown error");
}
// There are a couple of races that can happen here
// First, the IN_MODIFY event can be fired before the write is complete,
// resulting in a malformed JSON error. Second, text editors may do
// some of their own shuffling of the file (e.g. between .swp and the
// real thing in Vim) after the write. This may can result in a file
// access error router_configure_from_file below. That's just a theory,
// but that error does happen. Race 1 can be fixed by changing the
// watch for IN_MODIFY to IN_CLOSE_WRITE, but Race 2 has no apparent
// elegant solution. The following jankiness fixes both.
{
std::unique_lock<std::mutex> lk(finishMutex_);
finishCV_.wait_for(
lk,
std::chrono::milliseconds(opts_.reconfiguration_delay_ms),
[this] { return finish_.load(); });
}
if (hasUpdate) {
if (opts_.reconfiguration_jitter_ms) {
int jitter_ms =
folly::Random::randDouble01() * opts_.reconfiguration_jitter_ms;
std::unique_lock<std::mutex> lk(finishMutex_);
finishCV_.wait_for(
lk,
std::chrono::milliseconds(
opts_.reconfiguration_delay_ms + jitter_ms),
[this] { return finish_.load(); });
}
callbacks_.notify();
// waits before checking for config updates again.
if (opts_.post_reconfiguration_delay_ms > 0) {
std::unique_lock<std::mutex> lk(finishMutex_);
finishCV_.wait_for(
lk,
std::chrono::milliseconds(opts_.post_reconfiguration_delay_ms),
[this] { return finish_.load(); });
}
}
// Otherwise there was nothing to read, so check that we aren't shutting
// down, and wait on the FD again.
}
}