host/common/hostrecoverymanager.cpp (376 lines of code) (raw):
//---------------------------------------------------------------
// <copyright file="hostrecoverymanager.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// Description: Windows implementation for HostRecoveryManager
//
// History: 15-Aug-2016 veshivan Created
//
//----------------------------------------------------------------
#include <boost/filesystem.hpp>
#include <boost/tokenizer.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/random_generator.hpp>
#include "hostrecoverymanager.h"
#include "portablehelpersmajor.h"
#include "localconfigurator.h"
#include "service.h"
#include "biosidoperations.h"
using namespace std;
#define VMWARE_TOOLS_SERVICE_NAME "VMTools"
const std::string INVALID_UUID("00000000-0000-0000-0000-000000000000");
bool HostRecoveryManager::GetPersistedVMInfo(string& persistedHypervisorName,
string& persistedSystemUUID,
bool& persistedIsAzureVm)
{
LocalConfigurator lc;
persistedIsAzureVm = lc.getIsAzureVm();
return
lc.getHypervisorName(persistedHypervisorName) &&
lc.getSystemUUID(persistedSystemUUID);
}
void HostRecoveryManager::PersistSystemUUId(const std::string& systemUUID)
{
DebugPrintf(SV_LOG_DEBUG, "Entering %s\n", FUNCTION_NAME);
LocalConfigurator lc;
if (systemUUID.empty()) {
DebugPrintf(SV_LOG_ERROR,"Skipping persist uuid as SystemUUID is empty");
return;
}
try {
lc.setSystemUUID(systemUUID);
}
catch (ContextualException &e) {
DebugPrintf(SV_LOG_ERROR, "Failed to update systemuuid with exception: %s\n", e.what());
}
DebugPrintf(SV_LOG_DEBUG, "Exiting %s\n", FUNCTION_NAME);
}
void HostRecoveryManager::PersistHypervisorInfo(const std::string& hypervisorName)
{
DebugPrintf(SV_LOG_DEBUG, "Entering %s\n", FUNCTION_NAME);
LocalConfigurator lc;
if (hypervisorName.empty()) {
DebugPrintf(SV_LOG_ERROR, "Skipping persist hypervisor as Hypervisor info is empty");
return;
}
try {
lc.setHypervisorName(hypervisorName);
}
catch (ContextualException &e) {
DebugPrintf(SV_LOG_ERROR, "Failed to update hypervisor with exception: %s\n", e.what());
}
DebugPrintf(SV_LOG_DEBUG, "Exiting %s\n", FUNCTION_NAME);
}
void HostRecoveryManager::PersistIsAzureVm(bool bAzureVm)
{
DebugPrintf(SV_LOG_DEBUG, "Entering %s\n", FUNCTION_NAME);
LocalConfigurator lc;
try {
lc.setIsAzureVm(bAzureVm);
}
catch (ContextualException &e) {
DebugPrintf(SV_LOG_ERROR, "Failed to update isAzureVm with exception: %s\n", e.what());
}
DebugPrintf(SV_LOG_DEBUG, "Exiting %s\n", FUNCTION_NAME);
}
void HostRecoveryManager::PersistVMInfo(const string& hypervisorName,
const string& systemUUID,
bool bIsAzureVm)
{
DebugPrintf(SV_LOG_DEBUG, "Entering %s\n", FUNCTION_NAME);
if (hypervisorName.empty() || systemUUID.empty()) {
DebugPrintf(SV_LOG_ERROR,
"%s: hypervisor or, systemUUID is empty", FUNCTION_NAME);
return;
}
PersistSystemUUId(systemUUID);
PersistHypervisorInfo(hypervisorName);
PersistIsAzureVm(bIsAzureVm);
DebugPrintf(SV_LOG_DEBUG, "Exiting %s\n", FUNCTION_NAME);
}
bool HostRecoveryManager::IsVMInfoMatching(const std::string& hypervisor,
const std::string& systemUUID,
bool bIsAzureVm)
{
DebugPrintf(SV_LOG_DEBUG, "Entering %s\n", FUNCTION_NAME);
bool bRecInfoMatching = false;
std::string persistedHypervisorName;
std::string persistedSystemUUID;
bool persistedIsAzureVm;
//
// Retrieve persisted info and compare. If persisted info is
// not present then GetPersistedRecoveryInfo() will return false
// and retun false as nothing to compare.
//
if (!GetPersistedVMInfo(
persistedHypervisorName,
persistedSystemUUID,
persistedIsAzureVm))
{
DebugPrintf(SV_LOG_ERROR,"Failed to get Persisted info\n");
return false;
}
bRecInfoMatching =
boost::iequals(hypervisor, persistedHypervisorName) &&
boost::iequals(systemUUID, persistedSystemUUID) &&
(persistedIsAzureVm == bIsAzureVm);
DebugPrintf(SV_LOG_ALWAYS,
"Persisted VM Info: Hypervisor: %s, System UUID: %s, IsAzureVM %d\n",
persistedHypervisorName.c_str(),
persistedSystemUUID.c_str(),
persistedIsAzureVm);
DebugPrintf(SV_LOG_DEBUG, "Exiting %s\n", FUNCTION_NAME);
return bRecInfoMatching;
}
bool HostRecoveryManager::IsRecoveryInProgress(bool & bIsHydrationWorkflow, bool& bIsClone, QuitFunction_t qf)
{
bool bHypervisorChanged = false,
bSystemUuidChanged = false,
bIsAzureVm = false,
bVmTypeChanged = false,
bIsRecoveryInProgress = false,
bIsFailoverDetected = false;
GetRecoveryInfo(bIsHydrationWorkflow,
bSystemUuidChanged,
bHypervisorChanged,
bIsAzureVm,
bVmTypeChanged,
bIsFailoverDetected,
qf);
if (bIsHydrationWorkflow)
return bIsHydrationWorkflow;
if (!bSystemUuidChanged)
return bSystemUuidChanged;
// this is to retain the existing V2A Legacy behavior. this should be same as IsRecoveryInProgressEx
// after handling Linux post hydration steps in agent instead of vCon scripts at boot time.
// or remove on EOL of V2A Legacy
#ifdef SV_WINDOWS
bIsRecoveryInProgress = bIsAzureVm;
#endif
bIsClone = !bHypervisorChanged && !bIsAzureVm;
return (bIsRecoveryInProgress || bIsClone);
}
bool HostRecoveryManager::IsRecoveryInProgressEx(bool & bIsHydrationWorkflow, bool& bIsClone, QuitFunction_t qf)
{
bool bHypervisorChanged = false,
bSystemUuidChanged = false,
bIsAzureVm = false,
bVmTypeChanged = false,
bIsRecoveryInProgress = false,
bIsFailoverDetected = false;
GetRecoveryInfo(bIsHydrationWorkflow,
bSystemUuidChanged,
bHypervisorChanged,
bIsAzureVm,
bVmTypeChanged,
bIsFailoverDetected,
qf);
if (bIsHydrationWorkflow)
return bIsHydrationWorkflow;
if (!bSystemUuidChanged)
return bSystemUuidChanged;
if (bIsFailoverDetected)
return bIsFailoverDetected;
bIsRecoveryInProgress = bSystemUuidChanged;
bIsClone = !bHypervisorChanged && !bVmTypeChanged;
return (bIsRecoveryInProgress || bIsClone);
}
void HostRecoveryManager::GetRecoveryInfo(bool & bIsHydrationWorkflow,
bool& bSystemUuidChanged,
bool& bHypervisorChanged,
bool& bIsAzureVm,
bool& bVmTypeChanged,
bool& bIsFailoverDetected,
QuitFunction_t qf)
{
DebugPrintf(SV_LOG_DEBUG, "Entering %s\n", FUNCTION_NAME);
bIsHydrationWorkflow = false;
bHypervisorChanged = false;
bSystemUuidChanged = false;
bIsAzureVm = false;
bVmTypeChanged = false;
LocalConfigurator lc;
// Recovery is only meaningful for mobility agent
if (!lc.isMobilityAgent()) {
DebugPrintf(SV_LOG_DEBUG, "Not running as mobility agent.. skipping recovery check\n");
return;
}
#ifdef SV_WINDOWS
//
// ### Hydration ###
///
// Check if it is hydration workflow.
//
bIsHydrationWorkflow = ::IsRecoveryInProgress();
if (bIsHydrationWorkflow)
{
DebugPrintf(SV_LOG_INFO,
"Hydration has happened. Recovery is in pogress.");
DebugPrintf(SV_LOG_DEBUG, "Exiting %s\n", FUNCTION_NAME);
return;
}
#endif
//
// ### No-Hydration / Clone ###
//
//
// Check if the Persisted VM info availability
//
std::string persistedHypervisorName;
std::string persistedSystemUUID;
bool persistedIsAzureVm;
if (GetPersistedVMInfo(persistedHypervisorName, persistedSystemUUID, persistedIsAzureVm))
{
DebugPrintf(SV_LOG_INFO,
"Persisted VM info available: Hypervisor: %s, System UUID: %s, IsAzureVm %d\n",
persistedHypervisorName.c_str(),
persistedSystemUUID.c_str(),
persistedIsAzureVm);
}
else
{
DebugPrintf(SV_LOG_INFO,
"Persisted VM info not available. It might not be a recovery.\n");
return;
}
//
// Discover the System UUID and Hypervisor
//
string systemUUID = GetSystemUUID();
// Skip recovery detection if System UUID is empty.
if (systemUUID.empty()) {
DebugPrintf(SV_LOG_ERROR, "IsRecoveryInProgress: Could not retrieve System UUID or got empty UUID.");
return;
}
if (boost::iequals(systemUUID, INVALID_UUID)) {
DebugPrintf(SV_LOG_ERROR, "IsRecoveryInProgress: Got Invalid UUID.");
return;
}
string hypervisor, hypervisorversion;
if (!IsVirtual(hypervisor, hypervisorversion))
{
hypervisor = PHYSICALMACHINE;
}
bSystemUuidChanged = !boost::iequals(systemUUID, persistedSystemUUID);
if (persistedHypervisorName.empty() ||
persistedSystemUUID.empty() ||
boost::iequals(persistedSystemUUID, INVALID_UUID) ||
!bSystemUuidChanged) {
DebugPrintf(SV_LOG_DEBUG, "Exiting %s\n", FUNCTION_NAME);
return;
}
//
// Compare discovered VM info with persisted values to detect the VM recovery.
// On Vmware if system UUID changes it is a clone
// On failover if system UUID changes
//
bIsAzureVm = IsAzureVirtualMachine();
// If hypervisor has not changed, it means it is a clone except when
// Hyper-v VM is migrated to Azure,
// Clone of Azure VM after failover from on-prem - [Migraton of Azure VM not supported using V2A,
// use A2A and such recovery is handled in AzureVmRecoveryManager ]
// Azure stack to Azure migration - [ this is not handled by this logic.
// it is detected as clone and that is fine as no failback is supported ]
// AVS scenario involved where the decision is made based on the FailoverVmBiosid received from rcm
// it is detected as failover if the rcm received biosid matches with the current system biosid
// Note that the only difference between clone and failover is to set the new hostId as BIOS-ID and the source control plane
// for the AVS scenario
// (see CompleteRecovery()). Except for the V2A Legacy there is no such requirement in other providers.
bHypervisorChanged = !boost::iequals(hypervisor, persistedHypervisorName);
bVmTypeChanged = (bIsAzureVm != persistedIsAzureVm);
DebugPrintf(SV_LOG_INFO,
"Current VM Info: Hypervisor: %s, System UUID: %s, IsAzureVm %d\n",
hypervisor.c_str(),
systemUUID.c_str(),
bIsAzureVm);
string failoverVmBiosId = lc.getFailoverVmBiosId();
if (!failoverVmBiosId.empty()){
DebugPrintf(SV_LOG_DEBUG, "Failover Vm BiosId is %s.\n",
failoverVmBiosId.c_str());
}
if ( bVmTypeChanged || (bHypervisorChanged ||
(!failoverVmBiosId.empty() && (boost::iequals(failoverVmBiosId, systemUUID) ||
boost::iequals(failoverVmBiosId, BiosID::GetByteswappedBiosID(systemUUID)))) ||
(IsAzureStackVirtualMachine() && HasAzureStackHubFailoverTag(qf)))) {
bIsFailoverDetected = true;
DebugPrintf(SV_LOG_ALWAYS, "Failover Detected Persisted UUID: %s CurrentUUID: %s\n", persistedSystemUUID.c_str(), systemUUID.c_str());
}
else
{
DebugPrintf(SV_LOG_ALWAYS, "Clone Detected Persisted UUID: %s CurrentUUID: %s\n", persistedSystemUUID.c_str(), systemUUID.c_str());
}
DebugPrintf(SV_LOG_DEBUG, "Exiting %s\n", FUNCTION_NAME);
return;
}
void HostRecoveryManager::ResetVMInfo(void)
{
DebugPrintf(SV_LOG_DEBUG, "Entering %s\n", FUNCTION_NAME);
// VM Info is applicable only for mobility agent
LocalConfigurator lc;
if (!lc.isMobilityAgent()) {
DebugPrintf(SV_LOG_DEBUG, "Skipping resetting vm details as it is not mobility agent\n");
return;
}
//
// Discover the hypervisor and get the system UUID
//
string systemUUID = GetSystemUUID();
string hypervisor, hypervisorVersion;
if (!IsVirtual(hypervisor, hypervisorVersion))
{
hypervisor = PHYSICALMACHINE;
}
//
// Skip Reset VM Info if hypervisor and UUID are empty.
//
if (hypervisor.empty() || systemUUID.empty()) {
DebugPrintf(SV_LOG_ERROR, "ResetVMInfo: Hypervisor and System UUID should not be empty.");
return;
}
bool bIsAzureVm = IsAzureVirtualMachine();
DebugPrintf(SV_LOG_INFO,
"Current VM Info: Hypervisor: %s, System UUID: %s, IsAzureVm %d\n",
hypervisor.c_str(),
systemUUID.c_str(),
bIsAzureVm);
//
// Compare the discovered info with persisted info
//
if (IsVMInfoMatching(hypervisor, systemUUID, bIsAzureVm))
{
DebugPrintf(SV_LOG_DEBUG,
"No change detected in IsAzureVM, Hypervisor and SystemUUID.\n");
return;
}
PersistVMInfo(hypervisor, systemUUID, bIsAzureVm);
DebugPrintf(SV_LOG_DEBUG, "Exiting %s\n", FUNCTION_NAME);
}
void HostRecoveryManager::UpdateHostId(const std::string& hostId)
{
DebugPrintf(SV_LOG_DEBUG, "Entering %s\n", FUNCTION_NAME);
std::string newHostId = boost::trim_copy(hostId);
if (newHostId.empty()) {
DebugPrintf(SV_LOG_ERROR, "UpdateHostId: HostId should not be empty");
return;
}
//
// Update HostId in drscount.conf file
//
LocalConfigurator lConfig;
lConfig.setHostId(newHostId);
DebugPrintf(SV_LOG_ALWAYS, "Updated HostId: %s\n", newHostId.c_str());
DebugPrintf(SV_LOG_DEBUG, "Exiting %s\n", FUNCTION_NAME);
}
void HostRecoveryManager::ResetResourceId()
{
DebugPrintf(SV_LOG_DEBUG, "Entering %s\n", FUNCTION_NAME);
std::string oldResourceId;
//
// Reset ResourceId in drscount.conf file
// Vx Service will automatically generate this id
//
LocalConfigurator lConfig;
oldResourceId = lConfig.getResourceId();
DebugPrintf(SV_LOG_ALWAYS, "Resetting ResourceId Old ResourceId: %s\n", oldResourceId.c_str());
lConfig.setResourceId("");
DebugPrintf(SV_LOG_DEBUG, "Exiting %s\n", FUNCTION_NAME);
}
void HostRecoveryManager::CompleteRecovery()
{
return CompleteRecovery(false);
}
void HostRecoveryManager::CompleteRecovery(bool bClone)
{
DebugPrintf(SV_LOG_DEBUG, "Entering %s\n", FUNCTION_NAME);
//
// Reset the replication on recovered VM
//
DebugPrintf(SV_LOG_INFO, "Resetting replication state.\n");
ResetReplicationState();
//
// Disable/Enable underlying Hypervisor related tools/services
// for failover VM
//
#ifdef SV_WINDOWS
bool isRebootRequired = false;
#endif
if (!bClone) {
DebugPrintf(SV_LOG_ALWAYS, "Disabling or Enabling hypervisor tools.\n");
DisableEnablePlatformTools();
#ifdef SV_WINDOWS
RunDiskRecoveryWF(isRebootRequired);
#endif
}
//
// Update the HostId.
// For clone generate a new host ID.
// For no-hydration failover use system uuid.
//
std::string newHostId = "";
std::string oldHostId;
LocalConfigurator lc;
oldHostId = lc.getHostId();
if (!bClone) {
newHostId = GetSystemUUID();
}
else {
boost::uuids::uuid guid = boost::uuids::random_generator()();
newHostId = boost::lexical_cast<string>(guid);
ResetResourceId();
}
if (!oldHostId.empty()) {
DebugPrintf(SV_LOG_ALWAYS, "%s detected Old Host Id: %s New HostId : %s.\n", (bClone) ? "Clone" : "Failover", oldHostId.c_str(), newHostId.c_str());
}
else {
DebugPrintf(SV_LOG_ALWAYS, "%s detected Updating HostId with : %s.\n", (bClone) ? "Clone" : "Failover", newHostId.c_str());
}
UpdateHostId(newHostId);
//
// Reset the vm info to current system values. And this step should be the
// last one in failover/clone.
//
ResetVMInfo();
#ifdef SV_WINDOWS
if (isRebootRequired) {
DebugPrintf(SV_LOG_ALWAYS, "Azure Site Recovery: Shutting down system to recover dynamic disks\n");
if (!RebootMachine()) {
system("shutdown /r /t 0 /c \"Azure Site Recovery: Fixing Failover VM\"");
}
}
#endif
DebugPrintf(SV_LOG_DEBUG, "Exiting %s\n", FUNCTION_NAME);
}
void HostRecoveryManager::DisableEnablePlatformTools()
{
DebugPrintf(SV_LOG_DEBUG, "Entering %s\n", FUNCTION_NAME);
std::string hypervisor, hypervisorVersion;
if (!IsVirtual(hypervisor, hypervisorVersion))
{
hypervisor = PHYSICALMACHINE;
}
DisableEnableVMWareTools(boost::iequals(hypervisor, VMWARENAME));
#ifdef SV_WINDOWS
// Enable Azure services if needed
DisableEnableAzureServices(IsAgentRunningOnAzureVm() || IsAzureStackVirtualMachine());
#endif
//
// Based on the requirements, add other platform specific tools handling
// logic routine and call it here similar to DisableEnableVMWareTools().
//
DebugPrintf(SV_LOG_DEBUG, "Exiting %s\n", FUNCTION_NAME);
}