# automation/tinc/main/ext/qautils/gppylib/programs/clsRecoverSegment.py
def SanFailback(self, array_config, gpEnv):
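    """
    Fail SAN-hosted segments back from their failover hosts to their
    primary hosts: find the failed segments and mountpoints, verify the
    primary hosts are reachable, move the mountpoints back, and update
    gp_san_configuration and gp_segment_configuration to match.
    """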
    # Get the configuration information maps.
    (san_mounts, san_mount_by_dbid) = array_config.getSanConfigMaps()
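    # As used below, san_mount_by_dbid maps a dbid to a
    # (status, content, mountlist) tuple, and san_mounts maps a mount id to
    # a dict carrying at least the 'active' flag and 'primaryhost' name.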

    # 1 Get the failed segments.
    bad_segs = {}
    for dbid, v in san_mount_by_dbid.iteritems():
        (status, content, mountlist) = v
        if status == 'd':
            self.logger.info('Bad segment with dbid %d' % dbid)
            bad_segs[dbid] = (status, content, mountlist)

    # 2 Get the failed mountpoints.
    bad_mounts = {}
    for mount_id, v in san_mounts.iteritems():
        if v['active'] == 'm':
            self.logger.info('Bad mountpoint with id %d' % mount_id)
            bad_mounts[mount_id] = v

    # 3 Verify that the required hosts are back up (this may reduce the
    # number of recoverable segments).
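    # A remote echo serves as a simple liveness probe: hosts that fail it
    # keep their mountpoints out of the recoverable set.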
    recoverable_mps = {}
    for mount_id, v in bad_mounts.iteritems():
        try:
            unix.Echo.remote('check host', 'Success', v['primaryhost'])
            recoverable_mps[mount_id] = v
        except Exception:
            # Host not available, so this mountpoint is not added to
            # recoverable_mps. Ignore the failure: there may be other
            # mountpoints we can still recover.
            pass

    # 4
    # From the recoverable mountpoints, identify the mountpoints required
    # to recover each segment. A segment is recoverable only if all of its
    # mountpoints are recoverable.
    recoverable_segs = {}
    for dbid, v in bad_segs.iteritems():
        (status, content, mountlist) = v
        recoverable = True
        for mount_id in mountlist:
            if mount_id not in recoverable_mps:
                recoverable = False
                break
        if recoverable:
            recoverable_segs[dbid] = v
        else:
            self.logger.warning('Unrecoverable segment dbid %d' % dbid)
    if len(recoverable_segs) == 0:
        raise Exception("Found no recoverable segments.")

    # 5 Stop GPDB.
    e = os.system('gpstop -aq -d %s' % (os.environ.get('MASTER_DATA_DIRECTORY')))
    if e != 0:
        self.logger.error('Failed to shut down Greenplum Database: segment recovery cannot proceed.')
        raise Exception("Failed to shut down GPDB. Segment recovery failed.")
    else:
        self.logger.info('Successfully shut down the Greenplum Database')
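    # Mountpoints can be moved only while the cluster is down; the catalog
    # is brought back in sync after the moves complete.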

    # 6 Move mountpoints.
    # For each recoverable segment, walk its mountlist and:
    # 6a unmount on the failover host,
    # 6b reconnect to the primary,
    # 6c mount on the primary.
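    # SanFailback_mountpoint (defined elsewhere in this class) is assumed to
    # perform steps 6a-6c for a single mountpoint and, per the check below,
    # to return 0 on success.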
    mount_done = {}
    for dbid, v in recoverable_segs.iteritems():
        (status, content, mountlist) = v
        for mount_id in mountlist:
            if mount_id in mount_done:
                continue  # already moved this mountpoint
            if self.SanFailback_mountpoint(mount_id, recoverable_mps[mount_id]) == 0:
                mount_done[mount_id] = True
            else:
                # TODO: some kind of error handling here ??
                mount_done[mount_id] = False

    self.logger.debug('Completed mount-recovery:')
    for mount_id, v in mount_done.iteritems():
        self.logger.debug('mount-id %d ---> %s' % (mount_id, 'TRUE' if v else 'FALSE'))
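    # Mountpoints that failed to move (False entries) are handled during the
    # catalog update below: their segments are left marked down.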

    # N - 3
    # Start GPDB in admin-mode.
    # Assigning through os.environ (rather than os.putenv) keeps the
    # interpreter's copy of the environment in sync with what the gpstart
    # child process sees.
    os.environ['GPSTART_INTERNAL_MASTER_ONLY'] = '1'
    e = os.system('gpstart -m -d %s' % (os.environ.get('MASTER_DATA_DIRECTORY')))
    if e != 0:
        self.logger.error('Failed to bring Greenplum Database up in management mode: segment recovery failed')
        raise Exception("Failed to start GPDB in management mode.")
    else:
        self.logger.info('Greenplum Database restarted for configuration update')

    # N - 2
    # Update configuration.
    # Open a connection to the DB.
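    # Only the master is running at this point, so the catalog edits below
    # go through a utility-mode connection.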
    conn = None
    try:
        db_url = dbconn.DbURL(port=gpEnv.getMasterPort(), dbname='template1')
        conn = dbconn.connect(db_url, utility=True)
        dbconn.execSQL(conn, "BEGIN")
        self.logger.debug('Starting Transaction')
        # Update gp_san_configuration.
        for mount_id, v in mount_done.iteritems():
            self.logger.debug('Checking Mount id %d' % mount_id)
            if v:
                sql = 'UPDATE gp_san_configuration SET active_host=\'p\' WHERE mountid=%d' % mount_id
                self.logger.debug('Issuing SQL [%s]' % sql)
                dbconn.executeUpdateOrInsert(conn, sql, 1)
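                # Record the move in gp_configuration_history. The dbid column
                # is -1 here, presumably a sentinel for "no particular segment",
                # since this entry describes a mountpoint; the segment-level
                # entries below use the real dbid.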
history_message = "GPRECOVERSEG: san-mount-id %d set active_host to primary" % (mount_id)
sql = 'INSERT INTO gp_configuration_history values (now(), -1, \'%s\')' % history_message
self.logger.debug('Issuing SQL [%s]' % sql)
dbconn.executeUpdateOrInsert(conn, sql, 1)
        # Update gp_segment_configuration.
        for dbid, v in recoverable_segs.iteritems():
            (status, content, mountlist) = v
            self.logger.debug('Checking dbid %d' % dbid)
            # A segment comes back up only if every mountpoint in its own
            # mountlist was moved back successfully.
            all_mountpoints = True
            for mount_id in mountlist:
                if mount_done.get(mount_id):
                    self.logger.debug('Mountid %d --> True' % mount_id)
                else:
                    self.logger.debug('Mountid %d --> False' % mount_id)
                    all_mountpoints = False
            if all_mountpoints:
                sql = 'UPDATE gp_segment_configuration SET status = \'u\' where dbid = %d' % dbid
                self.logger.debug('Issuing SQL [%s]' % sql)
                dbconn.executeUpdateOrInsert(conn, sql, 1)
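                # The expected row count of 2 covers both rows of this content
                # pair (primary and mirror), whose roles are reset together.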
                sql = 'UPDATE gp_segment_configuration SET role = preferred_role where content = %d' % content
                self.logger.debug('Issuing SQL [%s]' % sql)
                dbconn.executeUpdateOrInsert(conn, sql, 2)
                history_message = "GPRECOVERSEG: content %d, dbid %d moved to primary host" % (content, dbid)
                sql = 'INSERT INTO gp_configuration_history values (now(), %d, \'%s\')' % (dbid, history_message)
                self.logger.debug('Issuing SQL [%s]' % sql)
                dbconn.executeUpdateOrInsert(conn, sql, 1)
            else:
                self.logger.info('Failed to recover sufficient mountpoints for dbid %d' % dbid)
        self.logger.debug('Committing our updates.')
        dbconn.execSQL(conn, "COMMIT")
    finally:
        if conn:
            conn.close()

    # N - 1
    # Stop GPDB admin-mode.
    e = os.system('gpstop -m -d %s' % (os.environ.get('MASTER_DATA_DIRECTORY')))
    if e != 0:
        self.logger.error('Failed to stop Greenplum Database from management mode: segment recovery failed')
        raise Exception("Failed to stop GPDB from management mode.")
    else:
        self.logger.info('Greenplum Database stopped, preparing for full restart.')

    # N Start GPDB.
    e = os.system('gpstart -aq -d %s' % (os.environ.get('MASTER_DATA_DIRECTORY')))
    if e != 0:
        self.logger.error('Failed to restart Greenplum Database: segment recovery failed')
        raise Exception("Failed to restart GPDB.")
    else:
        self.logger.info('Successfully restarted the Greenplum Database')
    configInterface.getConfigurationProvider().sendPgElogFromMaster("SAN recovery has completed.", True)
    return 0