tools/mongodb-schema.py (117 lines of code) (raw):

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Compare the metadata of two MongoDB deployments.

The script connects to a source and a destination deployment and verifies
that both expose the same databases, the same ``dbstats`` output (after
dropping size/time dependent fields), the same collections per database and
the same index names per collection.

Exit status: 0 when everything matches, -1 when a difference is found,
2 on a usage error or when a connection cannot be created.

The code is written to run unchanged on Python 2.6+ and Python 3.
"""

import getopt
import string  # noqa: F401 -- no longer used; kept so existing imports stay intact
import sys
import time

import pymongo

# constant
EXCLUDE_DBS = "excludeDbs"
EXCLUDE_COLLS = "excludeColls"
AUTO_IGNR_DATABASES = ["admin", "local"]
AUTO_IGNR_TABLES = ["system.profile"]

# dbstats fields that depend on storage layout, host file system or the time
# of the call; they legitimately differ between two otherwise equal clusters.
_UNCHECKED_STATS = (
    "avgObjSize",
    "storageSize",
    "indexSize",
    "objects",
    "dataSize",
    "totalSize",
    "fsUsedSize",
    "fsTotalSize",
    "operationTime",
    "$clusterTime",
    "$gleStats",
)

# Pre-populated so that check() also works when this module is imported
# instead of being run as a script.
configure = {EXCLUDE_DBS: [], EXCLUDE_COLLS: []}


def _log(level, message):
    """Print ``message`` prefixed with ``level`` and the current local time."""
    print("%s [%s] %s " % (level, time.strftime('%Y-%m-%d %H:%M:%S'), message))


def log_info(message):
    """Print an INFO line on stdout."""
    _log("INFO", message)


def log_error(message):
    """Print an ERROR line on stdout."""
    _log("ERROR", message)


class MongoCluster:
    """Thin holder for a connection string and its pymongo client."""

    # pymongo connection
    conn = None

    # connection string
    url = ""

    def __init__(self, url):
        self.url = url
        self.conn = None

    def connect(self):
        """Create the pymongo client for ``self.url``."""
        self.conn = pymongo.MongoClient(self.url)

    def close(self):
        """Close the client if one was created; safe to call repeatedly."""
        if self.conn is not None:
            self.conn.close()
            self.conn = None


def removeUncheck(m):
    """Drop, in place, the dbstats fields that must not be compared.

    Missing keys are ignored, so the function works regardless of which
    fields a particular server version reports. Fields such as
    ``collections`` and ``indexes`` are left in place and are compared.
    """
    for key in _UNCHECKED_STATS:
        m.pop(key, None)


def _list_names(target, modern, legacy):
    """Return ``target.<modern>()``, or ``target.<legacy>()`` on old drivers.

    The lookup is done on the class: pymongo clients and databases turn
    unknown *instance* attributes into database/collection handles, so a
    plain ``getattr`` on the instance would never report a missing method.
    """
    method = modern if hasattr(type(target), modern) else legacy
    return getattr(target, method)()


def _kept_sorted(names, excluded):
    """Return ``names`` without the ``excluded`` entries, sorted."""
    return sorted(name for name in names if name not in excluded)


def check(src, dst):
    """Check metadata: database names, db stats(), collections and indexes.

    ``src`` and ``dst`` are connected MongoCluster objects. Databases and
    collections listed in ``configure`` are skipped. The first difference
    found is logged and ends the check.

    Returns True when both sides match, False otherwise.
    """
    excluded_dbs = configure.get(EXCLUDE_DBS, [])
    excluded_colls = configure.get(EXCLUDE_COLLS, [])

    #
    # check metadata
    #
    srcDbNames = _kept_sorted(
        _list_names(src.conn, "list_database_names", "database_names"),
        excluded_dbs)
    dstDbNames = _kept_sorted(
        _list_names(dst.conn, "list_database_names", "database_names"),
        excluded_dbs)
    if srcDbNames != dstDbNames:
        log_error("DIFF => database count not equals. \nsrc[%s], \ndst[%s]" % (srcDbNames, dstDbNames))
        return False
    log_info("EQUL => database set equals. \ndb_list : [%s]" % " ".join(srcDbNames))

    # check database names and collections
    for db in srcDbNames:
        srcDb = src.conn[db]
        dstDb = dst.conn[db]

        # db.stats() comparison
        srcStats = srcDb.command("dbstats")
        dstStats = dstDb.command("dbstats")
        removeUncheck(srcStats)
        removeUncheck(dstStats)
        if srcStats != dstStats:
            log_error("DIFF => database [%s] stats not equals \nsrc[%s], \ndst[%s]" % (db, srcStats, dstStats))
            return False
        log_info("EQUL => database [%s] stats() equals" % db)

        # for collections in db
        srcColls = _kept_sorted(
            _list_names(srcDb, "list_collection_names", "collection_names"),
            excluded_colls)
        dstColls = _kept_sorted(
            _list_names(dstDb, "list_collection_names", "collection_names"),
            excluded_colls)
        if srcColls != dstColls:
            log_error("DIFF => database [%s] collections count not equals, \nsrc[%s], \ndst[%s]" % (db, srcColls, dstColls))
            return False
        log_info("EQUL => database [%s] collections set equals, coll_list : [%s]" % (db, " ".join(srcColls)))

        for coll in srcColls:
            # compare collection indexes (by name)
            srcIndexes = [index["name"] for index in srcDb[coll].list_indexes()]
            dstIndexes = [index["name"] for index in dstDb[coll].list_indexes()]
            # Symmetric comparison: an index that exists only on the
            # destination is a difference too.
            if set(srcIndexes) != set(dstIndexes):
                log_error("DIFF => collection [%s] has diffrence indexes. \nsrc[%s], \ndst[%s]" % (coll, srcIndexes, dstIndexes))
                return False
            log_info("EQUL => collection [%s] indexes equals. indexes[%s]" % (coll, srcIndexes))

    return True


def usage(code=0):
    """Print the command line help and exit with status ``code``."""
    print("Usage: %s --src=mongodb://localhost:8001/? --dest=mongodb://localhost:8001/? \n"
          "--excludeDbs=admin,local --excludeCollections=system.profile" %
          sys.argv[0])
    sys.exit(code)


def main(argv=None):
    """Parse the command line, run the comparison and return the exit status.

    ``argv`` defaults to ``sys.argv[1:]``. Usage errors and connection
    failures terminate the process through ``sys.exit(2)``.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        # Short options mirror the long ones: -s/--src, -d/--dest,
        # -e/--excludeDbs, -x/--excludeCollections.
        opts, _ = getopt.getopt(
            argv, "hs:d:e:x:",
            ["help", "src=", "dest=", "excludeDbs=", "excludeCollections="])
    except getopt.GetoptError as err:
        log_error(str(err))
        usage(2)

    excludeDbs, excludeColls = [], []
    srcUrl, dstUrl = "", ""
    for key, value in opts:
        if key in ("-h", "--help"):
            usage()
        elif key in ("-s", "--src"):
            srcUrl = value
        elif key in ("-d", "--dest"):
            dstUrl = value
        elif key in ("-e", "--excludeDbs"):
            excludeDbs = value.split(",")
        elif key in ("-x", "--excludeCollections"):
            excludeColls = value.split(",")

    # params verify
    if not srcUrl or not dstUrl:
        usage(2)

    # ignore databases
    configure[EXCLUDE_DBS] = excludeDbs + AUTO_IGNR_DATABASES
    configure[EXCLUDE_COLLS] = excludeColls + AUTO_IGNR_TABLES

    # dump configuration
    log_info("configuration : excludeDbs=%s, excludeColls=%s" % (configure[EXCLUDE_DBS], configure[EXCLUDE_COLLS]))

    src, dst = MongoCluster(srcUrl), MongoCluster(dstUrl)
    try:
        src.connect()
        dst.connect()
    except Exception as e:  # top-level boundary: report and stop
        print(e)
        log_error("create mongo connection failed src[%s], dest[%s]" % (srcUrl, dstUrl))
        src.close()
        dst.close()
        sys.exit(2)

    # Always release both clients, whether the check passes, fails or raises.
    try:
        same = check(src, dst)
    finally:
        src.close()
        dst.close()

    return 0 if same else -1


if __name__ == "__main__":
    sys.exit(main())