def cleanUpFile()

in APS to SQL DW Migration - Schema and Data Migration with PolyBase/2_CleanScripts/CleanScripts.py [0:0]


def cleanUpFile(srcDir, outDir, file, objectType):
    """Clean one generated script from ``srcDir`` and write it into ``outDir``.

    The input ``srcDir/file`` is read once, then filtered according to
    ``objectType`` (compared case-insensitively):

    * ``"TABLE"`` -- lines before the first line that starts with
      ``CREATE TABLE`` are dropped.  After that, lines are written to
      ``outDir/file`` until a line starting with ``CREATE INDEX`` is seen
      (from then on they go to ``IDXSXYZ_<file>``); once a line starting with
      ``CREATE STATISTICS`` is seen, all further lines go to
      ``STATSXYZ_<file>``.  Semicolons are removed from every line except
      the lines that themselves start with one of those three statements.
    * ``"VIEW"`` -- lines before the first line that starts with
      ``CREATE VIEW`` are dropped, the last five lines of the input are
      dropped, and semicolons are removed from what remains.
    * ``"SP"`` -- same as ``"VIEW"`` but keyed on ``CREATE PROC`` and with
      semicolons left in place.
    * anything else -- a message is printed and ``outDir/file`` is left empty.

    The two side files are always created; each one is deleted again when
    ``neFile`` (defined elsewhere in this file) returns a false value for it.

    Args:
        srcDir: Directory containing the input script.
        outDir: Directory that receives the cleaned script and side files.
        file: File name of the script (no directory part).
        objectType: ``"TABLE"``, ``"VIEW"`` or ``"SP"`` in any letter case.
    """
    # Build every path with os.path.join so the result does not depend on
    # whether the caller passed the directories with a trailing separator.
    srcPath = os.path.join(srcDir, file)
    outPath = os.path.join(outDir, file)
    # The "XYZ" infix keeps side-file names from colliding with script names.
    idxsFp = os.path.join(outDir, "IDXSXYZ_" + file)
    statsFp = os.path.join(outDir, "STATSXYZ_" + file)

    # Read the input once, before any output file is opened; the total line
    # count is needed to know where the trailing lines begin.
    with open(srcPath, 'r') as fi:
        rows = fi.readlines()
    numLines = len(rows)

    # Number of lines at the end of a VIEW/SP script that are never copied.
    # NOTE(review): presumably the generated PRINT 'END'/GO footer -- confirm.
    trailerLines = 5

    # re.match anchors at the start of the line, so leading whitespace
    # before the statement prevents a match.
    crTable = re.compile("CREATE TABLE", re.I)
    crStats = re.compile("CREATE STATISTICS", re.I)
    crIndex = re.compile("CREATE INDEX", re.I)
    crView = re.compile("CREATE VIEW", re.I)
    crProc = re.compile("CREATE PROC", re.I)

    kind = objectType.upper()

    with open(outPath, 'w') as fo, \
            open(idxsFp, 'w') as fidxs, \
            open(statsFp, 'w') as fstas:

        if kind == "TABLE":
            crTableFound = False
            crIndexFound = False
            crStatsFound = False
            for row in rows:
                if crTable.match(row):
                    crTableFound = True
                elif crStats.match(row):
                    crStatsFound = True
                elif crIndex.match(row):
                    crIndexFound = True
                else:
                    # Only non-header lines get their semicolons stripped.
                    row = row.replace(";", "")

                if not crTableFound:
                    continue  # still in the preamble before CREATE TABLE
                # Statistics take precedence over indexes once both were seen.
                if crStatsFound:
                    fstas.write(row)
                elif crIndexFound:
                    fidxs.write(row)
                else:
                    fo.write(row)

        elif kind == "VIEW" or kind == "SP":
            header = crView if kind == "VIEW" else crProc
            stripSemicolons = (kind == "VIEW")
            headerFound = False
            for lineCount, row in enumerate(rows, 1):
                if header.match(row):
                    headerFound = True
                if headerFound and (numLines - lineCount) >= trailerLines:
                    if stripSemicolons:
                        row = row.replace(";", "")
                    fo.write(row)

        else:
            print ("Somthing wrong with the Object Type. I expect Table, View, or SP")

    # Drop side files for which neFile() reports false.
    # NOTE(review): neFile presumably means "non-empty file" -- confirm.
    for sideFp in (idxsFp, statsFp):
        if not neFile(sideFp):
            os.remove(sideFp)