compile/remote-compile/lbc/tool/getFuncs.py (398 lines of code) (raw):

# -*- coding: utf-8 -*-
# cython:language_level=3
"""
-------------------------------------------------
   File Name:     getFuncs
   Description :  drive gdb over an ELF image and store its functions,
                  structs and types in an SQLite database
   Author :       liaozhaoyan
   date:          2021/12/1
-------------------------------------------------
   Change Activity:
                   2021/12/1:
-------------------------------------------------
"""
__author__ = 'liaozhaoyan'

import sys
import os
import select
from select import epoll as CPoll
import shlex
from subprocess import PIPE, Popen
import sqlite3
import json
import re

try:
    import eventlet
except ImportError:  # only CgenfuncsDb.parse_kos() needs eventlet
    eventlet = None

ON_POSIX = 'posix' in sys.builtin_module_names


class CasyncCmdQue(object):
    """Child process driven through its stdin/stdout pipes.

    Output is collected with epoll so every read can be bounded by a timeout.
    """

    def __init__(self, cmd):
        """Spawn *cmd* (split with shlex) and register its stdout for polling."""
        super(CasyncCmdQue, self).__init__()
        # Not read anywhere in this file; kept for any external reader.
        self.daemon = True
        self.__p = Popen(shlex.split(cmd), stdout=PIPE, stdin=PIPE, close_fds=ON_POSIX)
        self.__e = CPoll()
        self.__e.register(self.__p.stdout.fileno(), select.EPOLLIN)

    def __del__(self):
        # The attributes are missing when Popen() itself failed in __init__,
        # so look them up defensively (by their name-mangled names).
        proc = getattr(self, "_CasyncCmdQue__p", None)
        if proc is not None and proc.poll() is None:
            proc.kill()
        poller = getattr(self, "_CasyncCmdQue__e", None)
        if poller is not None:
            poller.close()

    def write(self, cmd):
        """Send *cmd* to the child's stdin; return -1 on IOError, else None."""
        try:
            self.__p.stdin.write(cmd.encode())
            self.__p.stdin.flush()
        except IOError:
            return -1

    def writeLine(self, cmd):
        """Send *cmd* followed by a newline; return whatever write() returns."""
        return self.write(cmd + "\n")

    def read(self, tmout=0.2, l=16384):
        """Return up to *l* bytes of child output, or "" after *tmout* seconds.

        Raises:
            OSError: the poller reported an error, a hang-up or an unexpected
                event instead of readable data.
        """
        events = self.__e.poll(tmout)
        if not events:
            return ""
        # Only one descriptor is registered, so the first event decides.
        fd, mask = events[0]
        if mask & select.EPOLLIN:
            # A read may end in the middle of a multi-byte character; replace
            # the fragment rather than abort the whole session.
            return os.read(fd, l).decode(errors="replace")
        if mask & select.EPOLLERR:
            raise OSError("epoll error.")
        if mask & select.EPOLLHUP:
            raise OSError("epoll hup error.")
        raise OSError(f"unknown catch able events {mask}")

    def readw(self, want, tries=20):
        """Read until *want* shows up and return everything read so far.

        The marker is searched in the accumulated output, so it is still
        found when it arrives split across two reads.

        Raises:
            OSError: *want* did not appear within *tries* reads.
        """
        collected = ""
        for _ in range(tries):
            collected += self.read()
            if want in collected:
                return collected
        raise OSError("get want args %s overtimes" % want)

    def terminate(self):
        """Ask the child to quit, terminate it and return its exit status."""
        self.writeLine("q")
        self.__p.terminate()
        return self.__p.wait()


class CgetVminfo(object):
    """gdb session opened on one ELF image (vmlinux, module, bpf object)."""

    def __init__(self, vmPath, arch, waits=20):
        """Start gdb on *vmPath* and configure it for bulk queries.

        Args:
            vmPath: path of the image handed to gdb.
            arch: value passed to gdb's "set architecture".
            waits: number of read attempts allowed for the first prompt.

        Raises:
            OSError: a gdb prompt did not show up in time.
        """
        super(CgetVminfo, self).__init__()
        # Quote the path so a name containing spaces stays one argument.
        self._gdb = CasyncCmdQue("gdb %s" % shlex.quote(vmPath))
        self._gdb.readw("(gdb)", waits)
        for setting in ("set pagination off",
                        "set architecture %s" % arch,
                        "set max-value-size unlimited"):
            self._gdb.writeLine(setting)
            self._gdb.readw("(gdb)")

    def __del__(self):
        if hasattr(self, "_gdb"):
            self._gdb.terminate()

    def _ask(self, cmd, tries=20):
        """Run one gdb command and return the first line of its answer."""
        self._gdb.writeLine(cmd)
        return self._gdb.readw("(gdb)", tries).split("\n")[0]

    def genType(self, t):
        """Return [alias, size] for type *t* from "ptype" and "p sizeof".

        Raises:
            ValueError: gdb's answer holds no "=" (for example an error text).
        """
        _, alias = self._ask(f"ptype {t}").split("=", 1)
        return [alias.strip(), self.showTypeSize(t)]

    def showTypeSize(self, t):
        """Return sizeof(*t*) as reported by gdb.

        Raises:
            ValueError: gdb's answer could not be parsed as "... = <int>".
        """
        _, size = self._ask(f"p sizeof({t})").split("=", 1)
        return int(size.strip())

    def showStruct(self, sStruct):
        """Return the raw "ptype /o" text for *sStruct*, prompt included."""
        self._gdb.writeLine("ptype /o %s" % sStruct)
        return self._gdb.readw("(gdb)", 80)

    def _dumpCmd(self, cmd, fName, marker="\n(gdb)"):
        """Run *cmd* and stream its output into *fName* until the prompt.

        The previous chunk's tail is kept so a prompt that straddles two
        reads is still detected.
        """
        self._gdb.writeLine(cmd)
        keep = len(marker) - 1
        tail = ""
        with open(fName, 'w') as f:
            while True:
                chunk = self._gdb.read(tmout=240)
                f.write(chunk)
                window = tail + chunk
                if marker in window:
                    break
                tail = window[-keep:] if keep else ""

    def genFuncs(self, fName="funs.txt"):
        """Write the output of gdb's "i functions" to *fName*."""
        self._dumpCmd("i functions", fName)

    def genTypes(self, fName="types.txt"):
        """Write the output of gdb's "i types" to *fName*."""
        self._dumpCmd("i types", fName)


class CgenfuncsDb(object):
    """Builds the SQLite database of files, functions, structs and types."""

    # Statements creating the schema, run in order by _setupDb().
    _SCHEMA = (
        """CREATE TABLE files (
                          id INTEGER PRIMARY KEY autoincrement,
                          file TEXT
                );""",
        """CREATE TABLE funs (
                          id INTEGER PRIMARY KEY autoincrement,
                          func VARCHAR (128),
                          args JSON,
                          ret VARCHAR (64),
                          line INTEGER,
                          fid INTEGER,
                          module VARCHAR (64)
                );""",
        """CREATE TABLE structs (
                          id INTEGER PRIMARY KEY autoincrement,
                          name VARCHAR (64),
                          members INTEGER,
                          bytes INTEGER
                );""",
        """CREATE TABLE members (
                          id INTEGER PRIMARY KEY autoincrement,
                          fid INTEGER,
                          types VARCHAR (128),
                          name VARCHAR (64),
                          offset INTEGER,
                          bytes INTEGER,
                          bits VARCHAR (16) DEFAULT ''
                );""",
        """CREATE TABLE types (
                          id INTEGER PRIMARY KEY autoincrement,
                          name VARCHAR (64),
                          alias VARCHAR (64),
                          bytes INTEGER
                );""",
    )

    def __init__(self, dbName, arch, build=True):
        """Open *dbName*; with build=True recreate it with an empty schema.

        Args:
            dbName: database path handed to sqlite3.connect().
            arch: architecture name passed on to every gdb session.
            build: when true an existing file is deleted and the tables are
                created; when false the database is only opened.
        """
        self._db = None
        self._res = None
        self._arch = arch
        self._setupDb(dbName, build)
        self._reTypeLine = re.compile(r"[\d]+:")
        self._rePaholeRem1 = re.compile(r"\/\* *[\d]+( *|: *[\d] *)\| *[\d]+ *\*\/")
        self._rePaholeRem2 = re.compile(r"\/\* *[\d]+ *\*\/")
        self._reRem = re.compile(r"\/\*.*\*\/")
        self._banTypes = []

    def __del__(self):
        # getattr: the attribute is absent on a partially built object.
        db = getattr(self, "_db", None)
        if db is not None:
            db.commit()
            db.close()

    def _setupDb(self, dbName, build):
        """Connect to *dbName*, recreating file and tables when *build*."""
        if not build:
            self._db = sqlite3.connect(dbName)
            return

        if os.path.exists(dbName):
            os.remove(dbName)
        self._db = sqlite3.connect(dbName)
        cur = self._db.cursor()
        for statement in self._SCHEMA:
            cur.execute(statement)
        cur.close()

    def _arg_split(self, argStr):
        """Split an argument list on commas that are outside parentheses."""
        args = []
        arg = ""
        depth = 0
        for ch in argStr:
            if depth == 0 and ch == ",":
                args.append(arg.strip())
                arg = ""
                continue
            if ch == "(":
                depth += 1
            elif ch == ")":
                depth -= 1
            arg += ch
        if arg != "":
            args.append(arg.strip())
        return args

    def _funcs(self, funcPath, module="vm"):
        """Load an "i functions" dump into the files and funs tables.

        Functions listed under a "File ...h:" header are skipped; parsing
        stops at the first line starting with the gdb prompt.
        """
        cur = self._db.cursor()
        with open(funcPath, 'r') as f:
            fid = 0
            for line in f:
                # Drop the newline only; the last line may not have one.
                line = line.rstrip("\n")
                if line == "":
                    continue
                if line.startswith("(gdb)"):
                    break
                if line.startswith("File "):
                    if line.endswith(".h:"):
                        fid = -1  # do not record functions from headers
                    else:
                        _, sFile = line.split(" ", 1)
                        cur.execute("INSERT INTO files (file) VALUES (?)",
                                    (sFile[:-1],))
                        fid = cur.lastrowid
                elif fid > 0 and line.endswith(");"):
                    # 8:      static int __paravirt_pgd_alloc(struct mm_struct *);
                    lineNo, body = line[:-2].split(":", 1)
                    head, args = body.split("(", 1)
                    args = self._arg_split(args)
                    if "*" in head:
                        ret, func = head.rsplit("*", 1)
                        ret += "*"
                    else:
                        ret, func = head.rsplit(" ", 1)
                    cur.execute(
                        "INSERT INTO funs (func, args, ret, line, fid, module) "
                        "VALUES (?, ?, ?, ?, ?, ?)",
                        (func, json.dumps(args), ret.strip(), int(lineNo), fid, module))
        cur.close()

    def _stripRem(self, line):
        """Return *line* without its /* ... */ part, stripped."""
        return self._reRem.sub("", line).strip()

    def _splitStructLine(self, line):
        """Split one "ptype /o" line into layout numbers and declaration.

        Returns a dict with "offset", "size" and "bits" (each None when the
        leading comment does not carry it) and "line", the text left after
        removing the comment.
        """
        rd = {"offset": None, "size": None, "bits": None}
        res = self._rePaholeRem1.search(line)
        if res:
            # /*   19: 0   |     1 */    unsigned char skc_reuse : 4;
            # /*   19: 4   |     1 */    unsigned char skc_reuseport : 1;
            off, size = res.group()[2:-2].strip().split('|', 1)
            rd["size"] = int(size.strip())
            if ":" in off:
                off, bits = off.split(":", 1)
                rd['bits'] = bits.strip()
            rd["offset"] = int(off.strip())
        else:
            res = self._rePaholeRem2.search(line)
            if res:
                # /*    8      |     4 */        union {
                # /*                 4 */            unsigned int skc_hash;
                # /*                 4 */            __u16 skc_u16hashes[2];
                rd["size"] = int(res.group()[2:-2].strip())
        rd["line"] = self._stripRem(line)
        return rd

    def _parseMember(self, sStruct, line, pre="", off=0):
        """Record one member line, or the struct's total size.

        Handled shapes:
            struct list_head * next;
            void (*func)(struct callback_head *);
            unsigned int p : 1;
            /* total size (bytes):    4 */

        Args:
            sStruct: name of the struct being parsed (not used here).
            line: one line of "ptype /o" output.
            pre: dotted prefix put in front of the member name.
            off: offset used when the line carries none of its own.
        """
        if ";" not in line:
            if "total size (bytes):" in line:
                size = line.split(":", 1)[1]
                size = size.split("*", 1)[0].strip()
                self._res['res']['size'] = size
            return

        rd = self._splitStructLine(line)
        decl = rd['line']
        bits = ""
        # Only " : " marks a bit-field; any other colon is left alone
        # instead of raising on the split below.
        if " : " in decl:
            decl, bits = decl.rsplit(" : ", 1)
            # The width keeps the trailing ';' of the declaration.
            bits = "%s:%s" % (rd["bits"], bits)

        if "(*" in decl:
            # Function pointer: take the name out of the declarator.
            _, func = decl.split("(*", 1)
            func, _ = func.split(")", 1)
            types = decl.replace(" (*%s)(" % func, " (*)(", 1)
            types = re.sub(" +", " ", types)
            name = func
        elif '*' in decl:
            types, name = decl.rsplit("*", 1)
            types = types + "*"
            name = name.strip("; ")
        else:
            types, name = decl.rsplit(" ", 1)
            types = types.strip()
            name = name.strip("; ")
        name = pre + name

        if rd["offset"] is None:
            rd["offset"] = off
        self._res['res']['cell'].append({
            "type": types, "name": name, "offset": rd["offset"],
            "size": rd["size"], "bits": bits,
        })

    def _parseBox(self, sStruct, lines, pre, off=0):
        """Parse a nested "union {" / "struct {" ... "} name;" group.

        Args:
            sStruct: name of the struct being parsed.
            lines: the group, opening and closing lines included.
            pre: prefix inherited from the enclosing scope.
            off: offset of the enclosing scope, used when the opening line
                carries a size but no offset of its own.
        """
        rd = self._splitStructLine(lines[0])
        kind = rd['line'].split(" ", 1)[0]
        if kind in ["union", "struct"]:
            lastLine = lines[-1].strip()
            if not lastLine.startswith("};"):
                # "} pci;" -> the members are addressed as "pci.<member>"
                npre, _ = lastLine[1:].split(";", 1)
                _, npre = npre.rsplit(" ", 1)
                pre += npre.strip() + "."
        if rd["offset"] is None:
            rd["offset"] = off
        self._parseLoop(sStruct, lines, pre, rd["offset"])

    def _parseLoop(self, sStruct, lines, pre, off=0):
        """Walk lines[1:-1], sending nested groups to _parseBox and the
        remaining lines to _parseMember."""
        depth = 0
        box = []
        for line in lines[1:-1]:
            depth += line.count("{") - line.count("}")
            if depth > 0:
                box.append(line)
            elif box:
                # Closing line of the group collected so far.
                box.append(line)
                self._parseBox(sStruct, box, pre, off)
                box = []
            else:
                self._parseMember(sStruct, line, pre, off)

    def _getStruct(self, gdb, sStruct):
        """Ask gdb for the layout of *sStruct* and return the parsed result.

        The result is {"log": "struct", "res": {...}} where "res" holds
        "name", "size", "cell" (list of members) and "members" (their count).
        """
        self._res = {"log": "struct"}
        lines = gdb.showStruct(sStruct).split('\n')
        self._res['res'] = {"name": sStruct, "size": 0, "cell": []}
        self._parseLoop(sStruct, lines, "")
        self._res['res']['members'] = len(self._res['res']['cell'])
        return self._res

    def _struct_is_in(self, cur, sStruct):
        """Return True when *sStruct* is already in the structs table."""
        cur.execute("SELECT name FROM structs WHERE name = ?", (sStruct,))
        return cur.fetchone() is not None

    def _struct(self, cur, gdb, sStruct):
        """Store *sStruct* and its members unless it is already stored."""
        if self._struct_is_in(cur, sStruct):
            return
        res = self._getStruct(gdb, sStruct)
        if res is None:
            return
        dStruct = res['res']
        # 'size' may be numeric text; the INTEGER column converts it.
        cur.execute("INSERT INTO structs (name, members, bytes) VALUES (?, ?, ?)",
                    (dStruct['name'], dStruct['members'], dStruct['size']))
        fid = cur.lastrowid
        for cell in dStruct['cell']:
            # A member whose layout comment gave no size is reported and
            # skipped, as it was when the statement was built as text.
            if cell['offset'] is None or cell['size'] is None:
                print(f"bad member {cell}, for {dStruct['name']}")
                continue
            try:
                cur.execute(
                    "INSERT INTO members (fid, types, name, offset, bytes, bits) "
                    "VALUES (?, ?, ?, ?, ?, ?)",
                    (fid, cell['type'], cell['name'], cell['offset'],
                     cell['size'], cell['bits']))
            except sqlite3.Error as e:
                print(f"bad member {cell}, for {dStruct['name']}: {e}")

    def _save_type(self, cur, gdb, t):
        """Store type *t*; when gdb shows it as an anonymous struct or union,
        store its layout under the same name as well.

        Raises:
            ValueError: gdb's answer for *t* could not be parsed.
        """
        alias, size = gdb.genType(t)
        cur.execute("INSERT INTO types (name, alias, bytes) VALUES (?, ?, ?)",
                    (t, alias, size))
        if alias == "struct {" or alias == 'union {':
            # there is no named struct behind this type
            self._struct(cur, gdb, t)

    def _type_is_in(self, cur, t):
        """Return True when *t* is banned or already in the types table."""
        if t in self._banTypes:
            return True
        cur.execute("SELECT name FROM types WHERE name = ?", (t,))
        return cur.fetchone() is not None

    def _check_type(self, cur, gdb, t):
        """Store *t* if it is new; ban it when gdb's answer cannot be parsed."""
        if self._type_is_in(cur, t):
            return
        try:
            self._save_type(cur, gdb, t)
        except ValueError:
            self._banTypes.append(t)
            print(f"failed to parse type {t}")

    def _types(self, typePath, cur, gdb):
        """Walk an "i types" dump and store every struct, union and type.

        The first line, prompt lines, "File " headers and enums are skipped.
        """
        with open(typePath, 'r') as f:
            for i, line in enumerate(f):
                if i < 1 or line.startswith("(gdb)"):  # skip head and end
                    continue
                if self._reTypeLine.match(line):
                    line = line.split(':', 1)[1]  # drop the "<lineno>:" part
                line = line.strip()
                if line == "":
                    continue
                if line.startswith(("File ", "enum ")):
                    continue
                if line.startswith(("struct ", "union ")):
                    self._struct(cur, gdb, line[:-1])  # skip last ;
                    continue
                if line.startswith("typedef "):
                    sType = line.rsplit(" ", 1)[1]
                    self._check_type(cur, gdb, sType[:-1])  # skip last ;
                    continue
                if line.startswith("__int128"):
                    line = "__int128"
                self._check_type(cur, gdb, line)

    def _parseElf(self, cur, gdb, mod):
        """Dump types and functions of the open image and load both.

        The dumps go to "types.txt" and "funcs.txt" in the working directory.
        An OSError while dumping is reported and the image is skipped.
        """
        try:
            gdb.genTypes("types.txt")
            gdb.genFuncs("funcs.txt")
        except OSError as e:
            print(f"parse error. report {e}")
            return
        self._funcs("funcs.txt", mod)
        self._types("types.txt", cur, gdb)

    def _parseImage(self, imagePath, waits, mod):
        """Open *imagePath* in gdb, store "void *" and load the image."""
        cur = self._db.cursor()
        gdb = CgetVminfo(imagePath, self._arch, waits=waits)
        self._save_type(cur, gdb, "void *")
        self._parseElf(cur, gdb, mod)
        cur.close()

    def pasrseVmLinux(self, vmPath):
        """Load a vmlinux image; its functions get module name "vm"."""
        self._parseImage(vmPath, 1600, "vm")

    def pasrseLbc(self, bcPath):
        """Load an lbc object; its functions get module name "lbc"."""
        self._parseImage(bcPath, 200, "lbc")

    # Correctly spelled aliases; the original names stay for existing callers.
    parseVmLinux = pasrseVmLinux
    parseLbc = pasrseLbc

    def _parse_ko(self, path, fName):
        """Load one kernel module file found in directory *path*."""
        if fName.endswith(".ko"):
            mod = fName.rsplit(".", 1)[0]
        else:
            mod = fName.rsplit(".", 2)[0]  # "<mod>.ko.debug"
        try:
            gdb = CgetVminfo(os.path.join(path, fName), self._arch, 80)
        except OSError as e:
            print(f"load module {fName} error. report {e}")
            return
        cur = self._db.cursor()
        self._parseElf(cur, gdb, mod)
        cur.close()

    def parse_kos(self, path):
        """Load every "*.ko" / "*.ko.debug" file found below *path*.

        Each module is parsed under a six minute eventlet timeout; a module
        that times out or raises OSError is reported and skipped.

        Raises:
            ImportError: eventlet is not installed.
        """
        if eventlet is None:
            raise ImportError("eventlet is required by parse_kos()")
        patched = False
        for root, _dirs, files in os.walk(path):
            for fName in files:
                if not fName.endswith((".ko", ".ko.debug")):
                    continue
                if not patched:
                    # Patch once, and only when there is a module to parse.
                    eventlet.monkey_patch()
                    patched = True
                try:
                    with eventlet.Timeout(6 * 60):
                        self._parse_ko(root, fName)
                except (OSError, eventlet.timeout.Timeout) as e:
                    print(f"parse {fName} failed report {e}")


if __name__ == "__main__":
    d = CgenfuncsDb("lbc.db", arch='x86_64')
    d.pasrseLbc("lbc.bpf.obj")