# scripts/rulekey_diag2.py
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function, unicode_literals
import math
import traceback
from rulekey_diff2 import *
def format_duration(duration_s):
    """Format a duration given in seconds as a human-readable string.

    Negative durations render as "-", sub-millisecond ones as microseconds,
    sub-second ones as milliseconds, and everything else as HH:MM:SS.mmm.
    """
    if duration_s < 0:
        return "-"
    if duration_s < 1e-3:
        return "%0.3f us" % (duration_s * 1e6)
    if duration_s < 1:
        return "%0.3f ms" % (duration_s * 1e3)
    # Use floor division so the arithmetic stays integral: with
    # `from __future__ import division` in effect, plain `/=` yields
    # floats and risks precision loss for long durations.
    t = int(math.floor(duration_s * 1000))
    ms = t % 1000
    t //= 1000
    s = t % 60
    t //= 60
    m = t % 60
    t //= 60
    h = t
    return "%02d:%02d:%02d.%03d" % (h, m, s, ms)
def truncate(s, l):
    """Truncate string `s` so its length does not exceed `l`.

    A limit of None means "no limit". When truncation happens, the last
    three characters become "..."; limits below 3 yield an empty string.
    """
    if l is None:
        return s
    if len(s) <= l:
        return s
    if l < 3:
        return ""
    return s[: l - 3] + "..."
class Table:
    """
    A printable table.

    Rows are lists of cell values; an empty row renders as a horizontal
    separator line. Per-column display limits come from `max_lengths`.
    """

    def __init__(self, rows=None, max_lengths=None):
        self.rows = rows or []
        self.max_lengths = max_lengths or []

    def add_row(self, row):
        self.rows.append(row)

    def max_length(self, i):
        # Columns beyond the configured list have no display limit.
        if i < len(self.max_lengths):
            return self.max_lengths[i]
        return None

    def format_element(self, el, i):
        # Pad with one space on each side, then clip to the column limit.
        return truncate(" %s " % el, self.max_length(i))

    def __str__(self):
        # First pass: compute each column's width from its widest cell.
        widths = []
        for row in self.rows:
            if len(widths) < len(row):
                widths.extend([0] * (len(row) - len(widths)))
            for i, el in enumerate(row):
                widths[i] = max(widths[i], len(self.format_element(el, i)))
        # Second pass: render rows; empty rows become "+---+" separators.
        rendered = []
        for row in self.rows:
            if row:
                line = "|"
                for i, w in enumerate(widths):
                    if w > 0:
                        cell = self.format_element(row[i], i) if i < len(row) else ""
                        line += cell.ljust(w) + "|"
            else:
                line = "+"
                for w in widths:
                    if w > 0:
                        line += "-" * w + "+"
            rendered.append(line)
        return "\n".join(rendered)
class NodeInfo:
    """
    A class that encapsulates per node information.

    Parses one space-separated line of the graph file:
    `id duration rule_type target is_cacheable default_key input_key
    depfile_key manifest_key output_hash`.
    """

    # (attribute, sort direction) per column id; the duration columns are
    # negated so that an ascending sort shows the longest durations first.
    _SORT_COLUMNS = [
        ("id", 1),
        ("rule_type", 1),
        ("target", 1),
        ("duration", -1),
        ("total_duration", -1),
        ("is_cacheable", 1),
        ("default_key", 1),
        ("input_key", 1),
        ("depfile_key", 1),
        ("manifest_key", 1),
        ("output_hash", 1),
    ]

    def __init__(self, line):
        parts = line.strip().split(" ")
        self.id = int(parts[0])
        self.duration = int(parts[1])  # presumably nanoseconds (divided by 1e9 for display)
        self.total_duration = -1  # filled in later by Graph.calc_total_durations
        self.rule_type = parts[2]
        self.target = parts[3]
        self.is_cacheable = bool(int(parts[4]))
        self.default_key = parts[5]
        self.input_key = parts[6]
        self.depfile_key = parts[7]
        self.manifest_key = parts[8]
        self.output_hash = parts[9]

    def get_column_values(self):
        """Returns this node's values as display strings, one per column."""
        return [
            str(self.id),
            str(self.rule_type),
            str(self.target),
            format_duration(self.duration / 1e9),
            format_duration(self.total_duration / 1e9),
            str(self.is_cacheable),
            str(self.default_key),
            str(self.input_key),
            str(self.depfile_key),
            str(self.manifest_key),
            str(self.output_hash),
        ]

    @staticmethod
    def num_columns():
        return len(NodeInfo._SORT_COLUMNS)

    @staticmethod
    def get_column_names():
        return [
            "id",
            "rule_type",
            "target",
            "duration",
            "total_duration",
            "is_cacheable",
            "default_key",
            "input_key",
            "depfile_key",
            "manifest_key",
            "output_hash",
        ]

    @staticmethod
    def get_max_lengths():
        # 0 hides a column entirely; the key columns are hidden by default.
        return [
            1000,  # "id"
            1000,  # "rule_type"
            1000,  # "target"
            1000,  # "duration"
            1000,  # "total_duration"
            1000,  # "is_cacheable"
            1000,  # "default_key"
            0,  # "input_key"
            0,  # "depfile_key"
            0,  # "manifest_key"
            1000,  # "output_hash"
        ]

    @staticmethod
    def compare_columns(info1, info2, col_id):
        """Three-way comparison of two nodes on column `col_id`.

        Returns -1/0/1; out-of-range column ids compare equal.
        """
        if not 0 <= col_id < len(NodeInfo._SORT_COLUMNS):
            return 0
        attr, sign = NodeInfo._SORT_COLUMNS[col_id]
        a = getattr(info1, attr)
        b = getattr(info2, attr)
        # (a > b) - (a < b) == cmp(a, b); the builtin cmp() was removed in
        # Python 3, so spell it out for 2/3 compatibility.
        return sign * ((a > b) - (a < b))
class Graph:
    """
    A class that encapsulates nodes and dependencies among them.
    """

    def __init__(self, lines):
        """Reads the graph from an iterable of lines.

        Expected format: a node count, that many NodeInfo lines, an edge
        count, then that many `u v` edge lines (u depends on v).
        """
        it = iter(lines)
        # time.clock() was removed in Python 3.8; prefer perf_counter()
        # when available and fall back for Python 2.
        clock = getattr(time, "perf_counter", None) or time.clock
        t0 = clock()
        # read nodes
        n = int(next(it))
        self.nodes = [None] * n
        for i in range(n):
            info = NodeInfo(next(it))
            self.nodes[info.id] = info
        # read deps (two extra slots for the synthetic sink nodes below)
        m = int(next(it))
        self.deps = [[] for _ in range(n + 2)]
        self.rdeps = [[] for _ in range(n + 2)]
        deg = [0] * (n + 1)
        for i in range(m):
            parts = next(it).split(" ")
            u = int(parts[0])
            v = int(parts[1])
            self.deps[u].append(v)
            self.rdeps[v].append(u)
            deg[v] += 1
        # all nodes: convenience sink (has all the nodes as its direct dep)
        self.id_all = n + 0
        # list() so this is a real list on Python 3 too (range is lazy there)
        self.deps[self.id_all] = list(range(n))
        # root nodes: convenience sink (has all the nodes that no node depends on)
        self.id_roots = n + 1
        self.deps[self.id_roots] = [i for i in range(n) if deg[i] == 0]
        # compute additional data
        self.calc_total_durations()
        self.calc_target_index()
        dt = clock() - t0
        eprint("nodes: ", n, ", edges: ", m, ", time: ", dt, sep="")

    def __len__(self):
        return len(self.nodes)

    def calc_total_durations(self):
        """For every node, computes the sum of durations over its transitive
        dependency closure (each dep counted once, node itself included)."""
        for u in range(len(self.nodes)):
            if self.nodes[u].total_duration == -1:
                total = 0
                # iterative DFS to avoid recursion limits on deep graphs
                stk = [u]
                visited = {u}
                while len(stk) > 0:
                    w = stk.pop()
                    total += self.nodes[w].duration
                    for v in self.deps[w]:
                        if v not in visited:
                            visited.add(v)
                            stk.append(v)
                self.nodes[u].total_duration = total

    def calc_target_index(self):
        """Builds the target-name -> NodeInfo lookup table."""
        self.target_index = {}
        for node in self.nodes:
            self.target_index[node.target] = node
def print_deps(g, u, sort_col_id, cacheable_only, reverse_deps=False):
    """Prints a table of the deps (or reverse deps) of node `u` in graph `g`.

    `u` may also be `g.id_all` or `g.id_roots` (synthetic sink nodes).
    Rows are ordered by `sort_col_id` (see NodeInfo.compare_columns); when
    `cacheable_only` is set, non-cacheable rules are filtered out.
    """
    from functools import cmp_to_key  # function-local: keeps top-level imports unchanged

    is_proper_node = 0 <= u < len(g)
    if not is_proper_node and u != g.id_all and u != g.id_roots:
        eprint("id out of range:", u)
        return

    def col_cmp(v1, v2):
        return NodeInfo.compare_columns(g.nodes[v1], g.nodes[v2], sort_col_id)

    def id_col(i):
        # mark the current sort column with an asterisk
        return str(i) + (" * " if i == sort_col_id else "")

    # build & print table
    tbl = Table([], NodeInfo.get_max_lengths())
    tbl.add_row([])
    tbl.add_row([id_col(i) for i in range(NodeInfo.num_columns())])
    tbl.add_row(NodeInfo.get_column_names())
    if is_proper_node:
        tbl.add_row(g.nodes[u].get_column_values())
    tbl.add_row([])
    deps = g.rdeps[u] if reverse_deps else g.deps[u]
    # sorted(iterable, cmp_fn) is Python 2 only; cmp_to_key works on 2.7+/3
    for v in sorted(deps, key=cmp_to_key(col_cmp)):
        if g.nodes[v].is_cacheable or not cacheable_only:
            tbl.add_row(g.nodes[v].get_column_values())
    tbl.add_row([])
    if u == g.id_all:
        print("all:")
    elif u == g.id_roots:
        print("roots:")
    elif reverse_deps:
        print("reverse deps for:", u)
    else:
        print("deps for:", u)
    print(str(tbl))
def print_node(g, u):
    """Prints a single node of graph `g` as a two-column (name, value) table."""
    if not (0 <= u < len(g)):
        eprint("id out of range:", u)
        return
    tbl = Table()
    tbl.add_row([])
    names = NodeInfo.get_column_names()
    values = g.nodes[u].get_column_values()
    for name, val in zip(names, values):
        tbl.add_row([name, val])
    tbl.add_row([])
    print(str(tbl))
def print_help():
    """Prints the list of interactive commands and their descriptions."""
    # typo fixes vs. previous revision: "hahs" -> "hash", "the both keys sets"
    # -> "both key sets"
    description = """
interactive commands:
q quits
h prints this help message
lg <path> loads graph from a file
c toggles cacheable_only mode (only cacheable rules get displayed)
r shows root nodes
a shows all nodes
d 12 shows deps of node 12
rd 12 shows reverse deps (parents) of node 12
b shows deps of the previously queried node
s 3 selects 3-rd column as the sort column
p 12 prints the node 12
ft <pattern> finds and lists all the targets matching the given pattern
lk <path> loads keys from a file
lkr <path> loads reference keys from a file
fk <criteria> finds and lists all the keys matching the given criteria
fkr <criteria> finds and lists all the reference keys matching the given criteria
criteria: pattern1 field1 [pattern2 field2 pattern3 field3 ...]
e.g.: `fk java/com/example/my_target .target_name`
pk <hash> shows rulekey composition for the given key hash
pkr <hash> shows rulekey composition for the given reference key hash
pt shows all the targets whose rulekey differ from the reference key hash
pta shows all the targets that are present in both key sets
dt <target> diffs the keys and reference keys for the given target
gv path saves the graph as a graphviz file
Note that some commands require the corresponding graph or keys file to be loaded.
These files can be found at `buck-out/log/{build}/rule_key_diag_{graph|keys}.txt`.
It is enough to specify just the containing folder though.
"""
    print(description)
class Diag:
    """
    A class that encapsulates diagnostics state such as graph and rulekey data.
    """

    def __init__(self):
        self.graph = None  # Graph instance, or None until load_graph()
        self.keys = None  # rulekey hash -> rulekey data
        self.targets_to_keys = None  # target name -> list of key hashes
        self.keys_ref = None  # reference rulekeys (for diffing)
        self.targets_to_keys_ref = None

    def load_graph(self, path):
        """Loads the dependency graph; `path` may be the file or its folder."""
        if not os.path.exists(path):
            raise Exception(path + " does not exist")
        if os.path.isdir(path):
            path = os.path.join(path, "rule_key_diag_graph.txt")
        eprint("Loading:", path)
        with codecs.open(path, "r", "utf-8") as graph_file:
            self.graph = Graph(graph_file)

    def load_keys(self, path):
        """Loads rulekeys and builds the target -> keys index."""
        self.keys = read_rulekeys_autodetect(path)
        self.targets_to_keys = build_targets_to_rulekeys_index(self.keys)

    def load_keys_ref(self, path):
        """Loads the reference rulekeys and builds their target index."""
        self.keys_ref = read_rulekeys_autodetect(path)
        self.targets_to_keys_ref = build_targets_to_rulekeys_index(self.keys_ref)

    def print_targets_intersection(self, only_different, only_cacheable):
        """Prints targets present in both key sets, plus summary counts.

        `only_different` restricts output to targets whose keys differ from
        the reference; `only_cacheable` restricts it to cacheable rules
        (cacheability is only known when a graph is loaded).
        """
        same = 0
        nonc = 0
        # dict.iteritems() is Python 2 only; items() works on both 2 and 3
        for target, keys in self.targets_to_keys.items():
            if target in self.targets_to_keys_ref:
                keys_ref = self.targets_to_keys_ref[target]
                cacheable = False
                if self.graph is not None and target in self.graph.target_index:
                    cacheable = self.graph.target_index[target].is_cacheable
                if keys == keys_ref:
                    same += 1
                    if not cacheable:
                        nonc += 1
                if (keys != keys_ref or not only_different) and (
                    cacheable or not only_cacheable
                ):
                    print(target, keys, keys_ref)
        # "matching" was previously misspelled as "mathcing"
        print("%d with matching keys, %d out of which non-cacheable" % (same, nonc))

    def save_graphviz(self, path):
        """Saves the loaded graph in graphviz format.

        Render with e.g.: $ dot -Tpng graph.gv -o graph.png
        """
        with codecs.open(path, "w", "utf-8") as gv_file:
            print("digraph G {", file=gv_file)
            for u in range(len(self.graph)):
                label = self.graph.nodes[u].target
                print(' %d [shape=box, label="%s"];' % (u, label), file=gv_file)
            for u in range(len(self.graph)):
                for v in self.graph.deps[u]:
                    print(" %d -> %d;" % (u, v), file=gv_file)
            print("}", file=gv_file)

    def find_targets(self, pattern):
        """Returns ids of all graph nodes whose target matches regex `pattern`."""
        regex = re.compile(pattern)
        res = []
        for u in range(len(self.graph)):
            if regex.search(self.graph.nodes[u].target) is not None:
                res.append(u)
        return res

    def diff_keys_for_target(self, target):
        """Diffs the keys against the reference keys for the given target."""
        # use the prebuilt index when finding keys for target
        keys_left = {k: self.keys[k] for k in self.targets_to_keys.get(target, [])}
        keys_right = {
            k: self.keys_ref[k] for k in self.targets_to_keys_ref.get(target, [])
        }
        keys_tuple_to_diff = find_keys_for_target(keys_left, keys_right, target)
        if keys_tuple_to_diff is not None:
            diff_keys_recursively(self.keys, self.keys_ref, keys_tuple_to_diff)
def parse_args():
    """Parses command-line arguments.

    When invoked with no arguments at all, also prints the argparse usage
    followed by the interactive command help before parsing.
    """
    parser = argparse.ArgumentParser(description="\n")
    # (short, long, metavar, help) per option
    options = [
        ("-g", "--graph", "<path>", "path to a graph file to load"),
        ("-k", "--keys", "<path>", "path to a keys file to load"),
        (
            "-l",
            "--log_folder",
            "<path>",
            "folder containing the keys and graph file (use instead of -g and -k)",
        ),
        ("-r", "--ref_keys", "<path>", "path to a reference keys file to load"),
    ]
    for short_opt, long_opt, metavar, help_text in options:
        parser.add_argument(short_opt, long_opt, metavar=metavar, help=help_text)
    if len(sys.argv) == 1:
        parser.print_help()
        print_help()
    return parser.parse_args()
def main():
    """Runs the interactive rulekey diagnostics shell.

    Pre-loads graph/keys data from command-line arguments when given, then
    reads commands (see print_help) until EOF or the `q` command.
    """
    print("RuleKey Diagnostics script v0.1")
    d = Diag()
    args = parse_args()
    if (args.graph or args.log_folder) is not None:
        d.load_graph(args.graph or args.log_folder)
    if (args.keys or args.log_folder) is not None:
        d.load_keys(args.keys or args.log_folder)
    if args.ref_keys is not None:
        d.load_keys_ref(args.ref_keys)
    # raw_input() was renamed to input() in Python 3
    try:
        read_command = raw_input
    except NameError:
        read_command = input
    bstk = []  # stack of previously queried node ids (the `b` command pops it)
    sort_col_id = 4  # default sort column: total_duration
    cacheable_only = True
    while True:
        try:
            print()
            parts = read_command("> ").strip().split(" ")
        except EOFError:
            break
        try:
            cmd = parts[0]
            if cmd == "q" or cmd == "quit":
                break
            if cmd == "h" or cmd == "help":
                print_help()
            elif cmd == "lg" or cmd == "load_graph":
                path = parts[1]
                d.load_graph(path)
            elif cmd == "r" or cmd == "roots":
                bstk.append(d.graph.id_roots)
                print_deps(d.graph, bstk[-1], sort_col_id, cacheable_only)
            elif cmd == "a" or cmd == "all":
                bstk.append(d.graph.id_all)
                print_deps(d.graph, bstk[-1], sort_col_id, cacheable_only)
            elif cmd == "c" or cmd == "cacheable_only":
                cacheable_only = not cacheable_only
                print("cacheable_only: %s" % cacheable_only)
            elif cmd == "d" or cmd == "deps":
                bstk.append(int(parts[1]))
                print_deps(d.graph, bstk[-1], sort_col_id, cacheable_only)
            elif cmd == "rd" or cmd == "parents":
                bstk.append(int(parts[1]))
                print_deps(d.graph, bstk[-1], sort_col_id, cacheable_only, True)
            elif cmd == "b" or cmd == "back":
                if len(bstk) > 1:
                    bstk.pop()
                print_deps(d.graph, bstk[-1], sort_col_id, cacheable_only)
            elif cmd == "s" or cmd == "sort":
                sort_col_id = int(parts[1])
                print_deps(d.graph, bstk[-1], sort_col_id, cacheable_only)
            elif cmd == "p" or cmd == "print":
                u = int(parts[1])
                print_node(d.graph, u)
            elif cmd == "ft" or cmd == "find_target":
                pattern = parts[1]
                for u in d.find_targets(pattern):
                    print(u, d.graph.nodes[u].target)
            elif cmd == "lk" or cmd == "load_keys":
                path = parts[1]
                d.load_keys(path)
            elif cmd == "lkr" or cmd == "load_keys_ref":
                path = parts[1]
                d.load_keys_ref(path)
            elif cmd == "fk" or cmd == "find_keys":
                # pair up [p1, f1, p2, f2, ...] into [(p1, f1), (p2, f2), ...]
                criteria = zip(*[iter(parts[1:])] * 2)
                for key in find_keys(d.keys, criteria):
                    print(key)
            elif cmd == "fkr" or cmd == "find_keys_ref":
                criteria = zip(*[iter(parts[1:])] * 2)
                for key in find_keys(d.keys_ref, criteria):
                    print(key)
            elif cmd == "pk" or cmd == "print_key":
                rulekey_hash = parts[1]
                print_rulekey(d.keys.get(rulekey_hash, []))
            elif cmd == "pkr" or cmd == "print_key_ref":
                rulekey_hash = parts[1]
                print_rulekey(d.keys_ref.get(rulekey_hash, []))
            elif cmd == "pt" or cmd == "print_targets":
                d.print_targets_intersection(True, cacheable_only)
            elif cmd == "pta" or cmd == "print_targets_all":
                d.print_targets_intersection(False, cacheable_only)
            elif cmd == "dt" or cmd == "diff_target":
                # accept either a numeric node id or a target name
                try:
                    d.diff_keys_for_target(d.graph.nodes[int(parts[1])].target)
                except ValueError:
                    d.diff_keys_for_target(parts[1])
            elif cmd == "gv" or cmd == "save_graphviz":
                path = parts[1]
                d.save_graphviz(path)
            else:
                eprint("unknown command: ", parts)
        except Exception:
            # best-effort shell: report the failure and keep accepting commands
            eprint(
                "Something went wrong. Make sure you loaded all the required data and\n"
                "specified all the required arguments necessary for performing the command."
            )
            eprint(traceback.format_exc())


if __name__ == "__main__":
    main()