in scripts/copyright_headers.py [0:0]
def get_header(filename):
    """Read the leading comment or triple-quoted "header" of a source file.

    The file is scanned line by line with a small state machine driven by
    ``ReadState`` (defined elsewhere in this module):

    * ``ReadState.EMPTY``: nothing read yet. The first line decides whether
      the header is a ``#`` comment block, a ``\"\"\"`` string, or absent.
    * ``ReadState.COMMENT``: inside a run of consecutive ``#`` lines.
    * ``ReadState.TRIPLE_QUOTES``: inside a ``\"\"\"``-delimited string.

    Args:
        filename: Path of the file to read.

    Returns:
        A ``(header, line_idx, state)`` tuple where

        * ``header`` is the header text with comment markers / quotes removed,
          one entry per source line joined by newlines and stripped;
        * ``line_idx`` is the 0-based index of the line at which scanning
          stopped: the first non-comment line for a ``#`` header, the line
          holding the closing ``\"\"\"`` for a triple-quoted header, or the
          total number of lines when the whole file is header (``0`` for an
          empty file);
        * ``state`` is the ``ReadState`` the machine was in when it stopped,
          which tells the caller what kind of header was found.

    Raises:
        RuntimeError: If a triple-quoted header is never closed, or if the
            state machine reaches an unknown state.
    """
    state = ReadState.EMPTY
    header = []
    # Pre-bind so that an empty file (zero loop iterations) reports index 0
    # through the final ``line_idx + 1`` instead of raising UnboundLocalError.
    line_idx = -1
    with open(filename, "r") as f:
        # Iterate the file lazily: the header is normally only a few lines,
        # so there is no need to load the whole file up front.
        for line_idx, line in enumerate(f):
            line = line.strip()
            if state is ReadState.EMPTY:
                if line.startswith("#"):
                    state = ReadState.COMMENT
                    header.append(line[1:].strip())
                elif line.startswith('"""'):
                    state = ReadState.TRIPLE_QUOTES
                    rest = line[3:]
                    close_idx = rest.find('"""')
                    if close_idx != -1:
                        # The string opens and closes on the same line, so
                        # the header ends here.
                        header.append(rest[:close_idx].strip())
                        return "\n".join(header).strip(), line_idx, state
                    header.append(rest.strip())
                else:
                    # If the file doesn't begin with a comment we consider
                    # the header to be empty.
                    return "\n".join(header).strip(), line_idx, state
            elif state is ReadState.COMMENT:
                if line.startswith("#"):
                    header.append(line[1:].strip())
                else:
                    # First non-comment line terminates the header.
                    return "\n".join(header).strip(), line_idx, state
            elif state is ReadState.TRIPLE_QUOTES:
                close_idx = line.find('"""')
                if close_idx != -1:
                    # Keep any text preceding the closing quotes.
                    header.append(line[:close_idx].strip())
                    return "\n".join(header).strip(), line_idx, state
                header.append(line)
            else:
                raise RuntimeError("Invalid read state!")
    # Reaching the end of the file while still inside the string means the
    # triple quotes were never closed.
    if state is ReadState.TRIPLE_QUOTES:
        raise RuntimeError(f"Unterminated multi-line string in {filename}")
    # If we get to here then the file is all header (or is empty).
    return "\n".join(header).strip(), line_idx + 1, state