def get_header()

in scripts/copyright_headers.py [0:0]


def get_header(filename):
    """Read the leading comment or docstring "header" of a source file.

    The file is scanned line by line (each line is whitespace-stripped) with
    a small state machine:

    * If the first line starts with ``#``, the header is the run of
      consecutive ``#`` lines at the top of the file.
    * If the first line starts with three double quotes, the header is the
      text up to the next occurrence of three double quotes (which may be on
      the same line).
    * Otherwise the header is considered empty.

    Args:
        filename: Path of the file to read.

    Returns:
        A ``(header, line_idx, state)`` tuple where ``header`` is the
        extracted text (comment markers / quotes removed, lines joined with
        newlines), ``state`` is the ``ReadState`` the parser ended in, and
        ``line_idx`` is:

        * the index of the first line that is not part of the header when
          ``state`` is ``EMPTY`` or ``COMMENT``;
        * the index of the line holding the closing triple quotes when
          ``state`` is ``TRIPLE_QUOTES``;
        * the total number of lines when the whole file is header (``0`` for
          an empty file).

    Raises:
        RuntimeError: If a triple-quoted header is never closed, or the
            parser reaches an unknown state.
    """
    state = ReadState.EMPTY
    header = []
    # Stays at -1 when the file has no lines, so the final ``line_idx + 1``
    # yields 0 instead of raising UnboundLocalError.
    line_idx = -1
    with open(filename, "r") as f:
        for line_idx, line in enumerate(f):
            line = line.strip()
            # Finite state machine to read "header" of Python source
            # TODO: Can I write this much more compactly with regular expressions?
            if state is ReadState.EMPTY:
                if line.startswith("#"):
                    state = ReadState.COMMENT
                    header.append(line[1:].strip())
                elif line.startswith('"""'):
                    state = ReadState.TRIPLE_QUOTES
                    remainder = line[3:]
                    close_idx = remainder.find('"""')
                    if close_idx != -1:
                        # One-line docstring: opening and closing quotes are
                        # on the same line, so the header ends right here.
                        header.append(remainder[:close_idx].strip())
                        return "\n".join(header).strip(), line_idx, state
                    header.append(remainder.strip())
                else:
                    # If the file doesn't begin with a comment we consider the
                    # header to be empty
                    return "\n".join(header).strip(), line_idx, state

            elif state is ReadState.COMMENT:
                if not line.startswith("#"):
                    # First non-comment line terminates the header
                    return "\n".join(header).strip(), line_idx, state
                header.append(line[1:].strip())

            elif state is ReadState.TRIPLE_QUOTES:
                close_idx = line.find('"""')
                if close_idx != -1:
                    # Keep only the text before the closing quotes
                    header.append(line[:close_idx].strip())
                    return "\n".join(header).strip(), line_idx, state
                header.append(line)

            else:
                raise RuntimeError("Invalid read state!")

    # Return error if triple quotes don't terminate
    if state is ReadState.TRIPLE_QUOTES:
        # Report the path, not the (already closed) file object
        raise RuntimeError(f"Unterminated multi-line string in {filename}")

    # If we get to here then the file is all header
    return "\n".join(header).strip(), line_idx + 1, state