in internal/git/catfile/parse_commit.go [96:252]
// ParseCommit reads the raw commit served by object and turns it into a Commit.
//
// The object is consumed line by line by a small state machine:
//
//   - header: "key value" lines. Recognized keys populate the GitCommit; a
//     signature header switches to the signature state. Lines without a space
//     separator are skipped.
//   - signature: lines starting with a space continue the current signature,
//     a further signature header starts a new one, anything else switches to
//     the unexpected state.
//   - unexpected: lines are only recorded in the payload until the blank line
//     that introduces the body shows up.
//   - body: the rest of the object is read in one go as the commit message.
//
// The returned SignatureData holds every signature that was found, each
// stripped of one trailing newline, and the payload: all processed lines other
// than signature lines and skipped header lines, followed by the message.
//
// An error is returned if reading from the object fails or if the object
// yields more data than its reported size.
func (p *parser) ParseCommit(object git.Object) (*Commit, error) {
	var (
		signedPayload []byte
		sigIdx        int
	)
	sigs := [][]byte{}
	gitCommit := &gitalypb.GitCommit{Id: object.ObjectID().String()}

	// Tracks how many bytes of the object have not been consumed yet so that
	// the message buffer can be allocated with its exact size later on.
	remaining := object.ObjectSize()
	p.bufferedReader.Reset(object)

	state := parseCommitStateHeader
	for state != parseCommitStateEnd {
		line, err := p.bufferedReader.ReadString('\n')
		atEOF := errors.Is(err, io.EOF)
		if err != nil && !atEOF {
			return nil, fmt.Errorf("parse raw commit: %w", err)
		}
		remaining -= int64(len(line))

		// A line holding nothing but a newline separates the headers from
		// the commit body, regardless of the state we are currently in.
		if line == "\n" {
			state = parseCommitStateBody
		}

	dispatch:
		switch state {
		case parseCommitStateHeader:
			key, value, found := strings.Cut(line, " ")
			if !found {
				// TODO: Current tests allow empty commits, we might want
				// to change this behavior.
				break dispatch
			}

			// Header values are easier to handle without the line terminator.
			value = strings.TrimSuffix(value, "\n")

			switch key {
			case "tree":
				gitCommit.TreeId = value
			case "parent":
				gitCommit.ParentIds = append(gitCommit.ParentIds, value)
			case "author":
				gitCommit.Author = parseCommitAuthor(value)
			case "committer":
				gitCommit.Committer = parseCommitAuthor(value)
			case "encoding":
				gitCommit.Encoding = value
			case gpgSignaturePrefix, gpgSignaturePrefixSha256:
				// Git only considers the first signature, so the signature
				// type is derived from the first one only. The header state
				// is never re-entered after this point.
				gitCommit.SignatureType = detectSignatureType(value)

				state = parseCommitStateSignature
				sigs = append(sigs, append([]byte(value), '\n'))

				// The signature is not part of the signed payload.
				break dispatch
			}

			signedPayload = append(signedPayload, line...)

		case parseCommitStateSignature:
			// Every continuation line of a signature starts with a single
			// space, which is not part of the signature itself.
			if strings.HasPrefix(line, " ") {
				sigs[sigIdx] = append(sigs[sigIdx], line[1:]...)
				break dispatch
			}

			// The current signature is complete. A commit may carry more
			// than one, so check whether another one starts here.
			sigIdx++
			if key, value, found := strings.Cut(line, " "); found &&
				(key == gpgSignaturePrefix || key == gpgSignaturePrefixSha256) {
				sigs = append(sigs, []byte(value))
				break dispatch
			}

			// Anything else is unexpected data. We intentionally do not
			// return to header parsing: headers following the signature are
			// not interpreted by Git as information, yet they still are part
			// of the signed payload. Everything between the signature and
			// the commit body is therefore treated as unexpected.
			state = parseCommitStateUnexpected
			fallthrough

		case parseCommitStateUnexpected:
			// Keep collecting lines until the newline-only line that marks
			// the start of the commit body is reached.
			if line != "\n" {
				signedPayload = append(signedPayload, line...)
				break dispatch
			}
			fallthrough

		case parseCommitStateBody:
			signedPayload = append(signedPayload, line...)

			message := make([]byte, remaining)
			if _, readErr := io.ReadFull(p.bufferedReader, message); readErr != nil {
				return nil, fmt.Errorf("reading commit message: %w", readErr)
			}

			// With the message copied, verify that no data is left over. This
			// catches bugs where we would have truncated the message, and it
			// makes sure we observe the EOF, which is required to unblock
			// reading the next object.
			//
			// Reading into a preallocated `bytes.Buffer` would be simpler, but
			// this dance is needed to keep allocations low. Run the
			// `BenchmarkListAllCommits` benchmark before changing it.
			excess, drainErr := io.Copy(io.Discard, p.bufferedReader)
			if drainErr != nil {
				return nil, fmt.Errorf("reading commit message: %w", drainErr)
			}
			if excess != 0 {
				return nil, fmt.Errorf(
					"commit message exceeds expected length %v by %v bytes",
					object.ObjectSize(), excess,
				)
			}

			if len(message) > 0 {
				gitCommit.Subject = subjectFromBody(message)
				gitCommit.BodySize = int64(len(message))
				gitCommit.Body = message
				// The stored body is capped, whereas BodySize and the
				// payload always reflect the complete message.
				if limit := helper.MaxCommitOrTagMessageSize; len(message) > limit {
					gitCommit.Body = message[:limit]
				}
				signedPayload = append(signedPayload, message...)
			}

			state = parseCommitStateEnd
		}

		// Running out of input ends parsing no matter which state we are in.
		if atEOF {
			state = parseCommitStateEnd
		}
	}

	for i := range sigs {
		sigs[i] = bytes.TrimSuffix(sigs[i], []byte("\n"))
	}

	return &Commit{
		GitCommit:     gitCommit,
		SignatureData: SignatureData{Signatures: sigs, Payload: signedPayload},
	}, nil
}