func()

in internal/git/catfile/parse_commit.go [96:252]


func (p *parser) ParseCommit(object git.Object) (*Commit, error) {
	commit := &gitalypb.GitCommit{Id: object.ObjectID().String()}
	var payload []byte
	currentSignatureIndex := 0
	signatures := [][]byte{}

	bytesRemaining := object.ObjectSize()
	p.bufferedReader.Reset(object)

	for state := parseCommitStateHeader; state != parseCommitStateEnd; {
		receivedEOF := false

		line, err := p.bufferedReader.ReadString('\n')
		if errors.Is(err, io.EOF) {
			receivedEOF = true
		} else if err != nil {
			return nil, fmt.Errorf("parse raw commit: %w", err)
		}
		bytesRemaining -= int64(len(line))

		// If the line only consists of a newline, we can skip
		// the state to commit body.
		if line == "\n" {
			state = parseCommitStateBody
		}

		switch state {
		case parseCommitStateHeader:
			key, value, ok := strings.Cut(line, " ")
			if !ok {
				// TODO: Current tests allow empty commits, we might want
				// to change this behavior.
				goto loopEnd
			}

			// For headers, we trim the newline to make it easier
			// to parse.
			value = strings.TrimSuffix(value, "\n")

			switch key {
			case "parent":
				commit.ParentIds = append(commit.ParentIds, value)
			case "author":
				commit.Author = parseCommitAuthor(value)
			case "committer":
				commit.Committer = parseCommitAuthor(value)
			case "tree":
				commit.TreeId = value
			case "encoding":
				commit.Encoding = value
			case gpgSignaturePrefix, gpgSignaturePrefixSha256:
				// Since Git only considers the first signature, we only
				// capture the first signature's type.
				commit.SignatureType = detectSignatureType(value)

				state = parseCommitStateSignature
				signatures = append(signatures, []byte(value+"\n"))

				goto loopEnd
			}

			payload = append(payload, []byte(line)...)

		case parseCommitStateSignature:
			if after, ok := strings.CutPrefix(line, " "); ok {
				// All signature lines, must start with a ' ' (space).
				signatures[currentSignatureIndex] = append(signatures[currentSignatureIndex], []byte(after)...)
				goto loopEnd
			} else {
				currentSignatureIndex++

				// Multiple signatures might be present in the commit.
				if key, value, ok := strings.Cut(line, " "); ok {
					if key == gpgSignaturePrefix || key == gpgSignaturePrefixSha256 {
						signatures = append(signatures, []byte(value))
						goto loopEnd
					}
				}

				// If there is no ' ' (space), it means there is some unexpected
				// data.
				//
				// Note that we don't go back to parsing headers. This is because
				// any headers which are present after the signature are not parsed
				// by Git as information. But, they still constitute to the signature
				// payload. So any data after the signature and before the commit body
				// is considered unexpected.
				state = parseCommitStateUnexpected
			}

			fallthrough

		case parseCommitStateUnexpected:
			// If the line is only a newline, that means we have reached
			// the commit body. If not, we keep looping till we do.
			if line != "\n" {
				payload = append(payload, []byte(line)...)
				goto loopEnd
			}

			fallthrough

		case parseCommitStateBody:
			payload = append(payload, []byte(line)...)

			body := make([]byte, bytesRemaining)
			if _, err := io.ReadFull(p.bufferedReader, body); err != nil {
				return nil, fmt.Errorf("reading commit message: %w", err)
			}

			// After we have copied the body, we must make sure that there really is no
			// additional data. For once, this is to detect bugs in our implementation where we
			// would accidentally have truncated the commit message. On the other hand, we also
			// need to do this such that we observe the EOF, which we must observe in order to
			// unblock reading the next object.
			//
			// This all feels a bit complicated, where it would be much easier to just read into
			// a preallocated `bytes.Buffer`. But this complexity is indeed required to optimize
			// allocations. So if you want to change this, please make sure to execute the
			// `BenchmarkListAllCommits` benchmark.
			if n, err := io.Copy(io.Discard, p.bufferedReader); err != nil {
				return nil, fmt.Errorf("reading commit message: %w", err)
			} else if n != 0 {
				return nil, fmt.Errorf(
					"commit message exceeds expected length %v by %v bytes",
					object.ObjectSize(), n,
				)
			}

			if len(body) > 0 {
				commit.Subject = subjectFromBody(body)
				commit.BodySize = int64(len(body))
				commit.Body = body
				if max := helper.MaxCommitOrTagMessageSize; len(body) > max {
					commit.Body = commit.GetBody()[:max]
				}
				payload = append(payload, body...)
			}

			state = parseCommitStateEnd
		}

	loopEnd:
		if receivedEOF {
			state = parseCommitStateEnd
		}
	}

	for i, signature := range signatures {
		signatures[i] = bytes.TrimSuffix(signature, []byte("\n"))
	}

	return &Commit{
		GitCommit:     commit,
		SignatureData: SignatureData{Signatures: signatures, Payload: payload},
	}, nil
}