internal/gitaly/diff/patch.go (102 lines of code) (raw):

package diff import ( "bufio" "bytes" "errors" "fmt" "io" "gitlab.com/gitlab-org/gitaly/v16/internal/git" ) // PatchParser defines the parser state required for parsing diffs. Patch output from a git diff // family command is parsed line by line. Patches are delimited by git diff headers. type PatchParser struct { // reader contains the patch output for a diff command. reader *bufio.Reader // rawInfo specifies the list of patches in the expected parsing order. rawInfo []Raw // patchLimit specifies the max byte size of an individual patch. If exceeded, the patch data // is pruned. patchLimit int32 // objectHash specifies the format of the object IDs. objectHash git.ObjectHash // diff contains the last parsed diff and is reused for every patch. diff Diff // err records any error encountered during parsing operations. err error // nextSrcPath stores the path of the next patch to be parsed. A patch only gets read once the // parser is processing the corresponding raw entry. Raw output that does not have matching // patch output is treated as an empty patch. nextSrcPath []byte } // NewPatchParser initializes and returns a new diff parser. func NewPatchParser(reader io.Reader, rawInfo []Raw, patchLimit int32, objectHash git.ObjectHash) *PatchParser { return &PatchParser{ reader: bufio.NewReader(reader), rawInfo: rawInfo, patchLimit: patchLimit, objectHash: objectHash, } } // Parse parses a single diff. It returns true if successful, false if it finished parsing all // diffs or when it encounters an error, in which case use Parser.Err() to get the error. func (p *PatchParser) Parse() bool { if len(p.rawInfo) == 0 { // If there is nothing left to parse, there should also be no remaining patch output to // consume. Discard any remaining output just to be sure. _, _ = io.Copy(io.Discard, p.reader) return false } raw := p.rawInfo[0] p.rawInfo = p.rawInfo[1:] // Type changes are reflected in Git diff output as a removal followed by an addition. The raw // diff info is used to define the expected order of parsed diff output and provide mode and // OID info. This also allows responses to be sent for diffs with no patch output. Therefore, // the type change status must be handled specially to maintain the correct expected order. if raw.Status == 'T' { addedRaw := raw addedRaw.SrcMode = 0 addedRaw.SrcOID = p.objectHash.ZeroOID.String() addedRaw.Status = 'A' raw.DstMode = 0 raw.DstOID = p.objectHash.ZeroOID.String() raw.Status = 'R' p.rawInfo = append([]Raw{addedRaw}, p.rawInfo...) } p.diff.Reset() p.diff.FromID = raw.SrcOID p.diff.ToID = raw.DstOID p.diff.FromPath = raw.SrcPath p.diff.ToPath = raw.DstPath if p.nextSrcPath == nil { srcPath, err := readDiffHeaderFromPath(p.reader) if err != nil && !errors.Is(err, io.EOF) { p.err = fmt.Errorf("read diff header: %w", err) return false } p.nextSrcPath = srcPath } if bytes.Equal(p.nextSrcPath, raw.SrcPath) { p.nextSrcPath = nil if err := readNextDiff(p.reader, &p.diff, false); err != nil { p.err = fmt.Errorf("read diff content: %w", err) return false } p.diff.PatchSize = int32(len(p.diff.Patch)) if p.patchLimit != 0 && p.diff.PatchSize > p.patchLimit { p.diff.TooLarge = true p.diff.ClearPatch() } } return true } // Diff returns a successfully parsed diff. It should be called only when Parser.Parse() returns // true. The return value is valid only until the next call to Parser.Parse(). func (p *PatchParser) Diff() *Diff { return &p.diff } // Err returns the error encountered (if any) when parsing the diff stream. It should be called // only when Parser.Parse() returns false. func (p *PatchParser) Err() error { return p.err } // Raw defines raw formatted diff output. See https://git-scm.com/docs/diff-format#_raw_output_format. type Raw struct { SrcMode int32 DstMode int32 SrcOID string DstOID string Status byte Score int32 SrcPath []byte DstPath []byte } // ToBytes output a raw diff line in the nul-delimited format. func (r Raw) ToBytes() []byte { var raw bytes.Buffer raw.WriteString(fmt.Sprintf(":%06o %06o %s %s %c", r.SrcMode, r.DstMode, r.SrcOID, r.DstOID, r.Status)) if r.Status == 'R' || r.Status == 'C' { raw.WriteString(fmt.Sprintf("%03d", r.Score)) } raw.WriteByte(0x00) raw.Write(r.SrcPath) if len(r.DstPath) > 0 { raw.WriteByte(0x00) raw.Write(r.DstPath) } raw.WriteByte(0x00) return raw.Bytes() }