func unpackObject()

in internal/gitfs/pack.go [68:184]


// unpackObject decodes the single pack entry that starts at objs[off].
//
// Delta entries are resolved against their base object: a ref-delta base is
// looked up in s by hash, and an ofs-delta base is decoded recursively from
// an earlier offset in objs. The reconstructed object is then handed to
// s.add.
//
// It returns the object's type (for a delta entry, the type of its base),
// the hash and content returned by s.add, and encSize, the number of bytes
// of objs, counted from off, that were consumed while decoding the entry.
// On failure every result other than err is the zero value.
func unpackObject(s *store, objs []byte, off int) (typ objType, h Hash, content []byte, encSize int, err error) {
	fail := func(err error) (objType, Hash, []byte, int, error) {
		return 0, Hash{}, nil, 0, err
	}
	if off < 0 || off >= len(objs) {
		return fail(fmt.Errorf("invalid object offset"))
	}

	// Object starts with a varint-encoded type and length n.
	// Decoded as a plain uvarint, the type ends up in bits 4-6 of u and the
	// bits of n are split around it.
	// (n is the length of the inflated data that follows; for a delta entry
	// that is the length of the delta encoding, not of the final object.)
	u, size := binary.Uvarint(objs[off:])
	if size <= 0 {
		return fail(fmt.Errorf("invalid object: bad varint header"))
	}
	typ = objType((u >> 4) & 7)
	n := int(u&15 | u>>7<<4)

	// Git often stores objects that differ very little (different revs of a file).
	// It can save space by encoding one as "start with this other object and apply these diffs".
	// There are two ways to specify "this other object": an object ref (20-byte SHA1)
	// or as a relative offset to an earlier position in the objs slice.
	// For either of these, we need to fetch the other object's type and data (deltaTyp and deltaBase).
	// The Git docs call this the "deltified representation".
	var deltaTyp objType
	var deltaBase []byte
	switch typ {
	case objRefDelta:
		if len(objs)-(off+size) < 20 {
			return fail(fmt.Errorf("invalid object: bad delta ref"))
		}
		// Base block identified by SHA1 of an already unpacked hash.
		var ref Hash
		copy(ref[:], objs[off+size:])
		size += 20
		deltaTyp, deltaBase = s.object(ref)
		if deltaTyp == 0 {
			return fail(fmt.Errorf("invalid object: unknown delta ref %v", ref))
		}

	case objOfsDelta:
		i := off + size
		// Requiring 20 bytes here keeps the loop below, which reads at most
		// 11 bytes starting at i, within bounds.
		if len(objs)-i < 20 {
			return fail(fmt.Errorf("invalid object: too short"))
		}
		// Base block identified by relative offset to earlier position in objs,
		// using a varint-like but not-quite-varint encoding.
		// Look for "offset encoding:" in https://git-scm.com/docs/pack-format.
		d := int64(objs[i] & 0x7f)
		for objs[i]&0x80 != 0 {
			i++
			if i-(off+size) > 10 {
				return fail(fmt.Errorf("invalid object: malformed delta offset"))
			}
			d = d<<7 | int64(objs[i]&0x7f)
			d += 1 << 7
		}
		i++
		size = i - off

		// Re-unpack the object at the earlier offset to find its type and content.
		// The encoding above can carry more than 63 bits, so d may have wrapped
		// negative; a non-positive d would make the base offset >= off, which
		// would allow reference cycles and unbounded recursion.
		if d <= 0 || d > int64(off) {
			return fail(fmt.Errorf("invalid object: bad delta offset"))
		}
		deltaTyp, _, deltaBase, _, err = unpackObject(s, objs, off-int(d))
		if err != nil {
			return fail(fmt.Errorf("invalid object: bad delta offset"))
		}
	}

	// The main encoded data is a zlib-compressed stream.
	br := bytes.NewReader(objs[off+size:])
	zr, err := zlib.NewReader(br)
	if err != nil {
		return fail(fmt.Errorf("invalid object deflate: %v", err))
	}
	data, err := ioutil.ReadAll(zr)
	if err != nil {
		return fail(fmt.Errorf("invalid object: bad deflate: %v", err))
	}
	if len(data) != n {
		return fail(fmt.Errorf("invalid object: deflate size %d != %d", len(data), n))
	}
	// Whatever br has not handed out lies beyond this entry.
	encSize = len(objs[off:]) - br.Len()

	// If we fetched a base object above, the stream is an encoded delta.
	// Otherwise it is the raw data.
	switch typ {
	default:
		return fail(fmt.Errorf("invalid object: unknown object type"))
	case objCommit, objTree, objBlob, objTag:
		// ok
	case objRefDelta, objOfsDelta:
		// Actual object type is the type of the base object.
		typ = deltaTyp

		// Delta encoding starts with size of base object and size of new object.
		// Uvarint reports 0 bytes for truncated input and a negative count on
		// overflow; neither may be used as a slice index.
		baseSize, w := binary.Uvarint(data)
		if w <= 0 {
			return fail(fmt.Errorf("invalid object: bad delta header"))
		}
		data = data[w:]
		if baseSize != uint64(len(deltaBase)) {
			return fail(fmt.Errorf("invalid object: mismatched delta src size"))
		}
		targSize, w := binary.Uvarint(data)
		if w <= 0 {
			return fail(fmt.Errorf("invalid object: bad delta header"))
		}
		data = data[w:]

		// Each delta instruction occupies at least one byte and emits at most
		// 0xffffff bytes (the largest 3-byte copy size), so a larger target
		// size cannot be satisfied. Rejecting it here keeps an untrusted
		// length from reaching make.
		const maxPerInstruction = 0xffffff
		if targSize/maxPerInstruction > uint64(len(data)) {
			return fail(fmt.Errorf("invalid object: bad delta target size"))
		}

		// Apply delta to base object, producing new object.
		targ := make([]byte, targSize)
		if err := applyDelta(targ, deltaBase, data); err != nil {
			return fail(fmt.Errorf("invalid object: %v", err))
		}
		data = targ
	}

	h, data = s.add(typ, data)
	return typ, h, data, encSize, nil
}