in internal/mode/advanced/indexer/blob.go [92:146]
// BuildBlob assembles the Blob document for a single git file.
//
// Files flagged with SkipTooLarge are never read: they keep the
// NoCodeContentMsgHolder placeholder as content and defaultLanguage as
// language. For every other file the full blob is loaded; content that
// DetectBinary reports as binary keeps the placeholder, while anything else is
// passed through encoder.tryEncodeBytes. The language is derived from the file
// path and the raw bytes in both cases.
//
// blobType selects the identity fields:
//   - "wiki_blob": ID from GenerateWikiBlobId, RepoID "wiki_<parentID>".
//   - "blob":      ID from GenerateBlobID, RepoID is the decimal parentID.
//   - any other value: ID from GenerateBlobID; Type and RepoID are left at
//     their zero values.
//
// An error is returned only when opening or reading the git blob fails.
func BuildBlob(file *git.File, parentID int64, commitSHA string, blobType string, encoder *Encoder, isProjectDocument bool) (*Blob, error) {
	filePath := file.Path
	text, lang := NoCodeContentMsgHolder, defaultLanguage

	// Oversized files are indexed by metadata only; their bytes are not loaded.
	if !file.SkipTooLarge {
		rc, err := file.Blob()
		if err != nil {
			return nil, err
		}
		defer rc.Close() //nolint:errcheck

		// FIXME(nick): Reading the whole blob into memory doesn't look cheap.
		// Check the RAM & CPU pressure, esp. for large blobs.
		raw, err := io.ReadAll(rc)
		if err != nil {
			return nil, err
		}

		// Only non-binary payloads replace the placeholder content.
		if isBinary := DetectBinary(raw); !isBinary {
			text = encoder.tryEncodeBytes(raw)
		}
		lang = DetectLanguage(filePath, raw)
	}

	result := &Blob{
		OID:       file.Oid,
		CommitSHA: commitSHA,
		Content:   text,
		Path:      filePath,
		Filename:  path.Base(filePath),
		Language:  lang,
	}

	// Identity fields depend on the kind of blob being indexed.
	switch blobType {
	case "wiki_blob":
		result.ID = GenerateWikiBlobId(parentID, filePath, isProjectDocument)
		result.Type = "wiki_blob"
		result.RepoID = fmt.Sprintf("wiki_%d", parentID)
	case "blob":
		result.ID = GenerateBlobID(parentID, filePath)
		result.Type = "blob"
		result.RepoID = strconv.FormatInt(parentID, 10)
	default:
		// Unrecognized types still receive a regular blob ID, but no
		// Type or RepoID.
		result.ID = GenerateBlobID(parentID, filePath)
	}

	return result, nil
}