func byteOffsetForLSPColumn()

in internal/filesystem/position.go [30:73]


// byteOffsetForLSPColumn converts an LSP column on line l into a byte offset
// expressed in the same terms as the byte positions of l.Range().
//
// LSP columns count UTF-16 code units whereas the line buffer is UTF-8, so
// the line is walked one UTF-8 sequence at a time while tallying how many
// UTF-16 units each decoded code point occupies.
//
//   - A negative lspCol yields the byte offset of the start of the line.
//   - If the line is exhausted before lspCol units have been counted, the
//     column is treated as invalid and the byte offset of the end of the
//     line's range is returned.
//   - A column that falls inside a surrogate pair resolves to the byte
//     immediately after that code point, because the unit count jumps by two.
func byteOffsetForLSPColumn(l source.Line, lspCol int) int {
	if lspCol < 0 {
		return l.Range().Start.Byte
	}

	// Normally ASCII-only lines could be short-circuited here
	// but it's not as easy to tell whether a line is ASCII-only
	// based on column/byte differences as we also scan newlines
	// and a single line range technically spans 2 lines.

	// If there are non-ASCII characters then we need to edge carefully
	// along the line while counting UTF-16 code units in our UTF-8 buffer,
	// since LSP columns are a count of UTF-16 units.
	byteCt := 0
	utf16Ct := 0
	remain := l.Bytes()
	for len(remain) > 0 {
		if utf16Ct >= lspCol { // we've found it
			return l.Range().Start.Byte + byteCt
		}

		// Unlike our other conversion functions we're intentionally using
		// individual UTF-8 sequences here rather than grapheme clusters because
		// an LSP position might point into the middle of a grapheme cluster.
		//
		// The scanner's error result is deliberately discarded, as before.
		// NOTE(review): presumably it cannot fail on an in-memory buffer with
		// atEOF set — confirm against the textseg documentation.
		adv, chBytes, _ := textseg.ScanUTF8Sequences(remain, true)
		remain = remain[adv:]
		byteCt += adv

		for len(chBytes) > 0 {
			r, size := utf8.DecodeRune(chBytes)
			chBytes = chBytes[size:]

			// EncodeRune reports U+FFFD for both halves when the rune does
			// not need (or cannot have) a surrogate pair.
			if c1, c2 := utf16.EncodeRune(r); c1 == 0xfffd && c2 == 0xfffd {
				utf16Ct++ // codepoint fits in one 16-bit unit
			} else {
				utf16Ct += 2 // codepoint requires a surrogate pair
			}
		}
	}

	// Ran out of characters on the line, so the given column is invalid.
	return l.Range().End.Byte
}