in crates/llm-ls/src/main.rs [141:233]
/// Builds the prompt text for a completion request at cursor position `pos`.
///
/// Lines of `text` are collected outward from the cursor until the token
/// budget derived from `context_window` is used up:
///
/// * When `fim.enabled` is set, lines before and after the cursor are taken
///   alternately and the result is laid out as
///   `fim.prefix + before + fim.suffix + after + fim.middle`. Three tokens of
///   the budget are reserved for the FIM markers. The first line (on either
///   side) that does not fit ends collection for both sides.
/// * Otherwise only the text preceding the cursor is used.
///
/// The cost of a line is the number of tokens `tokenizer` produces for it;
/// when no tokenizer is supplied the line's byte length is used instead.
///
/// # Errors
///
/// Propagates any error returned by the tokenizer while encoding a line.
fn build_prompt(
    pos: Position,
    text: &Rope,
    fim: &FimParams,
    tokenizer: Option<Arc<Tokenizer>>,
    context_window: usize,
) -> Result<String> {
    let t = Instant::now();

    // Borrow the tokenizer once instead of cloning the `Arc` for every line.
    let tokenizer = tokenizer.as_deref();
    let count_tokens = |line: &str| -> Result<usize> {
        Ok(match tokenizer {
            Some(tokenizer) => tokenizer.encode(line, false)?.len(),
            // No tokenizer available: fall back to the byte length of the line.
            None => line.len(),
        })
    };

    // `Rope::lines_at` panics when the index is past the end of the document,
    // so a stale or out-of-range position is clamped to the last line.
    let cursor_line = (pos.line as usize).min(text.len_lines().saturating_sub(1));
    let cursor_col = pos.character as usize;

    let prompt = if fim.enabled {
        // Reserve room for the FIM tokens; saturate so that a context window
        // smaller than the reserve cannot underflow.
        let mut remaining_token_count = context_window.saturating_sub(3);

        // `before_iter` walks upwards starting with the cursor line,
        // `after_iter` walks downwards starting with the cursor line.
        let mut before_iter = text.lines_at(cursor_line + 1).reversed();
        let mut after_iter = text.lines_at(cursor_line);

        // The cursor line is split at the cursor column: the left part belongs
        // to the prefix, the right part to the suffix.
        let mut before_line = before_iter.next();
        if let Some(line) = before_line {
            let col = cursor_col.min(line.len_chars());
            before_line = Some(line.slice(0..col));
        }
        let mut after_line = after_iter.next();
        if let Some(line) = after_line {
            let col = cursor_col.min(line.len_chars());
            after_line = Some(line.slice(col..));
        }

        // `before` is gathered bottom-up and reversed when the prompt is assembled.
        let mut before = vec![];
        let mut after = String::new();
        while before_line.is_some() || after_line.is_some() {
            if let Some(line) = before_line {
                let line = line.to_string();
                let tokens = count_tokens(&line)?;
                if tokens > remaining_token_count {
                    break;
                }
                remaining_token_count -= tokens;
                before.push(line);
            }
            if let Some(line) = after_line {
                let line = line.to_string();
                let tokens = count_tokens(&line)?;
                if tokens > remaining_token_count {
                    break;
                }
                remaining_token_count -= tokens;
                after.push_str(&line);
            }
            before_line = before_iter.next();
            after_line = after_iter.next();
        }

        format!(
            "{}{}{}{}{}",
            fim.prefix,
            before.into_iter().rev().collect::<String>(),
            fim.suffix,
            after,
            fim.middle
        )
    } else {
        let mut remaining_token_count = context_window;
        let mut before = vec![];
        let mut first = true;
        for mut line in text.lines_at(cursor_line + 1).reversed() {
            if first {
                // Only the part of the cursor line left of the cursor is kept.
                let col = cursor_col.min(line.len_chars());
                line = line.slice(0..col);
                first = false;
            }
            let line = line.to_string();
            let tokens = count_tokens(&line)?;
            if tokens > remaining_token_count {
                break;
            }
            remaining_token_count -= tokens;
            before.push(line);
        }
        before.into_iter().rev().collect::<String>()
    };

    let time = t.elapsed().as_millis();
    info!(prompt, build_prompt_ms = time, "built prompt in {time} ms");
    Ok(prompt)
}