From: Sasha Levin <sashal@kernel.org>
To: tools@kernel.org
Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org,
broonie@kernel.org, Sasha Levin <sashal@kernel.org>
Subject: [RFC v2 6/7] LLMinus: Add prompt token limit enforcement
Date: Sun, 11 Jan 2026 16:29:14 -0500 [thread overview]
Message-ID: <20260111212915.195056-7-sashal@kernel.org> (raw)
In-Reply-To: <20260111212915.195056-1-sashal@kernel.org>

Add a --max-tokens option, defaulting to 100K tokens, to prevent prompt
overflow errors across LLM providers. The token count is estimated at
roughly 4 characters per token. When a prompt exceeds the limit, RAG
examples are removed one at a time, least similar first, until the
prompt fits. The estimated token count is also shown in the invoke
output.

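The estimate-then-shrink behavior described above can be sketched in
isolation (a simplified model: build_prompt() and the example corpus
here are illustrative stand-ins, not the tool's actual types):

```rust
/// Rough token estimate: ~4 characters per token for English text.
const CHARS_PER_TOKEN: usize = 4;

fn estimate_tokens(text: &str) -> usize {
    text.len() / CHARS_PER_TOKEN
}

/// Stand-in for build_resolve_prompt(): the real function formats the
/// conflicts plus RAG examples; here we simply concatenate strings.
fn build_prompt(base: &str, examples: &[String]) -> String {
    let mut prompt = String::from(base);
    for example in examples {
        prompt.push_str(example);
    }
    prompt
}

fn main() {
    let base = "x".repeat(300); // fixed content: ~75 tokens
    // Three RAG examples, ~50 tokens each, ordered most similar first.
    let mut examples: Vec<String> = (0..3).map(|_| "y".repeat(200)).collect();
    let max_tokens = 130;

    let mut prompt = build_prompt(&base, &examples);
    // Drop the least similar (last) example until the prompt fits.
    while estimate_tokens(&prompt) > max_tokens && !examples.is_empty() {
        examples.pop();
        prompt = build_prompt(&base, &examples);
    }

    // 225 -> 175 -> 125 estimated tokens: two examples dropped, one kept.
    assert_eq!(examples.len(), 1);
    assert_eq!(estimate_tokens(&prompt), 125);
}
```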
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
tools/llminus/src/main.rs | 114 +++++++++++++++++++++++++++++++-------
1 file changed, 95 insertions(+), 19 deletions(-)
diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs
index ff668244688f..5c469e23f09a 100644
--- a/tools/llminus/src/main.rs
+++ b/tools/llminus/src/main.rs
@@ -14,6 +14,18 @@
const STORE_PATH: &str = ".llminus-resolutions.json";
+/// Default maximum tokens for prompt (conservative for broad provider compatibility)
+/// Most providers support at least 128K; we use 100K as a safe default.
+const DEFAULT_MAX_TOKENS: usize = 100_000;
+
+/// Approximate characters per token (for English text)
+const CHARS_PER_TOKEN: usize = 4;
+
+/// Estimate the number of tokens in a text string
+fn estimate_tokens(text: &str) -> usize {
+ text.len() / CHARS_PER_TOKEN
+}
+
#[derive(Parser)]
#[command(name = "llminus")]
#[command(about = "LLM-powered git conflict resolution tool")]
@@ -45,6 +57,9 @@ enum Commands {
Resolve {
/// Command to invoke. The prompt will be passed via stdin.
command: String,
+ /// Maximum tokens for prompt (reduces RAG examples if exceeded)
+ #[arg(short, long, default_value_t = DEFAULT_MAX_TOKENS)]
+ max_tokens: usize,
},
/// Pull a kernel patch/pull request from lore.kernel.org and merge it
Pull {
@@ -53,6 +68,9 @@ enum Commands {
/// Command to invoke for LLM assistance
#[arg(short, long, default_value = "llm")]
command: String,
+ /// Maximum tokens for prompt (reduces RAG examples if exceeded)
+ #[arg(long, default_value_t = DEFAULT_MAX_TOKENS)]
+ max_tokens: usize,
},
}
@@ -825,6 +843,7 @@ fn parse_conflict_file(path: &str) -> Result<Vec<ConflictFile>> {
}
/// Result of a similarity search
+#[derive(Clone)]
struct SimilarResolution {
resolution: MergeResolution,
similarity: f32,
@@ -1632,7 +1651,7 @@ fn build_resolve_prompt(
prompt
}
-fn resolve(command: &str) -> Result<()> {
+fn resolve(command: &str, max_tokens: usize) -> Result<()> {
// Get merge context (what branch/tag is being merged)
let merge_ctx = get_merge_context();
if let Some(ref source) = merge_ctx.merge_source {
@@ -1649,17 +1668,45 @@ fn resolve(command: &str) -> Result<()> {
// Try to find similar historical resolutions (gracefully handles missing database)
println!("Looking for similar historical conflicts...");
- let similar = try_find_similar_resolutions(3, &conflicts);
+ let all_similar = try_find_similar_resolutions(3, &conflicts);
- if similar.is_empty() {
+ if all_similar.is_empty() {
println!("No historical resolution database found (run 'llminus learn' and 'llminus vectorize' to build one)");
println!("Proceeding without historical examples...");
} else {
- println!("Found {} similar historical resolutions", similar.len());
+ println!("Found {} similar historical resolutions", all_similar.len());
+ }
+
+ // Build the prompt with adaptive RAG example reduction
+ let mut similar = all_similar.clone();
+ let mut prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, None);
+ let mut tokens = estimate_tokens(&prompt);
+
+ // Reduce RAG examples until we're under the token limit
+ while tokens > max_tokens && !similar.is_empty() {
+ let original_count = all_similar.len();
+ similar.pop(); // Remove the least similar (last) example
+ prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, None);
+ tokens = estimate_tokens(&prompt);
+
+ if similar.len() < original_count {
+ println!(
+ "Reduced RAG examples from {} to {} to fit token limit (~{} tokens, limit: {})",
+ original_count,
+ similar.len(),
+ tokens,
+ max_tokens
+ );
+ }
+ }
+
+ if tokens > max_tokens {
+ println!(
+ "Warning: Prompt still exceeds token limit (~{} tokens, limit: {}) even without RAG examples",
+ tokens, max_tokens
+ );
}
- // Build the prompt and invoke LLM
- let prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, None);
invoke_llm(command, &prompt)
}
@@ -1668,7 +1715,8 @@ fn invoke_llm(command: &str, prompt: &str) -> Result<()> {
use std::io::Write;
use std::process::Stdio;
- println!("Invoking: {} (prompt: {} bytes)", command, prompt.len());
+ let tokens = estimate_tokens(prompt);
+ println!("Invoking: {} (prompt: {} bytes, ~{} tokens)", command, prompt.len(), tokens);
println!("{}", "=".repeat(80));
// Parse command (handle arguments)
@@ -1708,7 +1756,7 @@ fn invoke_llm(command: &str, prompt: &str) -> Result<()> {
}
/// Pull a kernel pull request from lore.kernel.org
-fn pull(message_id: &str, command: &str) -> Result<()> {
+fn pull(message_id: &str, command: &str, max_tokens: usize) -> Result<()> {
check_repo()?;
// Step 1: Fetch and parse the pull request email
@@ -1744,16 +1792,44 @@ fn pull(message_id: &str, command: &str) -> Result<()> {
// Try to find similar historical resolutions
println!("Looking for similar historical conflicts...");
- let similar = try_find_similar_resolutions(3, &conflicts);
+ let all_similar = try_find_similar_resolutions(3, &conflicts);
- if similar.is_empty() {
+ if all_similar.is_empty() {
println!("No historical resolution database found (this is optional)");
} else {
- println!("Found {} similar historical resolutions", similar.len());
+ println!("Found {} similar historical resolutions", all_similar.len());
+ }
+
+ // Build the prompt with adaptive RAG example reduction
+ let mut similar = all_similar.clone();
+ let mut prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, Some(&pull_req));
+ let mut tokens = estimate_tokens(&prompt);
+
+ // Reduce RAG examples until we're under the token limit
+ while tokens > max_tokens && !similar.is_empty() {
+ let original_count = all_similar.len();
+ similar.pop(); // Remove the least similar (last) example
+ prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, Some(&pull_req));
+ tokens = estimate_tokens(&prompt);
+
+ if similar.len() < original_count {
+ println!(
+ "Reduced RAG examples from {} to {} to fit token limit (~{} tokens, limit: {})",
+ original_count,
+ similar.len(),
+ tokens,
+ max_tokens
+ );
+ }
+ }
+
+ if tokens > max_tokens {
+ println!(
+ "Warning: Prompt still exceeds token limit (~{} tokens, limit: {}) even without RAG examples",
+ tokens, max_tokens
+ );
}
- // Build the prompt with pull request context and invoke LLM
- let prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, Some(&pull_req));
println!("\n=== Invoking LLM for Conflict Resolution ===");
invoke_llm(command, &prompt)?;
@@ -1815,8 +1891,8 @@ fn main() -> Result<()> {
Commands::Learn { range } => learn(range.as_deref()),
Commands::Vectorize { batch_size } => vectorize(batch_size),
Commands::Find { n } => find(n),
- Commands::Resolve { command } => resolve(&command),
- Commands::Pull { message_id, command } => pull(&message_id, &command),
+ Commands::Resolve { command, max_tokens } => resolve(&command, max_tokens),
+ Commands::Pull { message_id, command, max_tokens } => pull(&message_id, &command, max_tokens),
}
}
@@ -1890,7 +1966,7 @@ fn test_find_command_with_n() {
fn test_resolve_command_parses() {
let cli = Cli::try_parse_from(["llminus", "resolve", "my-llm"]).unwrap();
match cli.command {
- Commands::Resolve { command } => assert_eq!(command, "my-llm"),
+ Commands::Resolve { command, .. } => assert_eq!(command, "my-llm"),
_ => panic!("Expected Resolve command"),
}
}
@@ -1899,7 +1975,7 @@ fn test_resolve_command_parses() {
fn test_resolve_command_with_args() {
let cli = Cli::try_parse_from(["llminus", "resolve", "my-llm --model fancy"]).unwrap();
match cli.command {
- Commands::Resolve { command } => assert_eq!(command, "my-llm --model fancy"),
+ Commands::Resolve { command, .. } => assert_eq!(command, "my-llm --model fancy"),
_ => panic!("Expected Resolve command"),
}
}
@@ -2210,7 +2286,7 @@ fn test_parse_multiple_conflicts() {
fn test_pull_command_parses() {
let cli = Cli::try_parse_from(["llminus", "pull", "test@kernel.org"]).unwrap();
match cli.command {
- Commands::Pull { message_id, command } => {
+ Commands::Pull { message_id, command, .. } => {
assert_eq!(message_id, "test@kernel.org");
assert_eq!(command, "llm"); // default
}
@@ -2224,7 +2300,7 @@ fn test_pull_command_with_custom_command() {
"llminus", "pull", "test@kernel.org", "-c", "my-llm --model fancy"
]).unwrap();
match cli.command {
- Commands::Pull { message_id, command } => {
+ Commands::Pull { message_id, command, .. } => {
assert_eq!(message_id, "test@kernel.org");
assert_eq!(command, "my-llm --model fancy");
}
--
2.51.0
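One property of the estimator worth noting (a standalone sketch, not
part of the patch): str::len() counts UTF-8 bytes rather than
characters, so non-ASCII text yields a higher token estimate. That
errs on the conservative side, trimming examples earlier rather than
risking provider-side overflow.

```rust
const CHARS_PER_TOKEN: usize = 4;

fn estimate_tokens(text: &str) -> usize {
    // len() is the byte length of the UTF-8 encoding.
    text.len() / CHARS_PER_TOKEN
}

fn main() {
    let ascii = "abcd".repeat(100);    // 400 chars, 400 bytes
    let cyrillic = "дддд".repeat(100); // 400 chars, 800 bytes (2 bytes/char)
    assert_eq!(ascii.chars().count(), cyrillic.chars().count());
    assert_eq!(estimate_tokens(&ascii), 100);
    // Same character count, double the byte count: estimate doubles too.
    assert_eq!(estimate_tokens(&cyrillic), 200);
}
```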