[RFC v2 6/7] LLMinus: Add prompt token limit enforcement

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Sasha Levin <sashal@kernel.org>
To: tools@kernel.org
Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org,
	broonie@kernel.org, Sasha Levin <sashal@kernel.org>
Subject: [RFC v2 6/7] LLMinus: Add prompt token limit enforcement
Date: Sun, 11 Jan 2026 16:29:14 -0500	[thread overview]
Message-ID: <20260111212915.195056-7-sashal@kernel.org> (raw)
In-Reply-To: <20260111212915.195056-1-sashal@kernel.org>

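Add a --max-tokens option (default 100K) to the resolve and pull commands
to prevent prompt overflow errors with various LLM providers. The token
count is estimated at roughly 4 characters per token (measured as UTF-8
bytes). When a prompt exceeds the limit, RAG examples are removed one at
a time, least similar first, until the prompt fits; if it still exceeds
the limit with no examples left, a warning is printed and the prompt is
sent anyway. The estimated token count is displayed in the invoke output.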

Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 tools/llminus/src/main.rs | 114 +++++++++++++++++++++++++++++++-------
 1 file changed, 95 insertions(+), 19 deletions(-)

diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs
index ff668244688f..5c469e23f09a 100644
--- a/tools/llminus/src/main.rs
+++ b/tools/llminus/src/main.rs
@@ -14,6 +14,18 @@
 
 const STORE_PATH: &str = ".llminus-resolutions.json";
 
+/// Default maximum tokens for prompt (conservative for broad provider compatibility)
+/// Most providers support at least 128K; we use 100K as a safe default.
+const DEFAULT_MAX_TOKENS: usize = 100_000;
+
+/// Approximate bytes per token (`str::len` counts UTF-8 bytes; ~4 for English text)
+const CHARS_PER_TOKEN: usize = 4;
+
+/// Estimate the number of tokens in `text` from its UTF-8 byte length (rounds down)
+fn estimate_tokens(text: &str) -> usize {
+    text.len() / CHARS_PER_TOKEN
+}
+
 #[derive(Parser)]
 #[command(name = "llminus")]
 #[command(about = "LLM-powered git conflict resolution tool")]
@@ -45,6 +57,9 @@ enum Commands {
     Resolve {
         /// Command to invoke. The prompt will be passed via stdin.
         command: String,
+        /// Maximum tokens for prompt (reduces RAG examples if exceeded)
+        #[arg(short, long, default_value_t = DEFAULT_MAX_TOKENS)]
+        max_tokens: usize,
     },
     /// Pull a kernel patch/pull request from lore.kernel.org and merge it
     Pull {
@@ -53,6 +68,9 @@ enum Commands {
         /// Command to invoke for LLM assistance
         #[arg(short, long, default_value = "llm")]
         command: String,
+        /// Maximum tokens for prompt (reduces RAG examples if exceeded)
+        #[arg(long, default_value_t = DEFAULT_MAX_TOKENS)]
+        max_tokens: usize,
     },
 }
 
@@ -825,6 +843,7 @@ fn parse_conflict_file(path: &str) -> Result<Vec<ConflictFile>> {
 }
 
 /// Result of a similarity search
+#[derive(Clone)]
 struct SimilarResolution {
     resolution: MergeResolution,
     similarity: f32,
@@ -1632,7 +1651,7 @@ fn build_resolve_prompt(
     prompt
 }
 
-fn resolve(command: &str) -> Result<()> {
+fn resolve(command: &str, max_tokens: usize) -> Result<()> {
     // Get merge context (what branch/tag is being merged)
     let merge_ctx = get_merge_context();
     if let Some(ref source) = merge_ctx.merge_source {
@@ -1649,17 +1668,45 @@ fn resolve(command: &str) -> Result<()> {
 
     // Try to find similar historical resolutions (gracefully handles missing database)
     println!("Looking for similar historical conflicts...");
-    let similar = try_find_similar_resolutions(3, &conflicts);
+    let all_similar = try_find_similar_resolutions(3, &conflicts);
 
-    if similar.is_empty() {
+    if all_similar.is_empty() {
         println!("No historical resolution database found (run 'llminus learn' and 'llminus vectorize' to build one)");
         println!("Proceeding without historical examples...");
     } else {
-        println!("Found {} similar historical resolutions", similar.len());
+        println!("Found {} similar historical resolutions", all_similar.len());
+    }
+
+    // Build the prompt with adaptive RAG example reduction
+    let mut similar = all_similar.clone();
+    let mut prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, None);
+    let mut tokens = estimate_tokens(&prompt);
+
+    // Reduce RAG examples until we're under the token limit
+    while tokens > max_tokens && !similar.is_empty() {
+        let original_count = all_similar.len();
+        similar.pop(); // Remove the least similar (last) example
+        prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, None);
+        tokens = estimate_tokens(&prompt);
+
+        if similar.len() < original_count {
+            println!(
+                "Reduced RAG examples from {} to {} to fit token limit (~{} tokens, limit: {})",
+                original_count,
+                similar.len(),
+                tokens,
+                max_tokens
+            );
+        }
+    }
+
+    if tokens > max_tokens {
+        println!(
+            "Warning: Prompt still exceeds token limit (~{} tokens, limit: {}) even without RAG examples",
+            tokens, max_tokens
+        );
     }
 
-    // Build the prompt and invoke LLM
-    let prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, None);
     invoke_llm(command, &prompt)
 }
 
@@ -1668,7 +1715,8 @@ fn invoke_llm(command: &str, prompt: &str) -> Result<()> {
     use std::io::Write;
     use std::process::Stdio;
 
-    println!("Invoking: {} (prompt: {} bytes)", command, prompt.len());
+    let tokens = estimate_tokens(prompt);
+    println!("Invoking: {} (prompt: {} bytes, ~{} tokens)", command, prompt.len(), tokens);
     println!("{}", "=".repeat(80));
 
     // Parse command (handle arguments)
@@ -1708,7 +1756,7 @@ fn invoke_llm(command: &str, prompt: &str) -> Result<()> {
 }
 
 /// Pull a kernel pull request from lore.kernel.org
-fn pull(message_id: &str, command: &str) -> Result<()> {
+fn pull(message_id: &str, command: &str, max_tokens: usize) -> Result<()> {
     check_repo()?;
 
     // Step 1: Fetch and parse the pull request email
@@ -1744,16 +1792,44 @@ fn pull(message_id: &str, command: &str) -> Result<()> {
 
     // Try to find similar historical resolutions
     println!("Looking for similar historical conflicts...");
-    let similar = try_find_similar_resolutions(3, &conflicts);
+    let all_similar = try_find_similar_resolutions(3, &conflicts);
 
-    if similar.is_empty() {
+    if all_similar.is_empty() {
         println!("No historical resolution database found (this is optional)");
     } else {
-        println!("Found {} similar historical resolutions", similar.len());
+        println!("Found {} similar historical resolutions", all_similar.len());
+    }
+
+    // Build the prompt with adaptive RAG example reduction
+    let mut similar = all_similar.clone();
+    let mut prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, Some(&pull_req));
+    let mut tokens = estimate_tokens(&prompt);
+
+    // Reduce RAG examples until we're under the token limit
+    while tokens > max_tokens && !similar.is_empty() {
+        let original_count = all_similar.len();
+        similar.pop(); // Remove the least similar (last) example
+        prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, Some(&pull_req));
+        tokens = estimate_tokens(&prompt);
+
+        if similar.len() < original_count {
+            println!(
+                "Reduced RAG examples from {} to {} to fit token limit (~{} tokens, limit: {})",
+                original_count,
+                similar.len(),
+                tokens,
+                max_tokens
+            );
+        }
+    }
+
+    if tokens > max_tokens {
+        println!(
+            "Warning: Prompt still exceeds token limit (~{} tokens, limit: {}) even without RAG examples",
+            tokens, max_tokens
+        );
     }
 
-    // Build the prompt with pull request context and invoke LLM
-    let prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, Some(&pull_req));
     println!("\n=== Invoking LLM for Conflict Resolution ===");
     invoke_llm(command, &prompt)?;
 
@@ -1815,8 +1891,8 @@ fn main() -> Result<()> {
         Commands::Learn { range } => learn(range.as_deref()),
         Commands::Vectorize { batch_size } => vectorize(batch_size),
         Commands::Find { n } => find(n),
-        Commands::Resolve { command } => resolve(&command),
-        Commands::Pull { message_id, command } => pull(&message_id, &command),
+        Commands::Resolve { command, max_tokens } => resolve(&command, max_tokens),
+        Commands::Pull { message_id, command, max_tokens } => pull(&message_id, &command, max_tokens),
     }
 }
 
@@ -1890,7 +1966,7 @@ fn test_find_command_with_n() {
     fn test_resolve_command_parses() {
         let cli = Cli::try_parse_from(["llminus", "resolve", "my-llm"]).unwrap();
         match cli.command {
-            Commands::Resolve { command } => assert_eq!(command, "my-llm"),
+            Commands::Resolve { command, .. } => assert_eq!(command, "my-llm"),
             _ => panic!("Expected Resolve command"),
         }
     }
@@ -1899,7 +1975,7 @@ fn test_resolve_command_parses() {
     fn test_resolve_command_with_args() {
         let cli = Cli::try_parse_from(["llminus", "resolve", "my-llm --model fancy"]).unwrap();
         match cli.command {
-            Commands::Resolve { command } => assert_eq!(command, "my-llm --model fancy"),
+            Commands::Resolve { command, .. } => assert_eq!(command, "my-llm --model fancy"),
             _ => panic!("Expected Resolve command"),
         }
     }
@@ -2210,7 +2286,7 @@ fn test_parse_multiple_conflicts() {
     fn test_pull_command_parses() {
         let cli = Cli::try_parse_from(["llminus", "pull", "test@kernel.org"]).unwrap();
         match cli.command {
-            Commands::Pull { message_id, command } => {
+            Commands::Pull { message_id, command, .. } => {
                 assert_eq!(message_id, "test@kernel.org");
                 assert_eq!(command, "llm"); // default
             }
@@ -2224,7 +2300,7 @@ fn test_pull_command_with_custom_command() {
             "llminus", "pull", "test@kernel.org", "-c", "my-llm --model fancy"
         ]).unwrap();
         match cli.command {
-            Commands::Pull { message_id, command } => {
+            Commands::Pull { message_id, command, .. } => {
                 assert_eq!(message_id, "test@kernel.org");
                 assert_eq!(command, "my-llm --model fancy");
             }
-- 
2.51.0

next prev parent reply	other threads:[~2026-01-11 21:29 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-19 18:16 [RFC 0/5] LLMinus: LLM-Assisted Merge Conflict Resolution Sasha Levin
2025-12-19 18:16 ` [RFC 1/5] LLMinus: Add skeleton project with learn command Sasha Levin
2025-12-19 18:16 ` [RFC 2/5] LLMinus: Add vectorize command with fastembed Sasha Levin
2025-12-19 18:16 ` [RFC 3/5] LLMinus: Add find command for similarity search Sasha Levin
2025-12-19 18:16 ` [RFC 4/5] LLMinus: Add resolve command for LLM-assisted conflict resolution Sasha Levin
2025-12-19 18:16 ` [RFC 5/5] LLMinus: Add pull command for LLM-assisted kernel pull request merging Sasha Levin
2025-12-21 16:10 ` [RFC 0/5] LLMinus: LLM-Assisted Merge Conflict Resolution Sasha Levin
2025-12-22 14:50   ` Mark Brown
2025-12-23 12:36     ` Sasha Levin
2025-12-23 17:47       ` Mark Brown
2026-01-05 18:00         ` Sasha Levin
2026-01-05 18:30           ` Mark Brown
2026-01-11 21:29 ` [RFC v2 0/7] " Sasha Levin
2026-01-11 21:29   ` [RFC v2 1/7] LLMinus: Add skeleton project with learn command Sasha Levin
2026-01-11 21:29   ` [RFC v2 2/7] LLMinus: Add vectorize command with fastembed Sasha Levin
2026-01-11 21:29   ` [RFC v2 3/7] LLMinus: Add find command for similarity search Sasha Levin
2026-01-11 21:29   ` [RFC v2 4/7] LLMinus: Add resolve command for LLM-assisted conflict resolution Sasha Levin
2026-01-11 21:29   ` [RFC v2 5/7] LLMinus: Add pull command for LLM-assisted kernel pull request merging Sasha Levin
2026-01-11 21:29   ` Sasha Levin [this message]
2026-01-11 21:29   ` [RFC v2 7/7] LLMinus: Add build test integration for semantic conflicts Sasha Levin

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:ff668244688 dfblob:5c469e23f09 )
 OR (
bs:"[RFC v2 6/7] LLMinus: Add prompt token limit enforcement" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260111212915.195056-7-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=broonie@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tools@kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.