Skip to content

Commit 975e66e

Browse files
authored
Merge pull request #185 from codelion/feat-spl-plugin
Feat spl plugin
2 parents 6096253 + 4b69ae9 commit 975e66e

18 files changed

+3264
-13
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,7 @@ Check this log file for connection issues, tool execution errors, and other diag
365365

366366
| Plugin | Slug | Description |
367367
| ----------------------- | ------------------ | ---------------------------------------------------------------------------------------------- |
368+
| System Prompt Learning | `spl` | Implements what [Andrej Karpathy called the third paradigm](https://x.com/karpathy/status/1921368644069765486) for LLM learning, this enables the model to acquire problem-solving knowledge and strategies |
368369
| Long-Context Cerebras Planning and Optimization | `longcepo` | Combines planning and divide-and-conquer processing of long documents to enable infinite context |
369370
| MCP Client | `mcp` | Implements the model context protocol (MCP) client, enabling you to use any LLM with any MCP Server |
370371
| Router | `router` | Uses the [optillm-modernbert-large](https://huggingface.co/codelion/optillm-modernbert-large) model to route requests to different approaches based on the user prompt |

optillm.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -594,12 +594,18 @@ def proxy():
594594
# Extract response_format if present
595595
response_format = data.get("response_format", None)
596596

597-
# Create request config with all parameters
598-
request_config = {
597+
# Explicit keys that we are already handling
598+
explicit_keys = {'stream', 'messages', 'model', 'n', 'response_format'}
599+
600+
# Copy the rest into request_config
601+
request_config = {k: v for k, v in data.items() if k not in explicit_keys}
602+
603+
# Add the explicitly handled ones
604+
request_config.update({
599605
"stream": stream,
600606
"n": n,
601-
"response_format": response_format # Add response_format to config
602-
}
607+
"response_format": response_format, # Add response_format to config
608+
})
603609

604610
optillm_approach = data.get('optillm_approach', server_config['approach'])
605611
logger.debug(data)

optillm/plugins/spl.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
"""
2+
System Prompt Learning (SPL) Plugin for OptiLLM
3+
4+
This plugin implements Andrej Karpathy's proposed system prompt learning paradigm,
5+
allowing LLMs to improve their problem-solving capabilities by:
6+
1. Identifying problem types
7+
2. Generating and refining strategies for solving different problems
8+
3. Building a knowledge base of problem-solving techniques
9+
4. Applying these techniques to new instances of similar problems
10+
5. Tracking the success of different strategies to prioritize effective ones
11+
12+
The plugin maintains a database of strategies that evolves over time, making the
13+
LLM incrementally better at solving problems by learning from its experiences.
14+
"""
15+
16+
from typing import Tuple
17+
from optillm.plugins.spl.main import run_spl
18+
19+
# Plugin identifier
20+
SLUG = "spl"
21+
22+
def run(system_prompt: str, initial_query: str, client, model: str, request_config: dict = None) -> Tuple[str, int]:
23+
"""
24+
Plugin entry point for System Prompt Learning.
25+
26+
Args:
27+
system_prompt: The system prompt
28+
initial_query: The user's query
29+
client: The LLM client
30+
model: The model identifier
31+
request_config: Optional request configuration
32+
Can include {'spl_learning': True} to enable learning mode
33+
34+
Returns:
35+
Tuple[str, int]: The LLM response and token count
36+
"""
37+
return run_spl(system_prompt, initial_query, client, model, request_config)

optillm/plugins/spl/README.md

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
# System Prompt Learning (SPL) Plugin for OptiLLM
2+
3+
This plugin implements Andrej Karpathy's [proposed](https://x.com/karpathy/status/1921368644069765486) "third paradigm" for LLM learning, enabling large language models to learn and improve their problem-solving strategies over time through experience and reflection.
4+
5+
## Introduction: The Evolution of LLM Learning
6+
7+
Large Language Models (LLMs) have traditionally learned in two primary ways:
8+
1. **Pretraining**: Learning facts, patterns, and language from massive text corpora
9+
2. **Finetuning**: Learning behaviors through supervised or reinforcement learning
10+
11+
System Prompt Learning introduces a third paradigm:
12+
3. **Strategy Learning**: The model learns explicit problem-solving strategies through experience, maintains them in a growing knowledge base, and applies them selectively based on problem types
13+
14+
This approach addresses a fundamental limitation of current LLMs—their inability to learn cumulatively from experience. While LLMs can solve individual problems impressively, they typically approach each new problem from scratch rather than building on past successes.
15+
16+
## The SPL Paradigm
17+
18+
System Prompt Learning represents a significant shift in how LLMs approach problem-solving:
19+
20+
- **Experience-Driven Learning**: Rather than relying solely on pretraining or supervised finetuning, SPL enables models to learn from their own problem-solving experiences
21+
- **Strategy Formalization**: The system explicitly generates, evaluates, and refines problem-solving strategies
22+
- **Performance Tracking**: SPL tracks which strategies work well for different problem types, creating a dynamic feedback loop
23+
- **Selective Application**: When faced with a new problem, the system selects the most relevant strategies based on similarity and past performance
24+
25+
This approach mirrors how human experts develop expertise—by accumulating strategies through experience and applying them selectively to new situations.
26+
27+
## Experimental Results
28+
29+
We conducted extensive experiments using the SPL plugin with gemini-2.0-flash-lite on various benchmarks. The learning phase used the OptILLMBench training split (400 instances), while evaluation was performed on the test split (100 instances) and additional popular mathematical benchmarks.
30+
31+
The results demonstrate consistent improvements across all benchmarks:
32+
33+
| Benchmark | Baseline | With SPL | Improvement |
34+
|-----------|----------|----------|-------------|
35+
| OptILLMBench | 61% | 65% | +4% |
36+
| MATH-500 | 85% | 85.6% | +0.6% |
37+
| Arena Auto Hard | 29% | 37.6% | +8.6% |
38+
| AIME24 | 23.33% | 30% | +6.67% |
39+
40+
These results are particularly notable for the challenging Arena Auto Hard and AIME24 benchmarks, where traditional approaches often struggle. The improvements suggest that SPL is especially effective for complex problem-solving tasks that benefit from strategic approaches.
41+
42+
![Performance Improvements with System Prompt Learning](performance-comparison.svg)
43+
44+
*Figure 1: Performance comparison between baseline gemini-2.0-flash-lite and the same model with SPL across multiple mathematical benchmarks.*
45+
46+
## Usage
47+
48+
### Basic Usage
49+
50+
Use the plugin by prefixing your model name with `spl-`:
51+
52+
```
53+
spl-gpt-4o
54+
```
55+
56+
### Combining with Other Plugins
57+
58+
SPL can be combined with other plugins using the `&` operator:
59+
60+
```
61+
spl&memory-gpt-4o
62+
```
63+
64+
### Learning Mode
65+
66+
By default, the plugin runs in inference-only mode, which uses existing strategies without creating or modifying them. To enable learning mode, which allows the plugin to create and refine strategies based on usage, add the `spl_learning` parameter to the request config:
67+
68+
```python
69+
client.chat.completions.create(
70+
model="spl-gpt-4o",
71+
messages=[
72+
{"role": "system", "content": system_prompt},
73+
{"role": "user", "content": query}
74+
],
75+
    extra_body={"spl_learning": True},
76+
)
77+
```
78+
79+
## How It Works
80+
81+
1. **Problem Classification**: The plugin analyzes each query to determine its problem type
82+
2. **Strategy Selection**: It selects relevant strategies from its database based on the problem type and content
83+
3. **System Prompt Augmentation**: Selected strategies (up to MAX_STRATEGIES_FOR_INFERENCE) are added to the system prompt
84+
85+
When learning mode is enabled, the plugin also performs:
86+
87+
4. **Effectiveness Evaluation**: After generating a response, the system evaluates how well each strategy worked
88+
5. **Strategy Creation & Refinement**: The system creates new strategies for unseen problem types and periodically refines existing strategies based on usage
89+
90+
The plugin maintains two separate limits:
91+
- **Storage Limit** (MAX_STRATEGIES_PER_TYPE): Controls how many strategies can be stored in the database per problem type
92+
- **Inference Limit** (MAX_STRATEGIES_FOR_INFERENCE): Controls how many strategies are used during inference for system prompt augmentation
93+
94+
![SPL Learning Workflow](learning-workflow.svg)
95+
96+
*Figure 2: The SPL learning and inference workflow showing how strategies are learned, refined, and applied.*
97+
98+
## Learning Metrics
99+
100+
After training on the OptILLMBench dataset, the system developed a rich knowledge base of strategies:
101+
102+
- **Total queries processed**: 500
103+
- **Strategies created**: 129
104+
- **Strategies refined**: 97
105+
- **Successful resolutions**: 346
106+
- **Strategies merged**: 28
107+
108+
These metrics indicate a healthy learning process with a balance between creation, refinement, and merging of similar strategies.
109+
110+
## Data Storage
111+
112+
Strategies are stored in JSON format in the `spl_data` directory:
113+
- `strategies.json`: Contains all learned strategies
114+
- `metrics.json`: Contains performance metrics and usage statistics
115+
116+
## Configuration
117+
118+
The SPL plugin maintains these core files:
119+
- **Strategy Database**: `/optillm/plugins/spl/data/strategies.json`
120+
- **Metrics**: `/optillm/plugins/spl/data/metrics.json`
121+
122+
You can:
123+
1. Backup these files to preserve learned strategies
124+
2. Edit the strategies.json file to manually add or modify strategies
125+
3. Reset the learning by deleting these files (they will be recreated)
126+
127+
## Example Strategy
128+
129+
Below is an example of a strategy learned by the system for word problems:
130+
131+
```json
132+
{
133+
"strategy_id": "strategy_3",
134+
"problem_type": "word_problem",
135+
"strategy_text": "**Refined Strategy for Solving Word Problems:**\n\n1. **Understand:**\n * Read the problem carefully (multiple times).\n * Identify the question (what are you trying to find?).\n * List all given information (facts, numbers, units).\n * Clarify ambiguous terms/units.\n\n2. **Organize Information & Identify Unknowns:**\n * Choose an organization method: (e.g., table, diagram, list, drawing).\n * Clearly identify the unknowns (what you need to solve for).\n\n3. **Plan and Translate:**\n * Define *all* variables with units (e.g., `p = number of pennies`, `c = number of compartments`).\n * Identify relationships between knowns and unknowns.\n * Convert units if necessary.\n * Write equations or expressions, including units, that relate the knowns and unknowns.\n * Ensure units are consistent throughout the equations.\n * Outline the solution steps.\n\n4. **Solve:**\n * Show work step-by-step.\n * Track units throughout calculations.\n * Calculate accurately.\n * Solve for the unknowns.\n\n5. **Evaluate and Verify:**\n * Check if the answer is reasonable.\n * Verify the answer.\n\n6. **Summarize:**\n * State the answer with units.",
136+
"success_count": 85,
137+
"total_attempts": 192,
138+
"confidence": 0.425
139+
}
140+
```
141+
142+
This strategy was developed through multiple refinement cycles and has a success rate of 44.3% (85/192). The system continuously updates these metrics as the strategy is applied to new problems.
143+
144+
## Motivations and Broader Impact
145+
146+
### The System Prompt Gap
147+
148+
Most LLM providers like Anthropic (Claude) and OpenAI (GPT) employ elaborate system prompts that encode sophisticated problem-solving strategies. However, the majority of users interact with these models using very basic or empty system prompts, missing out on the benefits of strategic guidance.
149+
150+
SPL bridges this gap by automatically learning and applying effective strategies, democratizing access to the benefits of well-crafted system prompts without requiring expertise in prompt engineering.
151+
152+
### Learning from Experience
153+
154+
Current LLMs are often described as "one-shot learners"—they can solve individual problems but don't accumulate knowledge from these experiences. SPL represents a step toward models that improve through use, similar to how humans develop expertise through practice and reflection.
155+
156+
### Human-Readable Learning
157+
158+
Unlike black-box learning approaches, SPL produces human-readable strategies that can be inspected, understood, and even manually edited. This transparency allows for:
159+
- Understanding how the model approaches different problems
160+
- Identifying potential biases or flaws in reasoning
161+
- Transferring strategies between models or domains
162+
163+
## Benefits
164+
165+
1. **Cumulative Learning**: The LLM improves on specific problem types over time
166+
2. **Explicit Knowledge**: Strategies are human-readable and provide insight into the LLM's reasoning
167+
3. **Efficiency**: Reuses successful approaches rather than solving each problem from scratch
168+
4. **Adaptability**: Different strategies for different problem types
169+
5. **Transparency**: Learning process and outcomes can be inspected and understood
170+
171+
## Conclusion and Future Work
172+
173+
System Prompt Learning represents a promising new direction for enabling LLMs to learn from experience in a transparent and interpretable way. Our experiments demonstrate significant performance improvements across multiple benchmarks, particularly for complex problem-solving tasks.
174+
175+
Future work will focus on:
176+
1. Expanding the range of problem types the system can recognize
177+
2. Improving the strategy refinement process
178+
3. Enabling cross-domain strategy transfer
179+
4. Developing mechanisms for human feedback on strategies
180+
5. Exploring hybrid approaches that combine SPL with other learning paradigms

optillm/plugins/spl/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
"""
2+
System Prompt Learning (SPL) plugin module initialization.
3+
"""

optillm/plugins/spl/config.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""
2+
Configuration settings for the System Prompt Learning (SPL) plugin.
3+
"""
4+
5+
import os
6+
from typing import List
7+
8+
# Plugin identifier
9+
SLUG = "spl"
10+
11+
# Base directory for storing strategy data
12+
PLUGIN_DIR = os.path.dirname(os.path.abspath(__file__))
13+
DATA_DIR = os.path.join(PLUGIN_DIR, 'data')
14+
STRATEGY_DB_PATH = os.path.join(DATA_DIR, 'strategies.json')
15+
STRATEGY_METRICS_PATH = os.path.join(DATA_DIR, 'metrics.json')
16+
17+
# Default max tokens for reasoning LLMs
18+
DEFAULT_MAX_TOKENS = 4096
19+
20+
# How often to perform maintenance operations (merge, prune)
21+
MAINTENANCE_INTERVAL = 40
22+
23+
# Strategy selection thresholds
24+
STRATEGY_CREATION_THRESHOLD = 0.7 # Higher threshold to avoid creating similar strategies
25+
STRATEGY_MERGING_THRESHOLD = 0.6 # Lower threshold to merge more similar strategies
26+
MIN_SUCCESS_RATE_FOR_INFERENCE = 0.4 # Minimum success rate for a strategy to be used during inference
27+
28+
# Limits for strategy management
29+
MAX_STRATEGIES_PER_TYPE = 10 # Maximum strategies to store in DB per problem type
30+
MAX_STRATEGIES_FOR_INFERENCE = 3 # Maximum strategies to use during inference
31+
32+
# Define valid problem types (used for strict classification)
33+
VALID_PROBLEM_TYPES: List[str] = [
34+
"arithmetic_calculation",
35+
"algebraic_equation",
36+
"statistical_analysis",
37+
"logical_reasoning",
38+
"word_problem",
39+
"coding_problem",
40+
"algorithm_design",
41+
"creative_writing",
42+
"text_summarization",
43+
"information_retrieval",
44+
"planning_task",
45+
"decision_making",
46+
"knowledge_question",
47+
"language_translation",
48+
"sequence_completion",
49+
"general_problem" # Fallback type
50+
]
51+
52+
# Ensure data directory exists
53+
os.makedirs(DATA_DIR, exist_ok=True)

optillm/plugins/spl/data/metrics.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"total_queries": 500,
3+
"strategy_applications": 1297,
4+
"strategies_created": 129,
5+
"strategies_refined": 97,
6+
"successful_resolutions": 346,
7+
"last_strategy_id": 129,
8+
"reasoning_examples_collected": 0,
9+
"strategies_merged": 28
10+
}

0 commit comments

Comments
 (0)