
Commit 0297900

Merge pull request #216 from codelion/feat-deep-research
Feat deep research
2 parents aabd5f1 + 4f3170f commit 0297900

60 files changed: 13,446 additions and 9 deletions


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -171,3 +171,4 @@ cython_debug/
 scripts/results/
 results/
 test_results.json
+deep_research_reports/

README.md

Lines changed: 3 additions & 0 deletions
@@ -378,6 +378,8 @@ Check this log file for connection issues, tool execution errors, and other diag
 | Execute Code | `executecode` | Enables use of code interpreter to execute python code in requests and LLM generated responses |
 | JSON | `json` | Enables structured outputs using the outlines library, supports pydantic types and JSON schema |
 | GenSelect | `genselect` | Generative Solution Selection - generates multiple candidates and selects the best based on quality criteria |
+| Web Search | `web_search` | Performs Google searches using Chrome automation (Selenium) to gather search results and URLs |
+| [Deep Research](optillm/plugins/deep_research) | `deep_research` | Implements Test-Time Diffusion Deep Researcher (TTD-DR) for comprehensive research reports using iterative refinement |
 
 ## Available parameters
 
@@ -629,6 +631,7 @@ See `tests/README.md` for more details on the test structure and how to write ne
 - [Patched MOA: optimizing inference for diverse software development tasks](https://arxiv.org/abs/2407.18521) - [Implementation](optillm/moa.py)
 - [Patched RTC: evaluating LLMs for diverse software development tasks](https://arxiv.org/abs/2407.16557) - [Implementation](optillm/rto.py)
 - [AIMO-2 Winning Solution: Building State-of-the-Art Mathematical Reasoning Models with OpenMathReasoning dataset](https://arxiv.org/abs/2504.16891) - [Implementation](optillm/plugins/genselect_plugin.py)
+- [Test-Time Diffusion Deep Researcher (TTD-DR): Think More, Research More, Answer Better!](https://arxiv.org/abs/2507.16075v1) - [Implementation](optillm/plugins/deep_research)
 
 ## Citation
 
optillm.py

Lines changed: 43 additions & 6 deletions
@@ -773,7 +773,11 @@ def parse_args():
         if extra and extra[0]:  # Check if there are choices for this argument
             parser.add_argument(arg, type=type_, default=default, help=help_text, choices=extra[0])
         else:
-            parser.add_argument(arg, type=type_, default=default, help=help_text)
+            if type_ == bool:
+                # For boolean flags, use store_true action
+                parser.add_argument(arg, action='store_true', default=default, help=help_text)
+            else:
+                parser.add_argument(arg, type=type_, default=default, help=help_text)
 
     # Special handling for best_of_n to support both formats
     best_of_n_default = int(os.environ.get("OPTILLM_BEST_OF_N", 3))
@@ -855,12 +859,45 @@ def main():
         base_url = f"http://localhost:{port}/v1"
         logger.info(f"Launching Gradio interface connected to {base_url}")
 
-        # Launch Gradio interface
-        demo = gr.load_chat(
-            base_url,
-            model=server_config['model'],
-            token=None
+        # Create custom chat function with extended timeout
+        def chat_with_optillm(message, history):
+            import httpx
+            from openai import OpenAI
+
+            # Create client with extended timeout and no retries
+            custom_client = OpenAI(
+                api_key="optillm",
+                base_url=base_url,
+                timeout=httpx.Timeout(1800.0, connect=5.0),  # 30 min timeout
+                max_retries=0  # No retries - prevents duplicate requests
+            )
+
+            # Convert history to messages format
+            messages = []
+            for h in history:
+                if h[0]:  # User message
+                    messages.append({"role": "user", "content": h[0]})
+                if h[1]:  # Assistant message
+                    messages.append({"role": "assistant", "content": h[1]})
+            messages.append({"role": "user", "content": message})
+
+            # Make request
+            try:
+                response = custom_client.chat.completions.create(
+                    model=server_config['model'],
+                    messages=messages
+                )
+                return response.choices[0].message.content
+            except Exception as e:
+                return f"Error: {str(e)}"
+
+        # Create Gradio interface with queue for long operations
+        demo = gr.ChatInterface(
+            chat_with_optillm,
+            title="OptILLM Chat Interface",
+            description=f"Connected to OptILLM proxy at {base_url}"
         )
+        demo.queue()  # Enable queue to handle long operations properly
         demo.launch(server_name="0.0.0.0", share=False)
     except ImportError:
         logger.error("Gradio is required for GUI. Install it with: pip install gradio")
Lines changed: 263 additions & 0 deletions
@@ -0,0 +1,263 @@
# Deep Research Plugin

## Overview

The Deep Research plugin implements the **Test-Time Diffusion Deep Researcher (TTD-DR)** algorithm, a state-of-the-art approach for comprehensive research report generation. This implementation is based on the paper ["Deep Researcher with Test-Time Diffusion"](https://arxiv.org/abs/2507.16075v1) and provides iterative, in-depth research capabilities for complex queries.

## Algorithm Overview

The TTD-DR algorithm treats research as a **diffusion process** with iterative refinement through denoising and retrieval. Unlike traditional search approaches that return raw results, TTD-DR performs:

1. **Query Decomposition** - Breaks complex queries into focused sub-questions
2. **Iterative Search** - Performs multiple rounds of web search based on identified gaps
3. **Content Synthesis** - Uses advanced memory processing for unbounded context
4. **Completeness Evaluation** - Automatically assesses research quality and identifies missing aspects
5. **Report Generation** - Produces structured, academic-quality reports with proper citations

## Architecture

```
deep_research/
├── __init__.py            # Package initialization
├── research_engine.py     # Core TTD-DR implementation
└── README.md              # This documentation

../deep_research_plugin.py # OptILLM plugin interface
```

### Key Components

#### 1. `DeepResearcher` Class (`research_engine.py`)

The core implementation of the TTD-DR algorithm exposes the following key methods (a sketch of how they compose follows this list):

- **`decompose_query()`** - Implements the query planning phase
- **`perform_web_search()`** - Orchestrates web search using individual queries to avoid truncation
- **`extract_and_fetch_urls()`** - Extracts sources and fetches content
- **`synthesize_with_memory()`** - Processes unbounded context with citations
- **`evaluate_completeness()`** - Assesses research gaps
- **`generate_structured_report()`** - Creates academic-quality reports
- **`research()`** - Main research loop implementing TTD-DR
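
The following is a hypothetical sketch of how these methods might compose into the research loop. Only the method names come from the list above; the signatures, return shapes, and stopping logic are illustrative assumptions, not the actual implementation.

```python
# Hypothetical driver for the TTD-DR loop (illustrative only).
def run_ttd_dr(researcher, query: str, max_iterations: int = 5) -> str:
    sub_queries = researcher.decompose_query(query)               # 1. query planning
    synthesis = ""

    for _ in range(max_iterations):
        results = researcher.perform_web_search(sub_queries)      # 2. one search per sub-query
        sources = researcher.extract_and_fetch_urls(results)      #    fetch source content
        synthesis = researcher.synthesize_with_memory(query, synthesis, sources)    # 3. cited synthesis
        complete, sub_queries = researcher.evaluate_completeness(query, synthesis)  # 4. gap check
        if complete:
            break                                                  # no gaps left to research

    return researcher.generate_structured_report(query, synthesis)  # 5. final report
```

In the plugin itself this orchestration lives in `research()`, and the `run()` entry point returns both the final report and a token count.
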
#### 2. Plugin Interface (`deep_research_plugin.py`)

Minimal interface that integrates with OptILLM's plugin system:

```python
def run(system_prompt: str, initial_query: str, client, model: str, request_config: Optional[Dict] = None) -> Tuple[str, int]
```

## Implementation Details

### Research Process Flow

```mermaid
graph TD
    A[Initial Query] --> B[Query Decomposition]
    B --> C[Web Search]
    C --> D[Content Extraction]
    D --> E[Memory Synthesis]
    E --> F[Completeness Evaluation]
    F --> G{Complete?}
    G -->|No| H[Generate Focused Queries]
    H --> C
    G -->|Yes| I[Generate Structured Report]
    I --> J[Final Report with Citations]
```

### Citation System

The plugin implements a sophisticated citation tracking system (a minimal sketch follows this list):

- **Inline Citations**: `[1]`, `[2]`, `[3]` format throughout the text
- **Source Tracking**: Maps citation numbers to source metadata
- **Deduplication**: Avoids duplicate citations for the same URL
- **Academic Format**: Proper reference formatting with URLs and access dates
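
A minimal sketch of such a citation registry; the class and field names here are hypothetical and do not reflect the plugin's internal data structures.

```python
# Hypothetical citation registry: stable numbering, per-URL deduplication,
# and an academic-style reference list.
from datetime import date

class CitationTracker:
    def __init__(self):
        self._by_url = {}   # url -> citation number
        self._sources = []  # ordered source metadata

    def cite(self, url: str, title: str) -> str:
        """Return an inline marker like "[1]", reusing the number for known URLs."""
        if url not in self._by_url:
            self._by_url[url] = len(self._sources) + 1
            self._sources.append({"title": title, "url": url, "accessed": date.today().isoformat()})
        return f"[{self._by_url[url]}]"

    def references(self) -> str:
        """Render a numbered reference list with URLs and access dates."""
        return "\n".join(
            f"[{i}] {s['title']}. {s['url']} (accessed {s['accessed']})"
            for i, s in enumerate(self._sources, start=1)
        )
```
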
### Report Structure

Generated reports follow academic standards:

1. **Executive Summary** - Key findings overview
2. **Introduction** - Research question and significance
3. **Background** - Context and foundational information
4. **Key Findings** - Main discoveries with citations
5. **Analysis and Discussion** - Interpretation and implications
6. **Conclusion** - Summary and final thoughts
7. **Recommendations** - Actionable suggestions (when applicable)
8. **Limitations and Future Research** - Research constraints and future directions
9. **References** - Complete source list with metadata

## Configuration

The plugin accepts the following configuration parameters:

```python
request_config = {
    "max_iterations": 5,  # Maximum research iterations (default: 5)
    "max_sources": 10     # Maximum sources per search (default: 10)
}
```

## Dependencies

The Deep Research plugin requires these OptILLM plugins:

- **`web_search`** - Chrome-based Google search automation
- **`readurls`** - Content extraction from URLs
- **`memory`** - Unbounded context processing and synthesis

## Usage Examples

### Basic Usage

```python
from optillm.plugins.deep_research_plugin import run

result, tokens = run(
    system_prompt="You are a research assistant",
    initial_query="What are the latest advances in quantum error correction?",
    client=openai_client,
    model="gpt-4o-mini"
)
```

### Advanced Configuration

```python
result, tokens = run(
    system_prompt="You are a research assistant",
    initial_query="Analyze the impact of AI on healthcare diagnostics",
    client=openai_client,
    model="gpt-4o-mini",
    request_config={
        "max_iterations": 3,
        "max_sources": 8
    }
)
```

### With OptILLM Server

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="optillm")

response = client.chat.completions.create(
    model="deep_research-gpt-4o-mini",
    messages=[
        {"role": "user", "content": "Research the latest developments in renewable energy storage"}
    ],
    extra_body={
        "request_config": {
            "max_iterations": 3,
            "max_sources": 10
        }
    }
)
```

## Performance Characteristics

- **Time Complexity**: O(iterations × sources × content_size)
- **Typical Duration**: 2-5 minutes per research query
- **Token Usage**: 1,000-5,000 tokens per iteration
- **Memory Requirements**: Scales with content volume and context size

## Reasoning Model Compatibility

The plugin is fully compatible with reasoning models that include internal thinking processes:

- **Automatic Cleanup**: Removes `<think>`, `<thinking>`, `<reasoning>`, `<reflection>` tags from all responses
- **Professional Output**: Ensures final reports contain only clean, formatted content
- **Seamless Integration**: Works transparently with any model type
- **Supported Tags**: `<think>`, `<thinking>`, `<reasoning>`, `<thought>`, `<reflect>`, `<reflection>`

Example cleanup:

```
Input: "<think>Let me analyze this</think>\n\n# Research Report\nContent here..."
Output: "# Research Report\nContent here..."
```
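
One way to implement this kind of cleanup is a single regex pass over the listed tags. The helper below is an illustrative sketch, not the plugin's actual code:

```python
# Illustrative tag-stripping helper: removes the reasoning tags listed above,
# including their contents, then trims leading whitespace.
import re

REASONING_TAGS = ("think", "thinking", "reasoning", "thought", "reflect", "reflection")
_TAG_RE = re.compile(
    r"<(" + "|".join(REASONING_TAGS) + r")>.*?</\1>",
    flags=re.DOTALL | re.IGNORECASE,
)

def strip_reasoning(text: str) -> str:
    """Drop <think>...</think>-style blocks and tidy leftover blank lines."""
    return _TAG_RE.sub("", text).lstrip()

print(strip_reasoning("<think>Let me analyze this</think>\n\n# Research Report\nContent here..."))
# -> "# Research Report\nContent here..."
```
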
## Error Handling

The plugin includes comprehensive error handling (a minimal fallback pattern is sketched below):

1. **Graceful Degradation** - Falls back to a basic LLM response on critical failures
2. **Timeout Management** - Handles web search and content fetching timeouts
3. **Rate Limiting** - Includes delays to avoid search engine restrictions
4. **Validation** - Input validation and configuration checks
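
A hedged sketch of the graceful-degradation idea: if the research pipeline raises, answer the query directly with the underlying model instead of failing the request. Function and variable names are illustrative, not the plugin's actual code.

```python
# Illustrative graceful-degradation wrapper.
def run_with_fallback(client, model, system_prompt, initial_query, researcher):
    try:
        # Full TTD-DR pipeline (assumed entry point for illustration).
        return researcher.research(initial_query)
    except Exception as exc:
        # Degrade to a single direct completion with the same prompts.
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": initial_query},
            ],
        )
        return f"(research pipeline unavailable: {exc})\n\n" + response.choices[0].message.content
```
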
## Quality Assurance

The implementation follows the TTD-DR paper's quality criteria:

- **Comprehensive Coverage** - Addresses all aspects of the research query
- **Source Diversity** - Incorporates multiple credible sources
- **Citation Accuracy** - Proper attribution for all claims and findings
- **Academic Rigor** - Maintains objectivity and scholarly tone
- **Iterative Refinement** - Continuously improves research quality
- **Clean Output** - Automatically removes reasoning tags (`<think>`, `<thinking>`, etc.) for professional reports

## Comparison to Simple Search

| Feature | Simple Search | Deep Research (TTD-DR) |
|---------|---------------|------------------------|
| Query Processing | Single query | Multi-query decomposition |
| Iteration | Single pass | Multiple refinement cycles |
| Content Synthesis | Raw results | Comprehensive analysis |
| Gap Detection | None | Automatic completeness evaluation |
| Citations | Manual | Automatic with tracking |
| Report Format | Unstructured | Academic report structure |
| Context Handling | Limited | Unbounded via memory plugin |

## Future Enhancements

Potential improvements aligned with research directions:

1. **Parallel Processing** - Concurrent search execution
2. **Domain Specialization** - Field-specific research strategies
3. **Multimedia Integration** - Image and video content analysis
4. **Real-time Updates** - Live research monitoring and updates
5. **Collaborative Research** - Multi-agent research coordination

## Troubleshooting

### Common Issues

1. **Chrome Browser Issues**
   - Ensure Chrome is installed and accessible
   - Check for CAPTCHA requirements (the plugin supports manual solving)

2. **Rate Limiting**
   - The plugin includes automatic delays
   - Consider increasing delay settings if rate limiting is aggressive

3. **Memory Constraints**
   - Large research queries may consume significant memory
   - Monitor token usage and consider iteration limits

4. **Citation Extraction**
   - URL parsing depends on the search result format
   - The plugin includes fallback parsing methods

5. **Search Query Processing**
   - The plugin issues an individual search for each sub-query to prevent truncation
   - If search results seem incomplete, check that the decomposed queries are reasonable
   - Each sub-query is processed separately to ensure complete coverage

### Debug Mode

To debug, follow the console logs during research execution; the plugin logs each research phase in detail.

## Contributing

When contributing to the Deep Research plugin:

1. Maintain compatibility with the TTD-DR algorithm
2. Preserve citation tracking functionality
3. Ensure academic report structure compliance
4. Test with various query types and complexity levels
5. Document any new configuration options
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
"""
Deep Research Plugin Package

Implementation of Test-Time Diffusion Deep Researcher (TTD-DR) algorithm
for comprehensive research report generation.
"""

from .research_engine import DeepResearcher

__version__ = "1.0.0"
__author__ = "OptILLM Contributors"
__description__ = "TTD-DR Implementation for Deep Research"
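
Because the package re-exports `DeepResearcher`, downstream code can import it from the package root; a small illustration (assuming the plugin is installed as part of `optillm`):

```python
# Enabled by the re-export in this __init__.py:
from optillm.plugins.deep_research import DeepResearcher

# Equivalent to importing from the module directly:
# from optillm.plugins.deep_research.research_engine import DeepResearcher
```
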

0 commit comments
