Add GenSelect plugin for solution selection #215

Merged 10 commits on Jul 24, 2025
86 changes: 86 additions & 0 deletions .github/workflows/test.yml
@@ -0,0 +1,86 @@
name: Run Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.12']

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Cache pip packages
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -r tests/requirements.txt

      - name: Run unit tests
        run: |
          # Run quick CI tests
          python tests/test_ci_quick.py

          # Run plugin tests with pytest if available
          python -m pytest tests/test_plugins.py -v --tb=short || python tests/test_plugins.py

          # Run approach tests
          python tests/test_approaches.py

  integration-test:
    runs-on: ubuntu-latest
    needs: test
    if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
    # Only run integration tests on PRs from the same repository (not forks)
    # This ensures secrets are available

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run integration test with OpenAI
        if: env.OPENAI_API_KEY != ''
        run: |
          # Start OptILLM server
          python optillm.py &
          SERVER_PID=$!

          # Wait for server
          sleep 5

          # Run simple integration test
          python tests/test.py --approaches none --single-test "Simple Math Problem" --base-url http://localhost:8000/v1 --model gpt-4o-mini || true

          # Stop server
          kill $SERVER_PID || true
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        continue-on-error: true
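As a rough local equivalent of the integration-test step above, the following Python sketch starts the proxy, sends one request through it, and shuts it down. It is not part of the PR; it assumes `OPENAI_API_KEY` is exported, port 8000 is free, and `gpt-4o-mini` is accessible:

```python
# Rough local equivalent of the CI integration step (a sketch, not PR code).
# Assumes OPENAI_API_KEY is set and nothing else is listening on port 8000.
import os
import subprocess
import time

from openai import OpenAI

server = subprocess.Popen(["python", "optillm.py"])  # start the proxy in the background
try:
    time.sleep(5)  # crude wait, mirroring the workflow's `sleep 5`
    client = OpenAI(
        api_key=os.environ["OPENAI_API_KEY"],
        base_url="http://localhost:8000/v1",
    )
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "What is 2 + 2?"}],
    )
    print(response.choices[0].message.content)
finally:
    server.terminate()  # stop the proxy, equivalent to `kill $SERVER_PID`
```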
1 change: 1 addition & 0 deletions .gitignore
@@ -170,3 +170,4 @@ cython_debug/

scripts/results/
results/
test_results.json
42 changes: 42 additions & 0 deletions README.md
@@ -377,6 +377,7 @@ Check this log file for connection issues, tool execution errors, and other diag
| Read URLs | `readurls` | Reads all URLs found in the request, fetches the content at the URL and adds it to the context |
| Execute Code | `executecode` | Enables use of code interpreter to execute python code in requests and LLM generated responses |
| JSON | `json` | Enables structured outputs using the outlines library, supports pydantic types and JSON schema |
| GenSelect | `genselect` | Generative Solution Selection - generates multiple candidates and selects the best based on quality criteria |
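A minimal sketch of calling the new plugin through the proxy, assuming the usual optillm convention of selecting an approach by prefixing the model name (here `genselect-`); the endpoint, API key, and prompt are placeholders:

```python
from openai import OpenAI

# Point the standard OpenAI client at a running optillm proxy
# (http://localhost:8000/v1 is the default base URL used elsewhere in this PR).
client = OpenAI(api_key="sk-...", base_url="http://localhost:8000/v1")

# The "genselect-" prefix is assumed to route the request through the
# GenSelect plugin, which generates several candidate solutions and
# returns the one judged best against its quality criteria.
response = client.chat.completions.create(
    model="genselect-gpt-4o-mini",
    messages=[{"role": "user", "content": "Prove that the sum of two even integers is even."}],
)
print(response.choices[0].message.content)
```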

## Available parameters

@@ -564,6 +565,46 @@ called patchflows. We saw huge performance gains across all the supported patchf

![Results showing optillm mixture of agents approach used with patchflows](https://raw.githubusercontent.com/codelion/optillm/main/moa-patchwork-results.png)

## Testing

OptILLM includes a comprehensive test suite to ensure reliability and compatibility.

### Running Tests

The main test suite can be run from the project root:
```bash
# Test all approaches with default test cases
python tests/test.py

# Test specific approaches
python tests/test.py --approaches moa bon mcts

# Run a single test
python tests/test.py --single-test "Simple Math Problem"
```

### Unit and Integration Tests

Additional tests are available in the `tests/` directory:
```bash
# Run all tests (requires pytest)
./tests/run_tests.sh

# Run specific test modules
pytest tests/test_plugins.py -v
pytest tests/test_api_compatibility.py -v
```

### CI/CD

All tests are automatically run on pull requests via GitHub Actions. The workflow tests:
- The Python version pinned in the workflow matrix (currently 3.12)
- Unit tests for plugins and core functionality
- API compatibility tests
- Integration tests with various approaches

See `tests/README.md` for more details on the test structure and how to write new tests.

## References
- [Eliciting Fine-Tuned Transformer Capabilities via Inference-Time Techniques](https://arxiv.org/abs/2506.08060)
- [AutoThink: efficient inference for reasoning LLMs](https://dx.doi.org/10.2139/ssrn.5253327) - [Implementation](optillm/autothink)
@@ -587,6 +628,7 @@ called patchflows. We saw huge performance gains across all the supported patchf
- [Unsupervised Evaluation of Code LLMs with Round-Trip Correctness](https://arxiv.org/abs/2402.08699) - [Inspired the implementation of rto](optillm/rto.py)
- [Patched MOA: optimizing inference for diverse software development tasks](https://arxiv.org/abs/2407.18521) - [Implementation](optillm/moa.py)
- [Patched RTC: evaluating LLMs for diverse software development tasks](https://arxiv.org/abs/2407.16557) - [Implementation](optillm/rto.py)
- [AIMO-2 Winning Solution: Building State-of-the-Art Mathematical Reasoning Models with OpenMathReasoning dataset](https://arxiv.org/abs/2504.16891) - [Implementation](optillm/plugins/genselect_plugin.py)

## Citation

17 changes: 4 additions & 13 deletions optillm.py
@@ -302,9 +302,9 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
     if hasattr(request, 'json'):
         data = request.get_json()
         messages = data.get('messages', [])
-        # Copy all parameters except 'stream', 'model' , 'n' and 'messages'
+        # Copy all parameters except 'stream', 'model' and 'messages'
         kwargs = {k: v for k, v in data.items()
-                  if k not in ['model', 'messages', 'stream', 'n', 'optillm_approach']}
+                  if k not in ['model', 'messages', 'stream', 'optillm_approach']}
         response = none_approach(original_messages=messages, client=client, model=model, **kwargs)
         # For none approach, we return the response and a token count of 0
         # since the full token count is already in the response
@@ -641,17 +641,8 @@ def proxy():
     contains_none = any(approach == 'none' for approach in approaches)

     if operation == 'SINGLE' and approaches[0] == 'none':
-        # For none approach with n>1, make n separate calls
-        if n > 1:
-            responses = []
-            completion_tokens = 0
-            for _ in range(n):
-                result, tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model, request_config)
-                responses.append(result)
-                completion_tokens += tokens
-            result = responses
-        else:
-            result, completion_tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model, request_config)
+        # Pass through the request including the n parameter
+        result, completion_tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model, request_config)

     logger.debug(f'Direct proxy response: {result}')
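For context, here is a hedged sketch of the kind of request this change affects: with the loop gone, `n` is forwarded to the underlying provider in a single call rather than optillm issuing `n` separate requests. It assumes a local proxy on the default port and that `optillm_approach` can be passed via `extra_body`, as the parameter filtering above suggests:

```python
from openai import OpenAI

client = OpenAI(api_key="sk-...", base_url="http://localhost:8000/v1")

# n is now passed straight through to the provider for the 'none' approach,
# so all three choices come back from one upstream call.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Name a prime number greater than 100."}],
    n=3,
    extra_body={"optillm_approach": "none"},
)
for choice in response.choices:
    print(choice.message.content)
```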
