From 61d0b8267fcccecf83ba5b05a4c451250116c877 Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma <codelion@users.noreply.github.com>
Date: Mon, 30 Jun 2025 14:43:28 +0800
Subject: [PATCH 1/2] Update MLX model patterns and reduce max_tokens in eval
 script

Add '-mlx-' to the list of MLX model patterns in should_use_mlx
(optillm/inference.py) for broader matching.

Reduce max_tokens from 32768 to 8192 in get_llm_response
(scripts/eval_math500_benchmark.py) to limit token usage.
---
 optillm/inference.py              | 3 ++-
 scripts/eval_math500_benchmark.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/optillm/inference.py b/optillm/inference.py
index 07640a49..206b357e 100644
--- a/optillm/inference.py
+++ b/optillm/inference.py
@@ -189,7 +189,8 @@ def should_use_mlx(model_id: str) -> bool:
     # Models that should use MLX
     mlx_patterns = [
         "mlx-community/",
-        "mlx-"
+        "mlx-",
+        "-mlx-"
     ]
     
     # Known problematic models that should prefer MLX on Apple Silicon
diff --git a/scripts/eval_math500_benchmark.py b/scripts/eval_math500_benchmark.py
index ad0e9f2d..165eefd5 100644
--- a/scripts/eval_math500_benchmark.py
+++ b/scripts/eval_math500_benchmark.py
@@ -692,7 +692,7 @@ def get_llm_response(problem: str, model: str) -> str:
             messages=[
                 {"role": "user", "content": SYSTEM_PROMPT + "\n" + problem}
             ],
-            max_tokens=32768, # for thinking models, we need to use a lot more tokens
+            max_tokens=8192, # for thinking models, we need to use a lot more tokens
             # extra_body = {
             #     "decoding" : "thinkdeeper",
             # }

From 0a6bc2085894107529744b2af3cd7f86ad8dd949 Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma <codelion@users.noreply.github.com>
Date: Mon, 30 Jun 2025 14:45:18 +0800
Subject: [PATCH 2/2] Bump version to 0.1.18

Update the version number in optillm/__init__.py and setup.py to 0.1.18 for the new release.
---
 optillm/__init__.py | 2 +-
 setup.py            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/optillm/__init__.py b/optillm/__init__.py
index bc23fac9..610fc269 100644
--- a/optillm/__init__.py
+++ b/optillm/__init__.py
@@ -2,7 +2,7 @@
 import os
 
 # Version information
-__version__ = "0.1.17"
+__version__ = "0.1.18"
 
 # Get the path to the root optillm.py
 spec = util.spec_from_file_location(
diff --git a/setup.py b/setup.py
index fb6f59f9..5de8c1dd 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 
 setup(
     name="optillm",
-    version="0.1.17",
+    version="0.1.18",
     packages=find_packages(include=['optillm', 'optillm.*']),  # This ensures all subpackages are included
     py_modules=['optillm'],
     package_data={