Skip to content

Commit 50f5f7a

Browse files
authored
Merge pull request #205 from codelion/fix-mlx-model-id
Update MLX model patterns and reduce max_tokens in eval script
2 parents e004b2e + 0a6bc20 commit 50f5f7a

File tree

4 files changed: +5 additions, −4 deletions

optillm/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import os
33

44
# Version information
5-
__version__ = "0.1.17"
5+
__version__ = "0.1.18"
66

77
# Get the path to the root optillm.py
88
spec = util.spec_from_file_location(

optillm/inference.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,8 @@ def should_use_mlx(model_id: str) -> bool:
189189
# Models that should use MLX
190190
mlx_patterns = [
191191
"mlx-community/",
192-
"mlx-"
192+
"mlx-",
193+
"-mlx-"
193194
]
194195

195196
# Known problematic models that should prefer MLX on Apple Silicon

scripts/eval_math500_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -692,7 +692,7 @@ def get_llm_response(problem: str, model: str) -> str:
692692
messages=[
693693
{"role": "user", "content": SYSTEM_PROMPT + "\n" + problem}
694694
],
695-
max_tokens=32768, # for thinking models, we need to use a lot more tokens
695+
max_tokens=8192, # for thinking models, we need to use a lot more tokens
696696
# extra_body = {
697697
# "decoding" : "thinkdeeper",
698698
# }

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
setup(
55
name="optillm",
6-
version="0.1.17",
6+
version="0.1.18",
77
packages=find_packages(include=['optillm', 'optillm.*']), # This ensures all subpackages are included
88
py_modules=['optillm'],
99
package_data={

0 commit comments

Comments (0)