Skip to content

Commit e004b2e

Browse files
authored
Merge pull request #204 from codelion/fix-coc-execution
Fix coc execution
2 parents 2e4c0da + b43bc4d commit e004b2e

File tree

3 files changed: 132 additions and 49 deletions.

optillm/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import os
33

44
# Version information
5-
__version__ = "0.1.16"
5+
__version__ = "0.1.17"
66

77
# Get the path to the root optillm.py
88
spec = util.spec_from_file_location(

optillm/plugins/coc_plugin.py

Lines changed: 130 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,21 @@
1+
"""
2+
Chain of Code (CoC) plugin for OptILLM.
3+
4+
This plugin implements a chain-of-code approach that combines Chain-of-Thought (CoT)
5+
reasoning with code execution and LLM-based code simulation.
6+
7+
SAFETY NOTE: This plugin has been refactored to use Jupyter notebook kernel execution
8+
instead of direct exec() calls. This provides process isolation and prevents potentially
9+
dangerous code from crashing or affecting the main OptILLM process.
10+
11+
Key safety improvements:
12+
- Code runs in isolated notebook kernels (separate processes)
13+
- 30-second timeout prevents infinite loops
14+
- Main process is protected from crashes, system exits, and memory issues
15+
- Matplotlib/visualization code is safely removed to prevent display issues
16+
- Comprehensive error handling and recovery
17+
"""
18+
119
import re
220
import logging
321
from typing import Tuple, Dict, Any, List
@@ -6,6 +24,10 @@
624
import math
725
import importlib
826
import json
27+
import nbformat
28+
from nbconvert.preprocessors import ExecutePreprocessor
29+
import os
30+
import tempfile
931

1032
logger = logging.getLogger(__name__)
1133

@@ -15,12 +37,6 @@
1537
# Maximum attempts to fix code
1638
MAX_FIX_ATTEMPTS = 3
1739

18-
# List of allowed modules for execution
19-
ALLOWED_MODULES = {
20-
'math': math,
21-
'numpy': 'numpy', # String indicates module should be imported in execution context
22-
}
23-
2440
# Initial code generation prompt
2541
CHAIN_OF_CODE_PROMPT = '''
2642
Write Python code to solve this problem. The code should:
@@ -89,62 +105,129 @@ def extract_code_blocks(text: str) -> List[str]:
89105
return blocks
90106

91107
def sanitize_code(code: str) -> str:
92-
"""Prepare code for execution by adding necessary imports and safety checks."""
93-
# Add standard imports
94-
imports = "\n".join(f"import {mod}" for mod in ALLOWED_MODULES)
95-
108+
"""Prepare code for safe execution by removing problematic visualization code."""
96109
# Remove or modify problematic visualization code
97110
lines = code.split('\n')
98111
safe_lines = []
99112
for line in lines:
100-
# Skip matplotlib-related imports and plotting commands
113+
# Skip matplotlib-related imports and plotting commands that could cause issues
101114
if any(x in line.lower() for x in ['matplotlib', 'plt.', '.plot(', '.show(', 'figure', 'subplot']):
102-
continue
103-
# Keep the line if it's not visualization-related
104-
safe_lines.append(line)
105-
106-
safe_code = '\n'.join(safe_lines)
107-
safe_code = safe_code.replace('\n', '\n ')
115+
# Replace with a comment to maintain code structure
116+
safe_lines.append(f"# {line} # Removed for safety")
117+
else:
118+
# Keep the line if it's not visualization-related
119+
safe_lines.append(line)
108120

109-
# Add safety wrapper
110-
wrapper = f"""
111-
{imports}
112-
113-
def safe_execute():
114-
import numpy as np # Always allow numpy
115-
{safe_code}
116-
return answer if 'answer' in locals() else None
117-
118-
result = safe_execute()
119-
answer = result
120-
"""
121-
return wrapper
121+
return '\n'.join(safe_lines)
122122

123123
def execute_code(code: str) -> Tuple[Any, str]:
124-
"""Attempt to execute the code and return result or error."""
125-
logger.info("Attempting to execute code")
124+
"""Attempt to execute the code using Jupyter notebook kernel and return result or error."""
125+
logger.info("Attempting to execute code in notebook kernel")
126126
logger.info(f"Code:\n{code}")
127127

128128
try:
129-
# Create a clean environment
130-
execution_env = {}
129+
# Sanitize the code first
130+
sanitized_code = sanitize_code(code)
131131

132-
# Execute the code as-is
133-
exec(code, execution_env)
132+
# Create a notebook with the code
133+
notebook = nbformat.v4.new_notebook()
134134

135-
# Look for answer variable
136-
if 'answer' in execution_env:
137-
answer = execution_env['answer']
138-
logger.info(f"Execution successful. Answer: {answer}")
139-
return answer, None
140-
else:
141-
error = "Code executed but did not produce an answer variable"
142-
logger.warning(error)
143-
return None, error
135+
# Add code that captures the answer variable
136+
enhanced_code = f"""
137+
{sanitized_code}
138+
139+
# Capture the answer variable for output
140+
if 'answer' in locals():
141+
print(f"ANSWER_RESULT: {{answer}}")
142+
else:
143+
print("ANSWER_RESULT: No answer variable found")
144+
"""
145+
146+
notebook['cells'] = [nbformat.v4.new_code_cell(enhanced_code)]
147+
148+
# Convert notebook to JSON string and then to bytes
149+
notebook_json = nbformat.writes(notebook)
150+
notebook_bytes = notebook_json.encode('utf-8')
151+
152+
# Create temporary notebook file
153+
with tempfile.NamedTemporaryFile(mode='wb', suffix='.ipynb', delete=False) as tmp:
154+
tmp.write(notebook_bytes)
155+
tmp.flush()
156+
tmp_name = tmp.name
157+
158+
try:
159+
# Read and execute the notebook
160+
with open(tmp_name, 'r', encoding='utf-8') as f:
161+
nb = nbformat.read(f, as_version=4)
162+
163+
# Execute with timeout and isolation
164+
ep = ExecutePreprocessor(timeout=30, kernel_name='python3')
165+
ep.preprocess(nb, {'metadata': {'path': './'}})
166+
167+
# Extract the output
168+
output = ""
169+
error_output = ""
170+
171+
for cell in nb.cells:
172+
if cell.cell_type == 'code' and cell.outputs:
173+
for output_item in cell.outputs:
174+
if output_item.output_type == 'stream':
175+
if output_item.name == 'stdout':
176+
output += output_item.text
177+
elif output_item.name == 'stderr':
178+
error_output += output_item.text
179+
elif output_item.output_type == 'execute_result':
180+
output += str(output_item.data.get('text/plain', ''))
181+
elif output_item.output_type == 'error':
182+
error_output += f"{output_item.ename}: {output_item.evalue}"
183+
184+
# Check for errors first
185+
if error_output:
186+
logger.error(f"Execution failed: {error_output}")
187+
return None, error_output
188+
189+
# Parse the answer from output
190+
output = output.strip()
191+
192+
# Look for our special ANSWER_RESULT marker
193+
if "ANSWER_RESULT:" in output:
194+
answer_line = [line for line in output.split('\n') if 'ANSWER_RESULT:' in line][-1]
195+
answer_str = answer_line.split('ANSWER_RESULT:', 1)[1].strip()
196+
197+
if answer_str == "No answer variable found":
198+
error = "Code executed but did not produce an answer variable"
199+
logger.warning(error)
200+
return None, error
201+
202+
try:
203+
# Try to evaluate the answer to convert it to proper type
204+
answer = ast.literal_eval(answer_str)
205+
except (ValueError, SyntaxError):
206+
# If literal_eval fails, keep as string
207+
answer = answer_str
208+
209+
logger.info(f"Execution successful. Answer: {answer}")
210+
return answer, None
211+
else:
212+
# Fallback: try to extract answer from any output
213+
if output:
214+
logger.info(f"Execution completed with output: {output}")
215+
return output, None
216+
else:
217+
error = "Code executed but produced no output"
218+
logger.warning(error)
219+
return None, error
220+
221+
finally:
222+
# Clean up temporary file
223+
try:
224+
os.unlink(tmp_name)
225+
except:
226+
pass
144227

145228
except Exception as e:
146-
error = str(e)
147-
logger.error(f"Execution failed: {error}")
229+
error = f"Notebook execution failed: {str(e)}"
230+
logger.error(error)
148231
return None, error
149232

150233
def generate_fixed_code(original_code: str, error: str, client, model: str) -> Tuple[str, int]:

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
setup(
55
name="optillm",
6-
version="0.1.16",
6+
version="0.1.17",
77
packages=find_packages(include=['optillm', 'optillm.*']), # This ensures all subpackages are included
88
py_modules=['optillm'],
99
package_data={

0 commit comments

Comments
 (0)