|
| 1 | +""" |
| 2 | +Chain of Code (CoC) plugin for OptILLM. |
| 3 | +
|
| 4 | +This plugin implements a chain-of-code approach that combines Chain-of-Thought (CoT) |
| 5 | +reasoning with code execution and LLM-based code simulation. |
| 6 | +
|
| 7 | +SAFETY NOTE: This plugin has been refactored to use Jupyter notebook kernel execution |
| 8 | +instead of direct exec() calls. This provides process isolation and prevents potentially |
| 9 | +dangerous code from crashing or affecting the main OptILLM process. |
| 10 | +
|
| 11 | +Key safety improvements: |
| 12 | +- Code runs in isolated notebook kernels (separate processes) |
| 13 | +- 30-second timeout prevents infinite loops |
| 14 | +- Main process is protected from crashes, system exits, and memory issues |
| 15 | +- Matplotlib/visualization lines are neutralized in place (not executed) to prevent display issues |
| 16 | +- Comprehensive error handling and recovery |
| 17 | +""" |
| 18 | + |
1 | 19 | import re
|
2 | 20 | import logging
|
3 | 21 | from typing import Tuple, Dict, Any, List
|
|
6 | 24 | import math
|
7 | 25 | import importlib
|
8 | 26 | import json
|
| 27 | +import nbformat |
| 28 | +from nbconvert.preprocessors import ExecutePreprocessor |
| 29 | +import os |
| 30 | +import tempfile |
9 | 31 |
|
10 | 32 | logger = logging.getLogger(__name__)
|
11 | 33 |
|
|
15 | 37 | # Maximum attempts to fix code
|
16 | 38 | MAX_FIX_ATTEMPTS = 3
|
17 | 39 |
|
18 |
| -# List of allowed modules for execution |
19 |
| -ALLOWED_MODULES = { |
20 |
| - 'math': math, |
21 |
| - 'numpy': 'numpy', # String indicates module should be imported in execution context |
22 |
| -} |
23 |
| - |
24 | 40 | # Initial code generation prompt
|
25 | 41 | CHAIN_OF_CODE_PROMPT = '''
|
26 | 42 | Write Python code to solve this problem. The code should:
|
@@ -89,62 +105,129 @@ def extract_code_blocks(text: str) -> List[str]:
|
89 | 105 | return blocks
|
90 | 106 |
|
# Matches lines that reference matplotlib-style plotting. "figure" is only
# matched as a call or attribute access (e.g. ``plt.figure(`` / ``figure.add``)
# so that ordinary identifiers such as ``figures`` or ``configure`` are kept.
_VISUALIZATION_RE = re.compile(
    r"matplotlib|plt\.|\.plot\(|\.show\(|\bfigure\s*[.(]|subplot",
    re.IGNORECASE,
)


def sanitize_code(code: str) -> str:
    """Neutralize visualization statements so the code can run headless.

    Every line that looks like matplotlib/plotting code is replaced by a
    ``pass`` statement at the same indentation, with the original text kept
    in a trailing comment. Using ``pass`` (rather than a bare comment) keeps
    blocks such as ``if debug:`` / ``for ...:`` syntactically valid when the
    plotting call was their only statement.

    The check is line-based: a plotting statement that spans several physical
    lines, or a flagged line inside a bracketed continuation, is not handled
    and may still leave invalid syntax behind.

    Args:
        code: Python source code to sanitize.

    Returns:
        The source with the same number of lines, where visualization lines
        have been replaced by ``pass`` placeholders.
    """
    safe_lines = []
    for line in code.split('\n'):
        if _VISUALIZATION_RE.search(line):
            stripped = line.lstrip()
            # Preserve the original leading whitespace so block structure survives.
            indent = line[:len(line) - len(stripped)]
            safe_lines.append(f"{indent}pass  # {stripped} # Removed for safety")
        else:
            # Keep the line if it's not visualization-related
            safe_lines.append(line)

    return '\n'.join(safe_lines)
122 | 122 |
|
# Marker printed by the injected epilogue so the answer can be located in stdout.
_ANSWER_MARKER = "ANSWER_RESULT:"
# Text printed after the marker when the executed code defined no ``answer``.
_NO_ANSWER_SENTINEL = "No answer variable found"


def _collect_notebook_outputs(nb) -> Tuple[str, str, str]:
    """Return (stdout text, stderr text, raised-error text) gathered from all code cells."""
    stdout_parts = []
    stderr_parts = []
    error_parts = []
    for cell in nb.cells:
        if cell.cell_type != 'code' or not cell.outputs:
            continue
        for item in cell.outputs:
            if item.output_type == 'stream':
                if item.name == 'stdout':
                    stdout_parts.append(item.text)
                elif item.name == 'stderr':
                    stderr_parts.append(item.text)
            elif item.output_type == 'execute_result':
                stdout_parts.append(str(item.data.get('text/plain', '')))
            elif item.output_type == 'error':
                error_parts.append(f"{item.ename}: {item.evalue}")
    return ''.join(stdout_parts), ''.join(stderr_parts), ''.join(error_parts)


def _parse_answer(stdout_text: str) -> Tuple[Any, str]:
    """Turn captured stdout into an ``(answer, error)`` pair; exactly one side is meaningful."""
    import ast  # local import: only needed for parsing the captured answer

    output = stdout_text.strip()

    if _ANSWER_MARKER not in output:
        # Fallback: no marker was printed, so hand back whatever was printed.
        if output:
            logger.info(f"Execution completed with output: {output}")
            return output, None
        error = "Code executed but produced no output"
        logger.warning(error)
        return None, error

    # The marker is printed by the last statement of the cell, so everything
    # after its final occurrence is the answer -- including any extra lines
    # when the answer's string form spans several lines.
    answer_str = output.rsplit(_ANSWER_MARKER, 1)[1].strip()

    if answer_str == _NO_ANSWER_SENTINEL:
        error = "Code executed but did not produce an answer variable"
        logger.warning(error)
        return None, error

    try:
        # Try to evaluate the answer to convert it to proper type
        answer = ast.literal_eval(answer_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a Python literal (or too complex to parse): keep the raw string.
        answer = answer_str

    logger.info(f"Execution successful. Answer: {answer}")
    return answer, None


def execute_code(code: str) -> Tuple[Any, str]:
    """Run ``code`` in a Jupyter notebook kernel and return its ``answer``.

    The code is passed through ``sanitize_code``, followed by an epilogue that
    prints the ``answer`` variable behind an ``ANSWER_RESULT:`` marker, and
    executed as a single notebook cell with a 30-second timeout using the
    ``python3`` kernel. The notebook is executed in memory; no temporary file
    is created.

    Output written to stderr (for example warnings) does not by itself mark
    the run as failed as long as the answer marker was printed.

    Args:
        code: Python source expected to assign its result to ``answer``.

    Returns:
        A ``(answer, error)`` tuple. On success ``error`` is ``None`` and
        ``answer`` is the value parsed with ``ast.literal_eval`` when its
        printed form is a Python literal, otherwise the printed string. On
        failure ``answer`` is ``None`` and ``error`` describes the problem.
        Exceptions are never propagated to the caller.
    """
    logger.info("Attempting to execute code in notebook kernel")
    logger.info(f"Code:\n{code}")

    try:
        # Sanitize the code first
        sanitized_code = sanitize_code(code)

        # Add code that captures the answer variable
        enhanced_code = f"""
{sanitized_code}

# Capture the answer variable for output
if 'answer' in locals():
    print(f"ANSWER_RESULT: {{answer}}")
else:
    print("ANSWER_RESULT: No answer variable found")
"""

        # Create a notebook with the code
        notebook = nbformat.v4.new_notebook()
        notebook['cells'] = [nbformat.v4.new_code_cell(enhanced_code)]

        # Execute with timeout and isolation. With nbconvert's default
        # allow_errors=False a failing cell raises from preprocess() and is
        # reported by the outer handler below.
        ep = ExecutePreprocessor(timeout=30, kernel_name='python3')
        ep.preprocess(notebook, {'metadata': {'path': './'}})

        stdout_text, stderr_text, error_text = _collect_notebook_outputs(notebook)

        # A recorded error output is always a failure; stderr alone is only a
        # failure when the cell never reached the answer marker.
        if error_text or (stderr_text and _ANSWER_MARKER not in stdout_text):
            failure = "\n".join(
                part for part in (stderr_text.strip(), error_text) if part
            )
            logger.error(f"Execution failed: {failure}")
            return None, failure

        if stderr_text:
            logger.warning(f"Execution wrote to stderr: {stderr_text.strip()}")

        return _parse_answer(stdout_text)

    except Exception as e:
        error = f"Notebook execution failed: {str(e)}"
        logger.error(error)
        return None, error
|
149 | 232 |
|
150 | 233 | def generate_fixed_code(original_code: str, error: str, client, model: str) -> Tuple[str, int]:
|
|
0 commit comments