Skip to content

Commit 66c895b

Browse files
committed
Incoming
1 parent 6ada942 commit 66c895b

File tree

14 files changed

+2939
-664
lines changed

14 files changed

+2939
-664
lines changed

interpreter/computer_use/loop.py

Lines changed: 232 additions & 37 deletions
Large diffs are not rendered by default.

interpreter/computer_use/tools/bash.py

Lines changed: 105 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import asyncio
22
import os
3+
import pty
4+
import sys
35
from typing import ClassVar, Literal
46

7+
import pyte
58
from anthropic.types.beta import BetaToolBash20241022Param
69

710
from .base import BaseAnthropicTool, CLIResult, ToolError, ToolResult
@@ -21,20 +24,43 @@ class _BashSession:
2124
def __init__(self):
2225
self._started = False
2326
self._timed_out = False
27+
# Create a terminal screen and stream
28+
self._screen = pyte.Screen(80, 24) # Standard terminal size
29+
self._stream = pyte.Stream(self._screen)
2430

2531
async def start(self):
2632
if self._started:
2733
return
2834

29-
self._process = await asyncio.create_subprocess_shell(
30-
self.command,
31-
preexec_fn=os.setsid,
32-
shell=True,
33-
bufsize=0,
34-
stdin=asyncio.subprocess.PIPE,
35-
stdout=asyncio.subprocess.PIPE,
36-
stderr=asyncio.subprocess.PIPE,
37-
)
35+
try:
36+
# Try to create process with PTY
37+
master, slave = pty.openpty()
38+
self._process = await asyncio.create_subprocess_shell(
39+
self.command,
40+
preexec_fn=os.setsid,
41+
shell=True,
42+
bufsize=0,
43+
stdin=asyncio.subprocess.PIPE,
44+
stdout=slave,
45+
stderr=slave,
46+
)
47+
# Store master fd for reading
48+
self._master_fd = master
49+
self._using_pty = True
50+
print("using pty")
51+
except (ImportError, OSError):
52+
print("using pipes")
53+
# Fall back to regular pipes if PTY is not available
54+
self._process = await asyncio.create_subprocess_shell(
55+
self.command,
56+
preexec_fn=os.setsid,
57+
shell=True,
58+
bufsize=0,
59+
stdin=asyncio.subprocess.PIPE,
60+
stdout=asyncio.subprocess.PIPE,
61+
stderr=asyncio.subprocess.PIPE,
62+
)
63+
self._using_pty = False
3864

3965
self._started = True
4066

@@ -45,18 +71,11 @@ def stop(self):
4571
if self._process.returncode is not None:
4672
return
4773
self._process.terminate()
74+
if hasattr(self, "_master_fd"):
75+
os.close(self._master_fd)
4876

4977
async def run(self, command: str):
5078
"""Execute a command in the bash shell."""
51-
# Ask for user permission before executing the command
52-
print(f"Do you want to execute the following command?\n{command}")
53-
user_input = input("Enter 'yes' to proceed, anything else to cancel: ")
54-
55-
if user_input.lower() != "yes":
56-
return ToolResult(
57-
system="Command execution cancelled by user",
58-
error="User did not provide permission to execute the command.",
59-
)
6079
if not self._started:
6180
raise ToolError("Session has not started.")
6281
if self._process.returncode is not None:
@@ -71,29 +90,70 @@ async def run(self, command: str):
7190

7291
# we know stdin is not None because the process is always created with stdin=PIPE
7392
assert self._process.stdin
74-
assert self._process.stdout
75-
assert self._process.stderr
7693

7794
# send command to the process
7895
self._process.stdin.write(
7996
command.encode() + f"; echo '{self._sentinel}'\n".encode()
8097
)
8198
await self._process.stdin.drain()
8299

83-
# read output from the process, until the sentinel is found
84100
try:
85101
async with asyncio.timeout(self._timeout):
86-
while True:
87-
await asyncio.sleep(self._output_delay)
88-
# if we read directly from stdout/stderr, it will wait forever for
89-
# EOF. use the StreamReader buffer directly instead.
90-
output = (
91-
self._process.stdout._buffer.decode()
92-
) # pyright: ignore[reportAttributeAccessIssue]
93-
if self._sentinel in output:
94-
# strip the sentinel and break
95-
output = output[: output.index(self._sentinel)]
96-
break
102+
if self._using_pty:
103+
# Reset screen state
104+
self._screen.reset()
105+
output = ""
106+
while True:
107+
try:
108+
raw_chunk = os.read(self._master_fd, 1024)
109+
chunk_str = raw_chunk.decode()
110+
111+
# Update output before checking sentinel
112+
output += chunk_str
113+
114+
# Check for sentinel
115+
if self._sentinel in chunk_str:
116+
# Clean the output for display
117+
clean_chunk = chunk_str[
118+
: chunk_str.index(self._sentinel)
119+
].encode()
120+
if clean_chunk:
121+
os.write(sys.stdout.fileno(), clean_chunk)
122+
# Clean the stored output
123+
if self._sentinel in output:
124+
output = output[: output.index(self._sentinel)]
125+
break
126+
127+
os.write(sys.stdout.fileno(), raw_chunk)
128+
except OSError:
129+
break
130+
await asyncio.sleep(0.01)
131+
error = ""
132+
else:
133+
# Real-time output for pipe-based reading
134+
output = ""
135+
while True:
136+
chunk = await self._process.stdout.read(1024)
137+
if not chunk:
138+
break
139+
chunk_str = chunk.decode()
140+
output += chunk_str
141+
142+
# Check for sentinel
143+
if self._sentinel in chunk_str:
144+
# Clean the chunk for display
145+
clean_chunk = chunk_str[
146+
: chunk_str.index(self._sentinel)
147+
].encode()
148+
if clean_chunk:
149+
os.write(sys.stdout.fileno(), clean_chunk)
150+
# Clean the stored output
151+
if self._sentinel in output:
152+
output = output[: output.index(self._sentinel)]
153+
break
154+
155+
os.write(sys.stdout.fileno(), chunk)
156+
await asyncio.sleep(0.01)
97157
except asyncio.TimeoutError:
98158
self._timed_out = True
99159
raise ToolError(
@@ -102,19 +162,24 @@ async def run(self, command: str):
102162

103163
if output.endswith("\n"):
104164
output = output[:-1]
105-
106-
error = (
107-
self._process.stderr._buffer.decode()
108-
) # pyright: ignore[reportAttributeAccessIssue]
109-
if error.endswith("\n"):
165+
if not self._using_pty and error.endswith("\n"):
110166
error = error[:-1]
111167

112-
# clear the buffers so that the next output can be read correctly
113-
self._process.stdout._buffer.clear() # pyright: ignore[reportAttributeAccessIssue]
114-
self._process.stderr._buffer.clear() # pyright: ignore[reportAttributeAccessIssue]
168+
# Clear buffers only when using pipes
169+
if not self._using_pty:
170+
self._process.stdout._buffer.clear()
171+
self._process.stderr._buffer.clear()
115172

116173
return CLIResult(output=output, error=error)
117174

175+
@staticmethod
176+
def _strip_ansi(text: str) -> str:
177+
"""Remove ANSI escape sequences from text."""
178+
import re
179+
180+
ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
181+
return ansi_escape.sub("", text)
182+
118183

119184
class BashTool(BaseAnthropicTool):
120185
"""

interpreter/computer_use/tools/computer.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,21 @@
33
import math
44
import os
55
import platform
6-
import shlex
7-
import shutil
86
import tempfile
97
import time
108
from enum import StrEnum
119
from pathlib import Path
1210
from typing import Literal, TypedDict
1311
from uuid import uuid4
1412

15-
# Add import for PyAutoGUI
1613
import pyautogui
1714
from anthropic.types.beta import BetaToolComputerUse20241022Param
15+
from PIL import Image
16+
from screeninfo import get_monitors
1817

1918
from .base import BaseAnthropicTool, ToolError, ToolResult
2019
from .run import run
2120

22-
OUTPUT_DIR = "/tmp/outputs"
23-
2421
TYPING_DELAY_MS = 12
2522
TYPING_GROUP_SIZE = 50
2623

@@ -93,15 +90,15 @@ def smooth_move_to(x, y, duration=1.2):
9390

9491
class ComputerTool(BaseAnthropicTool):
9592
"""
96-
A tool that allows the agent to interact with the primary monitor's screen, keyboard, and mouse.
93+
A tool that allows the agent to interact with the screen, keyboard, and mouse of the current computer.
9794
The tool parameters are defined by Anthropic and are not editable.
9895
"""
9996

10097
name: Literal["computer"] = "computer"
10198
api_type: Literal["computer_20241022"] = "computer_20241022"
10299
width: int
103100
height: int
104-
display_num: None # Simplified to always be None since we're only using primary display
101+
display_num: int | None
105102

106103
_screenshot_delay = 2.0
107104
_scaling_enabled = True
@@ -122,8 +119,25 @@ def to_params(self) -> BetaToolComputerUse20241022Param:
122119

123120
def __init__(self):
124121
super().__init__()
122+
125123
self.width, self.height = pyautogui.size()
124+
125+
# Get display number and set up display offset
126126
self.display_num = None
127+
self._display_offset_x = 0
128+
if (display_num := os.getenv("DISPLAY_NUM")) is not None:
129+
self.display_num = int(display_num)
130+
# Get all displays using screeninfo
131+
try:
132+
monitors = get_monitors()
133+
# Calculate x offset based on display number
134+
# Assuming displays are arranged horizontally
135+
self._display_offset_x = sum(
136+
m.width for m in monitors[: self.display_num]
137+
)
138+
except ImportError:
139+
# Fallback if screeninfo not available
140+
self._display_offset_x = 0
127141

128142
async def __call__(
129143
self,
@@ -136,15 +150,23 @@ async def __call__(
136150
if action in ("mouse_move", "left_click_drag"):
137151
if coordinate is None:
138152
raise ToolError(f"coordinate is required for {action}")
153+
if text is not None:
154+
raise ToolError(f"text is not accepted for {action}")
155+
if not isinstance(coordinate, list) or len(coordinate) != 2:
156+
raise ToolError(f"{coordinate} must be a tuple of length 2")
157+
if not all(isinstance(i, int) and i >= 0 for i in coordinate):
158+
raise ToolError(f"{coordinate} must be a tuple of non-negative ints")
159+
139160
x, y = self.scale_coordinates(
140161
ScalingSource.API, coordinate[0], coordinate[1]
141162
)
142163

143164
if action == "mouse_move":
144165
smooth_move_to(x, y)
145166
elif action == "left_click_drag":
167+
pyautogui.mouseDown(button="left")
146168
smooth_move_to(x, y)
147-
pyautogui.dragTo(x, y, button="left")
169+
pyautogui.mouseUp(button="left")
148170

149171
elif action in ("key", "type"):
150172
if text is None:
@@ -232,7 +254,6 @@ async def screenshot(self):
232254
ScalingSource.COMPUTER, self.width, self.height
233255
)
234256
# Use PIL directly instead of shell convert command
235-
from PIL import Image
236257

237258
with Image.open(path) as img:
238259
img = img.resize((x, y), Image.Resampling.LANCZOS)

interpreter/computer_use/tools/edit.py

Lines changed: 6 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -50,40 +50,18 @@ async def __call__(
5050
insert_line: int | None = None,
5151
**kwargs,
5252
):
53-
# Ask for user permission before executing the command
54-
print(f"Do you want to execute the following command?")
55-
print(f"Command: {command}")
56-
print(f"Path: {path}")
57-
if file_text:
58-
print(f"File text: {file_text}")
59-
if view_range:
60-
print(f"View range: {view_range}")
61-
if old_str:
62-
print(f"Old string: {old_str}")
63-
if new_str:
64-
print(f"New string: {new_str}")
65-
if insert_line is not None:
66-
print(f"Insert line: {insert_line}")
67-
68-
user_input = input("Enter 'yes' to proceed, anything else to cancel: ")
69-
70-
if user_input.lower() != "yes":
71-
return ToolResult(
72-
system="Command execution cancelled by user",
73-
error="User did not provide permission to execute the command.",
74-
)
7553
_path = Path(path)
7654
self.validate_path(command, _path)
7755
if command == "view":
7856
return await self.view(_path, view_range)
7957
elif command == "create":
80-
if not file_text:
58+
if file_text is None:
8159
raise ToolError("Parameter `file_text` is required for command: create")
8260
self.write_file(_path, file_text)
8361
self._file_history[_path].append(file_text)
8462
return ToolResult(output=f"File created successfully at: {_path}")
8563
elif command == "str_replace":
86-
if not old_str:
64+
if old_str is None:
8765
raise ToolError(
8866
"Parameter `old_str` is required for command: str_replace"
8967
)
@@ -93,7 +71,7 @@ async def __call__(
9371
raise ToolError(
9472
"Parameter `insert_line` is required for command: insert"
9573
)
96-
if not new_str:
74+
if new_str is None:
9775
raise ToolError("Parameter `new_str` is required for command: insert")
9876
return self.insert(_path, insert_line, new_str)
9977
elif command == "undo_edit":
@@ -155,15 +133,15 @@ async def view(self, path: Path, view_range: list[int] | None = None):
155133
init_line, final_line = view_range
156134
if init_line < 1 or init_line > n_lines_file:
157135
raise ToolError(
158-
f"Invalid `view_range`: {view_range}. It's first element `{init_line}` should be within the range of lines of the file: {[1, n_lines_file]}"
136+
f"Invalid `view_range`: {view_range}. Its first element `{init_line}` should be within the range of lines of the file: {[1, n_lines_file]}"
159137
)
160138
if final_line > n_lines_file:
161139
raise ToolError(
162-
f"Invalid `view_range`: {view_range}. It's second element `{final_line}` should be smaller than the number of lines in the file: `{n_lines_file}`"
140+
f"Invalid `view_range`: {view_range}. Its second element `{final_line}` should be smaller than the number of lines in the file: `{n_lines_file}`"
163141
)
164142
if final_line != -1 and final_line < init_line:
165143
raise ToolError(
166-
f"Invalid `view_range`: {view_range}. It's second element `{final_line}` should be larger or equal than its first `{init_line}`"
144+
f"Invalid `view_range`: {view_range}. Its second element `{final_line}` should be larger or equal than its first `{init_line}`"
167145
)
168146

169147
if final_line == -1:

interpreter/computer_use/ui/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)