Skip to content

Commit 3a17f7a

Browse files
authored
fix: update searchbyfilename tool to paginate (#896)
# Motivation
<!-- Why is this change necessary? -->

# Content
Add pagination fields and a paginated response to the SearchFilesByName tool, to prevent large result sets from blowing up the context.
<!-- Please include a summary of the change -->

# Testing
<!-- How was the change tested? -->

# Please check the following before marking your PR as ready for review
- [ ] I have added tests for my changes
- [ ] I have updated the documentation or added new documentation as needed
1 parent 5a55008 commit 3a17f7a

File tree

3 files changed

+65
-10
lines changed

3 files changed

+65
-10
lines changed

src/codegen/extensions/langchain/tools.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1091,24 +1091,27 @@ class SearchFilesByNameInput(BaseModel):
10911091
"""Input for searching files by name pattern."""
10921092

10931093
pattern: str = Field(..., description="`fd`-compatible glob pattern to search for (e.g. '*.py', 'test_*.py')")
1094-
1094+
page: int = Field(default=1, description="Page number to return (1-based)")
1095+
files_per_page: int | float = Field(default=10, description="Number of files per page to return, use math.inf to return all files")
10951096

10961097
class SearchFilesByNameTool(BaseTool):
10971098
"""Tool for searching files by filename across a codebase."""
10981099

10991100
name: ClassVar[str] = "search_files_by_name"
11001101
description: ClassVar[str] = """
1101-
Search for files and directories by glob pattern across the active codebase. This is useful when you need to:
1102+
Search for files and directories by glob pattern (with pagination) across the active codebase. This is useful when you need to:
11021103
- Find specific file types (e.g., '*.py', '*.tsx')
11031104
- Locate configuration files (e.g., 'package.json', 'requirements.txt')
11041105
- Find files with specific names (e.g., 'README.md', 'Dockerfile')
11051106
"""
11061107
args_schema: ClassVar[type[BaseModel]] = SearchFilesByNameInput
11071108
codebase: Codebase = Field(exclude=True)
11081109

1110+
1111+
11091112
def __init__(self, codebase: Codebase):
11101113
super().__init__(codebase=codebase)
11111114

11121115
def _run(self, pattern: str) -> str:
11131116
"""Execute the glob pattern search using fd."""
1114-
return search_files_by_name(self.codebase, pattern).render()
1117+
return search_files_by_name(self.codebase, pattern, page=self.page, files_per_page=self.files_per_page).render()

src/codegen/extensions/tools/global_replacement_edit.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import difflib
44
import logging
5+
import math
56
import re
67
from typing import ClassVar
78

@@ -103,7 +104,7 @@ def replacement_edit_global(
103104
)
104105

105106
diffs = []
106-
for file in search_files_by_name(codebase, file_pattern).files:
107+
for file in search_files_by_name(codebase, file_pattern, page=1, files_per_page=math.inf).files:
107108
if count is not None and count <= 0:
108109
break
109110
try:

src/codegen/extensions/tools/search_files_by_name.py

+57-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
import math
12
import shutil
23
import subprocess
3-
from typing import ClassVar
4+
from typing import ClassVar, Optional
45

56
from pydantic import Field
67

@@ -20,33 +21,55 @@ class SearchFilesByNameResultObservation(Observation):
2021
files: list[str] = Field(
2122
description="List of matching file paths",
2223
)
24+
page: int = Field(
25+
description="Current page number (1-based)",
26+
)
27+
total_pages: int = Field(
28+
description="Total number of pages available",
29+
)
30+
total_files: int = Field(
31+
description="Total number of files with matches",
32+
)
33+
files_per_page: int | float = Field(
34+
description="Number of files shown per page",
35+
)
2336

24-
str_template: ClassVar[str] = "Found {total} files matching pattern: {pattern}"
37+
str_template: ClassVar[str] = "Found {total_files} files matching pattern: {pattern} (page {page}/{total_pages})"
2538

2639
@property
2740
def total(self) -> int:
28-
return len(self.files)
41+
return self.total_files
2942

3043

3144
def search_files_by_name(
3245
codebase: Codebase,
3346
pattern: str,
47+
page: int = 1,
48+
files_per_page: int | float = 10,
3449
) -> SearchFilesByNameResultObservation:
3550
"""Search for files by name pattern in the codebase.
3651
3752
Args:
3853
codebase: The codebase to search in
3954
pattern: Glob pattern to search for (e.g. "*.py", "test_*.py")
55+
page: Page number to return (1-based, default: 1)
56+
files_per_page: Number of files to return per page (default: 10)
4057
"""
4158
try:
59+
# Validate pagination parameters
60+
if page < 1:
61+
page = 1
62+
if files_per_page is not None and files_per_page < 1:
63+
files_per_page = 20
64+
4265
if shutil.which("fd") is None:
4366
logger.warning("fd is not installed, falling back to find")
4467
results = subprocess.check_output(
4568
["find", "-name", pattern],
4669
cwd=codebase.repo_path,
4770
timeout=30,
4871
)
49-
files = [path.removeprefix("./") for path in results.decode("utf-8").strip().split("\n")] if results.strip() else []
72+
all_files = [path.removeprefix("./") for path in results.decode("utf-8").strip().split("\n")] if results.strip() else []
5073

5174
else:
5275
logger.info(f"Searching for files with pattern: {pattern}")
@@ -55,12 +78,36 @@ def search_files_by_name(
5578
cwd=codebase.repo_path,
5679
timeout=30,
5780
)
58-
files = results.decode("utf-8").strip().split("\n") if results.strip() else []
81+
all_files = results.decode("utf-8").strip().split("\n") if results.strip() else []
82+
83+
# Sort files for consistent pagination
84+
all_files.sort()
85+
86+
# Calculate pagination
87+
total_files = len(all_files)
88+
if files_per_page == math.inf:
89+
files_per_page = total_files
90+
total_pages = 1
91+
else:
92+
total_pages = (total_files + files_per_page - 1) // files_per_page if total_files > 0 else 1
93+
94+
95+
# Ensure page is within valid range
96+
page = min(page, total_pages)
97+
98+
# Get paginated results
99+
start_idx = (page - 1) * files_per_page
100+
end_idx = start_idx + files_per_page
101+
paginated_files = all_files[start_idx:end_idx]
59102

60103
return SearchFilesByNameResultObservation(
61104
status="success",
62105
pattern=pattern,
63-
files=files,
106+
files=paginated_files,
107+
page=page,
108+
total_pages=total_pages,
109+
total_files=total_files,
110+
files_per_page=files_per_page,
64111
)
65112

66113
except Exception as e:
@@ -69,4 +116,8 @@ def search_files_by_name(
69116
error=f"Error searching files: {e!s}",
70117
pattern=pattern,
71118
files=[],
119+
page=page,
120+
total_pages=0,
121+
total_files=0,
122+
files_per_page=files_per_page,
72123
)

0 commit comments

Comments
 (0)