Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix search_files_by_name to properly handle glob patterns with ** #914

Draft
wants to merge 2 commits into
base: develop
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 38 additions & 10 deletions src/codegen/extensions/tools/search_files_by_name.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import math
import shutil
import subprocess
from typing import ClassVar, Optional
from typing import ClassVar

from pydantic import Field

Expand Down Expand Up @@ -51,7 +51,7 @@

Args:
codebase: The codebase to search in
pattern: Glob pattern to search for (e.g. "*.py", "test_*.py")
pattern: Glob pattern to search for (e.g. "*.py", "test_*.py", "**/.github/workflows/*.yml")
page: Page number to return (1-based, default: 1)
files_per_page: Number of files to return per page (default: 10)
"""
Expand All @@ -62,24 +62,53 @@
if files_per_page is not None and files_per_page < 1:
files_per_page = 20

# Handle patterns that start with "**/" by stripping that prefix and searching from the repo root.
# A leading "**/" is the common way to ask for files at any depth.
search_pattern = pattern
search_dir = codebase.repo_path

if pattern.startswith("**/"):
# Remove the **/ prefix for the search pattern
search_pattern = pattern[3:]

if shutil.which("fd") is None:
logger.warning("fd is not installed, falling back to find")

# For find, we need to handle the pattern differently
find_args = ["find", ".", "-type", "f"]

# If the pattern contains **, we need to use -path instead of -name
if "**" in pattern:
# Match against the whole path with find's -path, prefixing "*" so the pattern can match at any depth
path_pattern = pattern.replace("**/", "**/")
find_args.extend(["-path", f"*{search_pattern}"])
else:
# Use -name for simple patterns
find_args.extend(["-name", search_pattern])

results = subprocess.check_output(
["find", "-name", pattern],
cwd=codebase.repo_path,
find_args,
cwd=search_dir,
timeout=30,
)
all_files = [path.removeprefix("./") for path in results.decode("utf-8").strip().split("\n")] if results.strip() else []

else:
logger.info(f"Searching for files with pattern: {pattern}")

# fd handles ** patterns natively
fd_args = ["fd", "-t", "f", "-g", pattern]

results = subprocess.check_output(
["fd", "-g", pattern],
cwd=codebase.repo_path,
fd_args,
cwd=search_dir,
timeout=30,
)
all_files = results.decode("utf-8").strip().split("\n") if results.strip() else []

# Filter out empty strings
all_files = [f for f in all_files if f]

# Sort files for consistent pagination
all_files.sort()

Expand All @@ -88,17 +117,16 @@
if files_per_page == math.inf:
files_per_page = total_files
total_pages = 1
else:
else:
total_pages = (total_files + files_per_page - 1) // files_per_page if total_files > 0 else 1

Check failure on line 121 in src/codegen/extensions/tools/search_files_by_name.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible types in assignment (expression has type "int | float", variable has type "int") [assignment]



# Ensure page is within valid range
page = min(page, total_pages)

# Get paginated results
start_idx = (page - 1) * files_per_page
end_idx = start_idx + files_per_page
paginated_files = all_files[start_idx:end_idx]

Check failure on line 129 in src/codegen/extensions/tools/search_files_by_name.py

View workflow job for this annotation

GitHub Actions / mypy

error: Slice index must be an integer, SupportsIndex or None [misc]

return SearchFilesByNameResultObservation(
status="success",
Expand Down
Loading