Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit ef4ff12

Browse files
committedJan 17, 2025
lazy FileInfo extraction
Most symbols are filtered out regardless of the file location. This commit creates a FileInfo cache and only builds the FileInfo when necessary. The search paths are also lazily iterated to find the best matches. #perf
1 parent b893feb commit ef4ff12

File tree

4 files changed

+200
-150
lines changed

4 files changed

+200
-150
lines changed
 

‎include/mrdocs/Support/Path.hpp

+6
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,12 @@ std::string
241241
makePosixStyle(
242242
std::string_view pathName);
243243

244+
/** Check if the path is posix style.
245+
*/
246+
MRDOCS_DECL
247+
bool
248+
isPosixStyle(std::string_view pathName);
249+
244250
/** Return the filename with a new or different extension.
245251
246252
@param fileName The absolute or relative path

‎src/lib/AST/ASTVisitor.cpp

+129-123
Original file line numberDiff line numberDiff line change
@@ -44,61 +44,6 @@
4444

4545
namespace clang::mrdocs {
4646

47-
ASTVisitor::FileInfo
48-
ASTVisitor::FileInfo::build(
49-
std::span<std::string> const search_dirs,
50-
std::string_view const file_path,
51-
std::string_view const sourceRoot) {
52-
FileInfo file_info;
53-
file_info.full_path = std::string(file_path);
54-
file_info.short_path = std::string(file_path);
55-
56-
// Find the best match for the file path
57-
for (auto const& search_dir : search_dirs)
58-
{
59-
if (files::startsWith(file_path, search_dir))
60-
{
61-
file_info.short_path.erase(0, search_dir.size());
62-
if (file_info.short_path.starts_with('/'))
63-
{
64-
file_info.short_path.erase(0, 1);
65-
}
66-
return file_info;
67-
}
68-
}
69-
70-
// Fallback to sourceRoot
71-
if (files::startsWith(file_path, sourceRoot))
72-
{
73-
file_info.short_path.erase(0, sourceRoot.size());
74-
if (file_info.short_path.starts_with('/'))
75-
{
76-
file_info.short_path.erase(0, 1);
77-
}
78-
return file_info;
79-
}
80-
81-
// Fallback to system search paths in PATH
82-
std::optional<std::string> const optEnvPathsStr = llvm::sys::Process::GetEnv("PATH");
83-
MRDOCS_CHECK_OR(optEnvPathsStr, file_info);
84-
std::string const& envPathsStr = *optEnvPathsStr;
85-
for (auto const envPaths = llvm::split(envPathsStr, llvm::sys::EnvPathSeparator);
86-
auto envPath: envPaths)
87-
{
88-
auto normEnvPath = files::makePosixStyle(envPath);
89-
if (files::startsWith(file_path, normEnvPath))
90-
{
91-
file_info.short_path.erase(0, normEnvPath.size());
92-
if (file_info.short_path.starts_with('/'))
93-
{
94-
file_info.short_path.erase(0, 1);
95-
}
96-
return file_info;
97-
}
98-
}
99-
return file_info;
100-
}
101-
10247
ASTVisitor::
10348
ASTVisitor(
10449
const ConfigImpl& config,
@@ -123,42 +68,6 @@ ASTVisitor(
12368
// used somewhere
12469
MRDOCS_ASSERT(context_.getTraversalScope() ==
12570
std::vector<Decl*>{context_.getTranslationUnitDecl()});
126-
127-
// Store the search directories
128-
auto const& cwd = source_.getFileManager().getFileSystemOpts().WorkingDir;
129-
Preprocessor& PP = sema_.getPreprocessor();
130-
HeaderSearch& HS = PP.getHeaderSearchInfo();
131-
search_dirs_.reserve(HS.search_dir_size());
132-
// first, convert all the include search directories into POSIX style
133-
for (const DirectoryLookup& DL : HS.search_dir_range())
134-
{
135-
OptionalDirectoryEntryRef DR = DL.getDirRef();
136-
// only consider normal directories
137-
if (!DL.isNormalDir() || !DR)
138-
{
139-
continue;
140-
}
141-
// store the normalized path
142-
auto normPath = files::makePosixStyle(files::makeAbsolute(DR->getName(), cwd));
143-
search_dirs_.push_back(std::move(normPath));
144-
}
145-
146-
// Store preprocessed information about all file entries
147-
std::string const sourceRoot = files::makePosixStyle(files::makeAbsolute(config_->sourceRoot, cwd));
148-
auto cacheFileInfo = [&](FileEntry const* entry) {
149-
std::string_view const file_path = entry->tryGetRealPathName();
150-
MRDOCS_CHECK_OR(!file_path.empty());
151-
auto const normPath = files::makePosixStyle(
152-
files::makeAbsolute(file_path, cwd));
153-
auto FI = FileInfo::build(search_dirs_, normPath, sourceRoot);
154-
files_.emplace(entry, FI);
155-
};
156-
FileEntry const* mainFileEntry = source_.getFileEntryForID(source_.getMainFileID());
157-
cacheFileInfo(mainFileEntry);
158-
for(const FileEntry* file : PP.getIncludedFiles())
159-
{
160-
cacheFileInfo(file);
161-
}
16271
}
16372

16473
void
@@ -2514,26 +2423,26 @@ shouldExtract(
25142423
// This is not a scoped promotion because
25152424
// parents and members should also assume
25162425
// the same base extraction mode.
2517-
if (checkFileFilters(D) &&
2518-
checkSymbolFilters(D))
2426+
if (checkSymbolFilters(D) &&
2427+
checkFileFilters(D))
25192428
{
25202429
mode_ = ExtractionMode::Regular;
25212430
}
25222431
// But we return true either way
25232432
return true;
25242433
}
25252434

2435+
// Check if this symbol should be extracted according
2436+
// to its qualified name. This checks if it matches
2437+
// the symbol patterns and if it's not excluded.
2438+
MRDOCS_CHECK_OR(checkSymbolFilters(D), false);
2439+
25262440
// Check if this symbol should be extracted according
25272441
// to its location. This checks if it's in one of the
25282442
// input directories, if it matches the file patterns,
25292443
// and it's not in an excluded file.
25302444
MRDOCS_CHECK_OR(checkFileFilters(D), false);
25312445

2532-
// Check if this symbol should be extracted according
2533-
// to its qualified name. This checks if it matches
2534-
// the symbol patterns and if it's not excluded.
2535-
MRDOCS_CHECK_OR(checkSymbolFilters(D), false);
2536-
25372446
return true;
25382447
}
25392448

@@ -2776,36 +2685,132 @@ ASTVisitor::FileInfo*
27762685
ASTVisitor::
27772686
findFileInfo(clang::SourceLocation const loc)
27782687
{
2779-
if (loc.isInvalid())
2780-
{
2781-
return nullptr;
2782-
}
2783-
// KRYSTIAN FIXME: we really should not be
2784-
// calling getPresumedLoc this often,
2688+
MRDOCS_CHECK_OR(!loc.isInvalid(), nullptr);
2689+
2690+
// KRYSTIAN FIXME: we really should not be calling getPresumedLoc this often,
27852691
// it's quite expensive
27862692
auto const presumed = source_.getPresumedLoc(loc, false);
2787-
if (presumed.isInvalid())
2693+
MRDOCS_CHECK_OR(!presumed.isInvalid(), nullptr);
2694+
2695+
const FileEntry* entry = source_.getFileEntryForID( presumed.getFileID());
2696+
MRDOCS_CHECK_OR(entry, nullptr);
2697+
2698+
// Find in the cache
2699+
if (auto const it = files_.find(entry); it != files_.end())
27882700
{
2789-
return nullptr;
2701+
return std::addressof(it->second);
27902702
}
2791-
const FileEntry* file =
2792-
source_.getFileEntryForID(
2793-
presumed.getFileID());
2794-
// KRYSTIAN NOTE: i have no idea under what
2795-
// circumstances the file entry would be null
2796-
if (!file)
2703+
2704+
// Build FileInfo
2705+
auto const FI = buildFileInfo(entry);
2706+
MRDOCS_CHECK_OR(FI, nullptr);
2707+
auto [it, inserted] = files_.try_emplace(entry, std::move(*FI));
2708+
return std::addressof(it->second);
2709+
}
2710+
2711+
std::optional<ASTVisitor::FileInfo>
2712+
ASTVisitor::
2713+
buildFileInfo(FileEntry const* entry)
2714+
{
2715+
std::string_view const file_path = entry->tryGetRealPathName();
2716+
MRDOCS_CHECK_OR(!file_path.empty(), std::nullopt);
2717+
return buildFileInfo(file_path);
2718+
}
2719+
2720+
ASTVisitor::FileInfo
2721+
ASTVisitor::
2722+
buildFileInfo(std::string_view const file_path)
2723+
{
2724+
FileInfo file_info;
2725+
file_info.full_path = file_path;
2726+
if (!files::isPosixStyle(file_info.full_path))
27972727
{
2798-
return nullptr;
2728+
file_info.full_path = files::makePosixStyle(file_info.full_path);
27992729
}
2800-
// KRYSTIAN NOTE: i have no idea under what
2801-
// circumstances the file would not be in either
2802-
// the main file, or an included file
2803-
auto const it = files_.find(file);
2804-
if (it == files_.end())
2730+
2731+
// Attempts to get a relative path for the prefix
2732+
auto tryGetRelativePosixPath = [&file_info](std::string_view const prefix)
2733+
-> std::optional<std::string_view>
28052734
{
2806-
return nullptr;
2735+
if (files::startsWith(file_info.full_path, prefix))
2736+
{
2737+
std::string_view res = file_info.full_path;
2738+
res.remove_prefix(prefix.size());
2739+
if (res.starts_with('/'))
2740+
{
2741+
res.remove_prefix(1);
2742+
}
2743+
return res;
2744+
}
2745+
return std::nullopt;
2746+
};
2747+
2748+
auto tryGetRelativePath = [&tryGetRelativePosixPath](std::string_view const prefix)
2749+
-> std::optional<std::string_view>
2750+
{
2751+
if (!files::isAbsolute(prefix))
2752+
{
2753+
return std::nullopt;
2754+
}
2755+
if (files::isPosixStyle(prefix))
2756+
{
2757+
// If already posix, we use the string view directly
2758+
// to avoid creating a new string for the check
2759+
return tryGetRelativePosixPath(prefix);
2760+
}
2761+
std::string const posixPrefix = files::makePosixStyle(prefix);
2762+
return tryGetRelativePosixPath(posixPrefix);
2763+
};
2764+
2765+
// Find the best match for the file path in the search directories
2766+
for (HeaderSearch& HS = sema_.getPreprocessor().getHeaderSearchInfo();
2767+
DirectoryLookup const& DL : HS.search_dir_range())
2768+
{
2769+
OptionalDirectoryEntryRef DR = DL.getDirRef();
2770+
if (!DL.isNormalDir() || !DR)
2771+
{
2772+
// Only consider normal directories
2773+
continue;
2774+
}
2775+
StringRef searchDir = DR->getName();
2776+
if (auto shortPath = tryGetRelativePath(searchDir))
2777+
{
2778+
file_info.short_path = std::string(*shortPath);
2779+
return file_info;
2780+
}
2781+
}
2782+
2783+
// Fallback to sourceRoot
2784+
if (files::isAbsolute(config_->sourceRoot))
2785+
{
2786+
if (auto shortPath = tryGetRelativePath(config_->sourceRoot))
2787+
{
2788+
file_info.short_path = std::string(*shortPath);
2789+
return file_info;
2790+
}
28072791
}
2808-
return &it->second;
2792+
2793+
// Fallback to system search paths in PATH
2794+
std::optional<std::string> const optEnvPathsStr = llvm::sys::Process::GetEnv("PATH");
2795+
MRDOCS_CHECK_OR(optEnvPathsStr, file_info);
2796+
std::string const& envPathsStr = *optEnvPathsStr;
2797+
for (auto const envPaths = llvm::split(envPathsStr, llvm::sys::EnvPathSeparator);
2798+
auto envPath: envPaths)
2799+
{
2800+
if (!files::isAbsolute(envPath))
2801+
{
2802+
continue;
2803+
}
2804+
if (auto shortPath = tryGetRelativePath(envPath))
2805+
{
2806+
file_info.short_path = std::string(*shortPath);
2807+
return file_info;
2808+
}
2809+
}
2810+
2811+
// Fallback to the full path
2812+
file_info.short_path = file_info.full_path;
2813+
return file_info;
28092814
}
28102815

28112816
template <std::derived_from<Info> InfoTy>
@@ -2840,9 +2845,10 @@ ASTVisitor::
28402845
upsert(DeclType* D)
28412846
{
28422847
AccessSpecifier access = getAccess(D);
2843-
MRDOCS_CHECK_MSG(
2844-
shouldExtract(D, access),
2845-
"Symbol should not be extracted");
2848+
if (!shouldExtract(D, access))
2849+
{
2850+
return Unexpected(Error("Symbol should not be extracted"));
2851+
}
28462852

28472853
SymbolID const id = generateID(D);
28482854
MRDOCS_CHECK_MSG(id, "Failed to extract symbol ID");

‎src/lib/AST/ASTVisitor.hpp

+46-25
Original file line numberDiff line numberDiff line change
@@ -79,17 +79,6 @@ class ASTVisitor
7979
// An unordered set of all extracted Info declarations
8080
InfoSet info_;
8181

82-
/* Preprocessed information about search directories
83-
84-
This vector stores information about the search directories
85-
used by the translation unit.
86-
87-
Whenever we extract information about where a symbol is located,
88-
we store the full path of the symbol and the path relative
89-
to the search directory in this vector.
90-
*/
91-
std::vector<std::string> search_dirs_;
92-
9382
/* Struct to hold pre-processed file information.
9483
9584
This struct stores information about a file, including its full path,
@@ -100,13 +89,6 @@ class ASTVisitor
10089
*/
10190
struct FileInfo
10291
{
103-
static
104-
FileInfo
105-
build(
106-
std::span<std::string> search_dirs,
107-
std::string_view file_path,
108-
std::string_view sourceRoot);
109-
11092
// The full path of the file.
11193
std::string full_path;
11294

@@ -910,13 +892,9 @@ class ASTVisitor
910892
This function will return a pointer to a FileInfo
911893
object for a given Clang FileEntry object.
912894
913-
If the FileInfo object does not exist, the function
914-
will construct a new FileInfo object and add it to
915-
the files_ map.
916-
917-
The map of files is created during the object
918-
construction, and is populated with the FileInfo
919-
for each file in the translation unit.
895+
If the FileInfo object does not exist in the cache,
896+
the function will build a new FileInfo object and
897+
add it to the files_ map.
920898
921899
@param file The Clang FileEntry object to get the FileInfo for.
922900
@@ -925,6 +903,49 @@ class ASTVisitor
925903
FileInfo*
926904
findFileInfo(clang::SourceLocation loc);
927905

906+
/* Build a FileInfo for a FileEntry
907+
908+
This function will build a FileInfo object for a
909+
given Clang FileEntry object.
910+
911+
The function will extract the full path and short
912+
path of the file, and store the information in the
913+
FileInfo object.
914+
915+
This function is used as an auxiliary function to
916+
`findFileInfo` when the FileInfo object does not
917+
exist in the cache.
918+
919+
@param file The Clang FileEntry object to build the FileInfo for.
920+
@return the FileInfo object.
921+
922+
*/
923+
std::optional<FileInfo>
924+
buildFileInfo(FileEntry const* entry);
925+
926+
/* Build a FileInfo for a string path
927+
928+
This function will build a FileInfo object for a
929+
given file path.
930+
931+
The function will extract the full path and short
932+
path of the file, and store the information in the
933+
FileInfo object.
934+
935+
The search paths will be used to identify the
936+
short path of the file relative to the search
937+
directories.
938+
939+
This function is used as an auxiliary function to
940+
`buildFileInfo` once the file path has been extracted
941+
from the FileEntry object.
942+
943+
@param file_path The file path to build the FileInfo for.
944+
@return the FileInfo object.
945+
*/
946+
FileInfo
947+
buildFileInfo(std::string_view file_path);
948+
928949
/* Result of an upsert operation
929950
930951
This struct is used to return the result of an

‎src/lib/Support/Path.cpp

+19-2
Original file line numberDiff line numberDiff line change
@@ -263,14 +263,31 @@ makeAbsolute(
263263
}
264264

265265
std::string
266-
makePosixStyle(
267-
std::string_view pathName)
266+
makePosixStyle(std::string_view pathName)
268267
{
269268
SmallPathString result(pathName);
270269
llvm::sys::path::native(result, llvm::sys::path::Style::posix);
271270
return std::string(result);
272271
}
273272

273+
bool
274+
isPosixStyle(std::string_view pathName)
275+
{
276+
namespace path = llvm::sys::path;
277+
278+
if(pathName.empty())
279+
{
280+
return true;
281+
}
282+
llvm::StringRef separator = llvm::sys::path::get_separator(path::Style::windows);
283+
if (pathName.find(separator) != llvm::StringRef::npos)
284+
{
285+
return false;
286+
}
287+
return true;
288+
}
289+
290+
274291
std::string
275292
withExtension(
276293
std::string_view fileName,

0 commit comments

Comments
 (0)
Please sign in to comment.