Entropic 2.3.8
Local-first agentic inference engine
Loading...
Searching...
No Matches
ignore_matcher.cpp
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
29
30#include <fstream>
31#include <sstream>
32
33namespace fs = std::filesystem;
34static auto logger = entropic::log::get("mcp.filesystem.ignore");
35
36namespace entropic {
37
38namespace {
39
/**
 * @brief Strip leading and trailing whitespace from a string.
 *
 * Whitespace here means space, tab, carriage return and newline.
 *
 * @param s Input text.
 * @return Copy of @p s without surrounding whitespace; an empty string
 *         when @p s is empty or contains nothing but whitespace.
 */
std::string trim(const std::string& s) {
    static constexpr const char* kBlank = " \t\r\n";
    const auto first = s.find_first_not_of(kBlank);
    if (first == std::string::npos) {
        // Nothing but whitespace (or empty input).
        return std::string{};
    }
    const auto last = s.find_last_not_of(kBlank);
    return s.substr(first, last - first + 1);
}
54
/**
 * @brief Render a filesystem path in generic (forward-slash) form.
 *
 * @param p Path to convert.
 * @return Result of `p.generic_string()`.
 */
std::string to_slash(const std::filesystem::path& p) {
    return p.generic_string();
}
66
/**
 * @brief Report whether a character is special in a regular expression.
 *
 * Used to decide whether a literal character must be preceded by a
 * backslash when it is copied into a regex source string.
 *
 * The set includes the characters that double as glob operators
 * (`*`, `?`, `[`, `]`). emit_escape() relies on this function to decide
 * whether a glob-escaped character such as `\*` needs a regex escape;
 * without them the character would be emitted bare and act as a
 * quantifier or class opener instead of a literal.
 *
 * @param c Character to classify.
 * @return true when @p c must be backslash-escaped to match literally.
 */
bool is_regex_meta(char c) {
    switch (c) {
    case '.': case '+': case '*': case '?':
    case '(': case ')': case '[': case ']':
    case '{': case '}': case '|': case '^':
    case '$': case '\\':
        return true;
    default:
        return false;
    }
}
88
89} // namespace
90
91// ── Pattern compilation ──────────────────────────────────
92
93namespace {
94
/**
 * @brief Translate `*` or `**` at position @p i into regex source.
 *
 * - A single `*` becomes `[^/]*` (any run of characters within one
 *   path segment).
 * - `**` followed by `/` at the start of a path segment (pattern start
 *   or right after a `/`) becomes `(?:.*&#47;)?`, i.e. zero or more whole
 *   directories. Emitting a plain `.*` here would let `**&#47;foo` match
 *   `barfoo` and `a/**&#47;b` match `a/xb`.
 * - Any other `**` becomes `.*`; a directly following `/` is consumed.
 *
 * @param pattern Glob pattern being translated.
 * @param i       Index of the first `*`. On return it points at the last
 *                pattern character consumed (the caller advances past it).
 * @param out     Regex source being built; the translation is appended.
 */
void emit_star(const std::string& pattern, size_t& i, std::string& out) {
    const bool double_star = (i + 1 < pattern.size())
                          && pattern[i + 1] == '*';
    if (!double_star) {
        out += "[^/]*";
        return;
    }

    // Only a `**` that begins a path segment has the "any number of
    // directories" meaning.
    const bool at_segment_start = (i == 0) || pattern[i - 1] == '/';
    ++i;  // consume the second '*'
    const bool slash_follows = (i + 1 < pattern.size())
                            && pattern[i + 1] == '/';

    if (at_segment_start && slash_follows) {
        // Zero or more complete directory components, slash included.
        out += "(?:.*/)?";
        ++i;  // consume the '/'
        return;
    }

    out += ".*";
    if (slash_follows) {
        ++i;  // the '/' is absorbed by ".*"
    }
}
116
/**
 * @brief Translate a glob bracket expression starting at @p i.
 *
 * Copies the class members through to the regex source. A leading `!`
 * (glob negation) is rewritten to `^`, the regex spelling; copied
 * verbatim it would merely add `!` to the set of accepted characters.
 * The rewrite is skipped when `!` is the only member, so `[!]` keeps
 * meaning "a literal '!'".
 *
 * If the pattern ends before a closing `]` is seen, the class is
 * closed anyway.
 *
 * @param pattern Glob pattern being translated.
 * @param i       Index of the opening `[`. On return it is the index of
 *                the closing `]`, or `pattern.size()` if none was found.
 * @param out     Regex source being built; the class is appended.
 */
void emit_bracket(const std::string& pattern, size_t& i,
                  std::string& out) {
    out += '[';
    ++i;  // step past '['

    const bool negated = (i + 1 < pattern.size())
                      && pattern[i] == '!'
                      && pattern[i + 1] != ']';
    if (negated) {
        out += '^';
        ++i;
    }

    while (i < pattern.size() && pattern[i] != ']') {
        out += pattern[i];
        ++i;
    }
    out += ']';
}
133
139void emit_escape(const std::string& pattern, size_t& i,
140 std::string& out) {
141 char next = pattern[i + 1];
142 if (is_regex_meta(next)) { out += '\\'; }
143 out += next;
144 ++i;
145}
146
147} // namespace
148
159namespace {
160
170void emit_one(const std::string& pattern, size_t& i,
171 std::string& out) {
172 char c = pattern[i];
173 bool handled = true;
174 switch (c) {
175 case '*': emit_star(pattern, i, out); break;
176 case '?': out += "[^/]"; break;
177 case '[': emit_bracket(pattern, i, out); break;
178 case '\\':
179 if (i + 1 < pattern.size()) {
180 emit_escape(pattern, i, out);
181 } else {
182 handled = false;
183 }
184 break;
185 default: handled = false; break;
186 }
187 if (!handled) {
188 if (is_regex_meta(c)) { out += '\\'; }
189 out += c;
190 }
191}
192
193} // namespace
194
207std::string IgnoreMatcher::pattern_to_regex(const std::string& pattern) {
208 std::string out;
209 out.reserve(pattern.size() * 2);
210 for (size_t i = 0; i < pattern.size(); ++i) {
211 emit_one(pattern, i, out);
212 }
213 return out;
214}
215
227namespace {
228
235void strip_flags(std::string& body, IgnoreMatcher::Rule& rule) {
236 if (!body.empty() && body[0] == '!') {
237 rule.negate = true;
238 body.erase(0, 1);
239 }
240 if (!body.empty() && body.back() == '/') {
241 rule.dir_only = true;
242 body.pop_back();
243 }
244}
245
255std::string make_base_prefix(const std::string& base) {
256 if (base.empty()) { return {}; }
257 std::string raw = base + "/";
258 std::string out;
259 for (char c : raw) {
260 if (is_regex_meta(c) || c == '*' || c == '?' || c == '[') {
261 out += '\\';
262 }
263 out += c;
264 }
265 return out;
266}
267
274std::regex compile_or_never(const std::string& src,
275 const std::string& original_pattern) {
276 try {
277 return std::regex(src);
278 } catch (const std::regex_error& e) {
279 logger->warn("Skipping malformed ignore pattern '{}': {}",
280 original_pattern, e.what());
281 return std::regex("(?!)");
282 }
283}
284
285} // namespace
286
304IgnoreMatcher::Rule IgnoreMatcher::compile_pattern(
305 const std::string& pattern, const std::string& base) {
306 Rule rule;
307 rule.original = pattern;
308 rule.base = base;
309
310 std::string body = pattern;
311 strip_flags(body, rule);
312 bool root_anchored = !body.empty() && body[0] == '/';
313 if (root_anchored) { body.erase(0, 1); }
314 bool anchored = root_anchored
315 || body.find('/') != std::string::npos;
316
317 std::string regex_body = pattern_to_regex(body);
318 std::string base_prefix = make_base_prefix(base);
319 std::string anchor_left = anchored
320 ? ("^" + base_prefix)
321 : ("^" + base_prefix + "(?:.*/)?");
322
323 rule.re_exact = compile_or_never(
324 anchor_left + regex_body + "$", pattern);
325 rule.re_under = compile_or_never(
326 anchor_left + regex_body + "/.*$", pattern);
327 return rule;
328}
329
330// ── Public API ───────────────────────────────────────────
331
337void IgnoreMatcher::add_pattern(const std::string& pattern,
338 const fs::path& base) {
339 std::string trimmed = trim(pattern);
340 if (trimmed.empty() || trimmed[0] == '#') { return; }
341 rules_.push_back(compile_pattern(trimmed, to_slash(base)));
342}
343
349void IgnoreMatcher::load_file(const fs::path& path,
350 const std::string& base) {
351 std::ifstream in(path);
352 if (!in.is_open()) { return; }
353 std::string line;
354 int loaded = 0;
355 while (std::getline(in, line)) {
356 std::string trimmed = trim(line);
357 if (trimmed.empty() || trimmed[0] == '#') { continue; }
358 rules_.push_back(compile_pattern(trimmed, base));
359 ++loaded;
360 }
361 std::string base_label = base.empty() ? std::string("<root>") : base;
362 logger->info("Loaded {} ignore rules from {} (base='{}')",
363 loaded, path.string(), base_label);
364}
365
371void IgnoreMatcher::load(const fs::path& root) {
372 rules_.clear();
373 if (!fs::exists(root) || !fs::is_directory(root)) {
374 logger->warn("IgnoreMatcher::load: root does not exist: {}",
375 root.string());
376 return;
377 }
378
379 auto canonical_root = fs::weakly_canonical(root);
380 fs::path root_gi = canonical_root / ".gitignore";
381 if (fs::exists(root_gi)) { load_file(root_gi, ""); }
382
383 load_nested_gitignores(canonical_root, root_gi);
384
385 fs::path explorer = canonical_root / ".explorerignore";
386 if (fs::exists(explorer)) { load_file(explorer, ""); }
387}
388
396void IgnoreMatcher::load_nested_gitignores(
397 const fs::path& canonical_root, const fs::path& root_gi) {
398 // Recursively discover .gitignore files in subdirectories. We skip
399 // the root one (already loaded) and directories the accumulated
400 // rule set already excludes (avoids descending into node_modules
401 // just to find an irrelevant .gitignore).
402 try {
403 auto it = fs::recursive_directory_iterator(
404 canonical_root,
405 fs::directory_options::skip_permission_denied);
406 for (auto& entry : it) {
407 if (!entry.is_regular_file()) { continue; }
408 if (entry.path().filename() != ".gitignore") { continue; }
409 if (entry.path() == root_gi) { continue; }
410 auto rel_dir = fs::relative(entry.path().parent_path(),
411 canonical_root);
412 load_file(entry.path(), to_slash(rel_dir));
413 }
414 } catch (const std::exception& e) {
415 logger->warn("Recursive gitignore scan aborted: {}", e.what());
416 }
417}
418
424bool IgnoreMatcher::is_ignored(const std::string& rel_path,
425 bool is_dir) const {
426 bool ignored = false;
427 for (const auto& rule : rules_) {
428 bool match_under = std::regex_match(rel_path, rule.re_under);
429 bool match_exact = std::regex_match(rel_path, rule.re_exact);
430 // For dir_only rules, an exact match only counts when the path
431 // is itself a directory (a regular file with the same name as
432 // a `dir/` pattern is NOT excluded). re_under always counts —
433 // any descendant inherits the parent's exclusion.
434 bool exact_counts = match_exact
435 && (!rule.dir_only || is_dir);
436 if (match_under || exact_counts) {
437 ignored = !rule.negate;
438 }
439 }
440 return ignored;
441}
442
443} // namespace entropic
void load(const std::filesystem::path &root)
Load gitignore + explorerignore from a workspace root.
void add_pattern(const std::string &pattern, const std::filesystem::path &base={})
Add a single pattern programmatically (test surface).
bool is_ignored(const std::string &rel_path, bool is_dir) const
Test whether a path is ignored.
Path-relative ignore matching honoring .gitignore + .explorerignore.
spdlog initialization and logger access.
ENTROPIC_EXPORT std::shared_ptr< spdlog::logger > get(const std::string &name)
Get or create a named logger.
Definition logging.cpp:211
Activate model on GPU (WARM → ACTIVE).