18#include <nlohmann/json.hpp>
27namespace fs = std::filesystem;
28using json = nlohmann::json;
46 logger->info(
"Tracked read: {}", path);
58 const std::string& path,
59 size_t current_hash)
const {
60 auto it = reads_.find(path);
61 if (it == reads_.end()) {
64 return it->second == current_hash;
75 return reads_.count(path) > 0;
87const std::vector<std::string> SKIP_DIRS = {
88 ".git",
"node_modules",
"__pycache__",
".venv"
98bool should_skip_dir(
const std::string& name) {
99 for (
const auto& skip : SKIP_DIRS) {
/**
 * @brief Read an entire file into a string (binary mode, no newline translation).
 * @param path File to read.
 * @return The file's bytes.
 * @throws std::runtime_error if the file cannot be opened.
 */
std::string read_file_contents(const std::filesystem::path& path) {
    std::ifstream source(path, std::ios::binary);
    if (source.is_open()) {
        // Drain the whole stream buffer in one insertion.
        std::ostringstream buffer;
        buffer << source.rdbuf();
        return buffer.str();
    }
    throw std::runtime_error(
        "Cannot open file: " + path.string());
}
/**
 * @brief Write content to a file, creating missing parent directories.
 *
 * The file is opened in binary mode and truncated, so existing content
 * is replaced.
 *
 * @param path Destination file.
 * @param content Bytes to write.
 * @throws std::runtime_error if the file cannot be opened or the write
 *         does not complete.
 * @throws std::filesystem::filesystem_error if a parent directory
 *         cannot be created.
 */
void write_file_contents(const std::filesystem::path& path,
                         const std::string& content) {
    // A bare filename has an empty parent_path(); create_directories("")
    // is reported as an error by some standard libraries, so skip it.
    const auto parent = path.parent_path();
    if (!parent.empty()) {
        std::filesystem::create_directories(parent);
    }

    std::ofstream out(path, std::ios::binary | std::ios::trunc);
    if (!out.is_open()) {
        throw std::runtime_error(
            "Cannot write file: " + path.string());
    }

    out.write(content.data(),
              static_cast<std::streamsize>(content.size()));
    // Flush before checking state so buffered write failures
    // (disk full, I/O error) surface here instead of being lost
    // in the destructor.
    out.flush();
    if (!out) {
        throw std::runtime_error(
            "Cannot write file: " + path.string());
    }
}
/**
 * @brief Hash file content with std::hash.
 * @param s Content to hash.
 * @return std::hash<std::string> value of @p s.
 */
size_t hash_content(const std::string& s) {
    std::hash<std::string> hasher{};
    return hasher(s);
}
163std::string make_error(
const std::string& code,
164 const std::string& message) {
167 j[
"message"] = message;
179std::string build_read_result(
const std::string& path,
180 const std::string& content) {
182 result[
"path"] = path;
184 std::istringstream stream(content);
186 json lines = json::object();
189 while (std::getline(stream, line)) {
191 lines[std::to_string(num)] = line;
194 result[
"total"] = num;
195 result[
"lines"] = std::move(lines);
196 return result.dump();
216bool glob_match(
const std::string& filename,
217 const std::string& pattern) {
218 std::string regex_str;
219 regex_str.reserve(pattern.size() * 2);
221 for (
char ch : pattern) {
224 }
else if (ch ==
'?') {
226 }
else if (ch ==
'.') {
234 std::regex re(regex_str, std::regex::icase);
235 return std::regex_match(filename, re);
236 }
catch (
const std::regex_error& e) {
238 "glob_match: malformed pattern '{}' → {} — treated as "
239 "non-match", pattern, e.what());
/**
 * @brief Split the body of a brace group on commas.
 *
 * Every comma separates two alternatives, so an empty body yields one
 * empty alternative and a trailing comma yields a trailing empty one.
 *
 * @param body Text between '{' and '}'.
 * @return Alternatives in order of appearance.
 */
std::vector<std::string> split_brace_alternatives(
    const std::string& body) {
    std::vector<std::string> pieces;
    std::string::size_type begin = 0;
    for (;;) {
        const auto comma = body.find(',', begin);
        if (comma == std::string::npos) {
            // Remainder after the last comma (possibly empty).
            pieces.push_back(body.substr(begin));
            break;
        }
        pieces.push_back(body.substr(begin, comma - begin));
        begin = comma + 1;
    }
    return pieces;
}
/**
 * @brief Append every alternative to every base string.
 *
 * The result is ordered base-major: all alternatives for the first
 * base, then all for the second, and so on.
 *
 * @param bases Prefixes built so far.
 * @param alternatives Suffixes to append to each prefix.
 * @return bases.size() * alternatives.size() combined strings.
 */
std::vector<std::string> multiply_alternatives(
    const std::vector<std::string>& bases,
    const std::vector<std::string>& alternatives) {
    std::vector<std::string> product;
    product.reserve(bases.size() * alternatives.size());
    for (auto prefix = bases.begin(); prefix != bases.end(); ++prefix) {
        for (auto suffix = alternatives.begin();
             suffix != alternatives.end(); ++suffix) {
            product.push_back(*prefix + *suffix);
        }
    }
    return product;
}
317std::vector<std::string> expand_braces(
const std::string& pattern) {
318 std::vector<std::string> out{
""};
320 while (i < pattern.size()) {
322 auto close = (c ==
'{')
323 ? pattern.find(
'}', i + 1)
325 bool is_group = (c ==
'{') && (close != std::string::npos);
327 for (
auto& s : out) { s += c; }
331 auto body = pattern.substr(i + 1, close - i - 1);
332 out = multiply_alternatives(
333 out, split_brace_alternatives(body));
347std::string check_read_before_write(
348 const FileAccessTracker& tracker,
349 const std::string& path) {
350 if (fs::exists(path) && !tracker.was_read(path)) {
351 logger->warn(
"Read-before-write violation: {}", path);
352 return make_error(
"read_before_write",
353 "File must be read before writing: " + path);
/**
 * @brief Count non-overlapping occurrences of a substring.
 * @param content Text to search.
 * @param needle Substring to look for.
 * @return Number of non-overlapping matches; 0 when @p needle is empty.
 */
int count_occurrences(const std::string& content,
                      const std::string& needle) {
    // find("") matches at the current position and the cursor would
    // advance by zero, so an empty needle would never terminate.
    if (needle.empty()) {
        return 0;
    }

    int count = 0;
    std::string::size_type pos = 0;
    while ((pos = content.find(needle, pos)) != std::string::npos) {
        ++count;
        // Skip past the match so overlapping hits are not counted.
        pos += needle.size();
    }
    return count;
}
/**
 * @brief Replace occurrences of a substring, enforcing uniqueness
 *        unless replace_all is set.
 *
 * Matches are non-overlapping and found left to right; replacement
 * text is never rescanned.
 *
 * @param content Original text.
 * @param old_str Substring to replace.
 * @param new_str Replacement text.
 * @param replace_all Replace every match instead of requiring exactly one.
 * @param error_type Set to "not_found" or "multiple_matches" on
 *        failure; left untouched on success.
 * @return The edited text, or std::nullopt on failure.
 */
std::optional<std::string>
apply_str_replace(const std::string& content,
                  const std::string& old_str,
                  const std::string& new_str,
                  bool replace_all, std::string& error_type) {
    // An empty search string matches everywhere and would never let
    // the scan advance; treat it as "nothing to find".
    if (old_str.empty()) {
        error_type = "not_found";
        return std::nullopt;
    }

    const auto first = content.find(old_str);
    if (first == std::string::npos) {
        error_type = "not_found";
        return std::nullopt;
    }

    // More than one non-overlapping match exists exactly when a second
    // match starts at or after the end of the first.
    const bool has_second =
        content.find(old_str, first + old_str.size()) != std::string::npos;
    if (!replace_all && has_second) {
        error_type = "multiple_matches";
        return std::nullopt;
    }

    // Build the output in one pass rather than shifting the tail of
    // the string on every in-place replace.
    std::string result;
    result.reserve(content.size());
    std::string::size_type cursor = 0;
    std::string::size_type hit = first;
    while (hit != std::string::npos) {
        result.append(content, cursor, hit - cursor);
        result += new_str;
        cursor = hit + old_str.size();
        if (!replace_all) {
            break;
        }
        hit = content.find(old_str, cursor);
    }
    result.append(content, cursor, std::string::npos);
    return result;
}
425std::string apply_insert(
const std::string& content,
427 const std::string& new_str) {
430 std::istringstream stream(content);
431 std::ostringstream out;
435 while (std::getline(stream, line)) {
437 if (current == line_num) {
438 out << new_str <<
'\n';
443 if (line_num > current) {
444 out << new_str <<
'\n';
458bool any_glob_match(
const std::string& filename,
459 const std::vector<std::string>& patterns) {
460 for (
const auto& p : patterns) {
461 if (glob_match(filename, p)) {
return true; }
491enum class EntryAction {
509EntryAction classify_glob_entry(
510 const fs::directory_entry& entry,
511 const fs::path& root,
512 const std::vector<std::string>& patterns,
513 const IgnoreMatcher* ignore) {
514 bool is_dir = entry.is_directory();
515 EntryAction result = EntryAction::kSkip;
516 bool hardcoded_skip = is_dir
517 && should_skip_dir(entry.path().filename().string());
518 bool ignore_hit =
false;
519 if (!hardcoded_skip && ignore !=
nullptr) {
520 auto rel = fs::relative(entry.path(), root)
522 ignore_hit = !rel.empty()
523 && ignore->is_ignored(rel, is_dir);
525 if (hardcoded_skip || (ignore_hit && is_dir)) {
526 result = EntryAction::kSkipPrune;
527 }
else if (ignore_hit) {
528 result = EntryAction::kSkip;
529 }
else if (entry.is_regular_file()
531 entry.path().filename().string(), patterns)) {
532 result = EntryAction::kTake;
549std::vector<std::string> collect_glob_matches(
550 const fs::path& root,
551 const std::string& pattern,
553 const IgnoreMatcher* ignore =
nullptr) {
555 auto patterns = expand_braces(pattern);
556 std::vector<std::string> matches;
557 auto it = fs::recursive_directory_iterator(
558 root, fs::directory_options::skip_permission_denied);
560 for (
auto& entry : it) {
561 if (
static_cast<int>(matches.size()) >= max_results) {
564 auto action = classify_glob_entry(entry, root, patterns,
566 if (action == EntryAction::kSkipPrune) {
567 it.disable_recursion_pending();
568 }
else if (action == EntryAction::kTake) {
569 matches.push_back(entry.path().string());
584void grep_file(
const fs::path& path,
585 const std::regex& re,
586 std::vector<json>& matches,
588 std::ifstream in(path);
595 while (std::getline(in, line)) {
597 if (
static_cast<int>(matches.size()) >= limit) {
600 if (!std::regex_search(line, re)) {
604 m[
"path"] = path.string();
605 m[
"line"] = line_num;
607 matches.push_back(std::move(m));
618json entry_to_json(
const fs::directory_entry& entry) {
620 j[
"name"] = entry.path().filename().string();
622 if (entry.is_directory()) {
623 j[
"type"] =
"directory";
627 j[
"size"] = entry.is_regular_file()
628 ?
static_cast<int64_t
>(entry.file_size())
643std::vector<json> collect_entries(
const fs::path& dir,
646 std::vector<json> entries;
649 for (
auto& entry : fs::directory_iterator(dir)) {
650 entries.push_back(entry_to_json(entry));
655 auto it = fs::recursive_directory_iterator(
656 dir, fs::directory_options::skip_permission_denied);
657 for (
auto& entry : it) {
658 if (it.depth() > max_depth) {
659 it.disable_recursion_pending();
662 entries.push_back(entry_to_json(entry));
676std::string do_str_replace(
const json& args,
677 const std::string& content,
679 auto old_str = args.at(
"old_string").get<std::string>();
680 auto new_str = args.at(
"new_string").get<std::string>();
681 bool replace_all = args.value(
"replace_all",
false);
683 std::string error_type;
684 auto result = apply_str_replace(
685 content, old_str, new_str, replace_all, error_type);
686 if (!result.has_value()) {
687 auto msg = (error_type ==
"multiple_matches")
688 ?
"old_string found multiple times — use replace_all"
689 :
"old_string not found in file";
690 return make_error(error_type, msg);
692 out = result.value();
705std::string do_insert(
const json& args,
706 const std::string& content,
708 auto line_num = args.at(
"insert_line").get<
int>();
709 auto new_str = args.at(
"new_string").get<std::string>();
710 out = apply_insert(content, line_num, new_str);
723std::string apply_edit(
const json& args,
724 const std::filesystem::path& resolved,
725 const std::string& path_str) {
726 auto content = read_file_contents(resolved);
730 if (args.contains(
"old_string")) {
731 err = do_str_replace(args, content, edited);
732 }
else if (args.contains(
"insert_line")) {
733 err = do_insert(args, content, edited);
735 return make_error(
"invalid_args",
736 "edit_file requires old_string or insert_line");
743 write_file_contents(resolved, edited);
744 logger->info(
"Edited file: {}", path_str);
747 j[
"path"] = path_str;
748 j[
"message"] =
"Edit applied successfully";
771 const std::string& data_dir)
773 "read_file",
"filesystem",
774 data_dir +
"/tools")),
804 const std::string& args_json)
const override {
805 auto args = json::parse(args_json);
806 return "file:" + args.at(
"path").get<std::string>();
830 const fs::path& resolved,
831 const std::string& path_str) {
833 if (!fs::exists(resolved)) {
834 err = make_error(
"not_found",
835 "File not found: " + path_str);
837 auto rel = fs::relative(resolved, server.
root_dir())
839 bool ignored = !rel.empty()
841 int size =
static_cast<int>(fs::file_size(resolved));
844 err = make_error(
"ignored",
845 "Path '" + rel +
"' is excluded by .gitignore or "
847 }
else if (limit > 0 && size > limit) {
848 err = make_error(
"size_exceeded",
849 "File " + path_str +
" is " +
850 std::to_string(size) +
" bytes (limit: " +
851 std::to_string(limit) +
")");
867 auto args = json::parse(args_json);
868 auto requested = args.at(
"path").get<std::string>();
870 auto path_str = resolved.string();
873 if (!err.empty()) {
return {err, {}}; }
875 auto content = read_file_contents(resolved);
877 hash_content(content));
878 auto size =
static_cast<int>(fs::file_size(resolved));
879 logger->info(
"Read file: {} ({} bytes)", path_str, size);
880 return {build_read_result(path_str, content), {}};
900 const std::string& data_dir)
902 "write_file",
"filesystem",
903 data_dir +
"/tools")),
927 const std::string& args_json) {
929 auto args = json::parse(args_json);
930 auto requested = args.at(
"path").get<std::string>();
931 auto content = args.at(
"content").get<std::string>();
933 auto path_str = resolved.string();
935 auto violation = check_read_before_write(
937 if (!violation.empty()) {
938 return {violation, {}};
941 write_file_contents(resolved, content);
942 logger->info(
"Wrote file: {} ({} bytes)",
943 path_str, content.size());
946 result[
"path"] = path_str;
947 result[
"bytes_written"] = content.size();
948 result[
"message"] =
"File written successfully";
949 return {result.dump(), {}};
969 const std::string& data_dir)
971 "edit_file",
"filesystem",
972 data_dir +
"/tools")),
996 auto args = json::parse(args_json);
997 auto requested = args.at(
"path").get<std::string>();
999 auto path_str = resolved.string();
1001 auto violation = check_read_before_write(
1003 if (!violation.empty()) {
1004 return {violation, {}};
1007 auto result = apply_edit(args, resolved, path_str);
1031 "glob",
"filesystem",
1032 data_dir +
"/tools")),
1070 auto args = json::parse(args_json);
1071 auto pattern = args.at(
"pattern").get<std::string>();
1072 constexpr int MAX_GLOB_RESULTS = 500;
1078 auto matches = collect_glob_matches(
1079 server_.
root_dir(), pattern, MAX_GLOB_RESULTS,
1082 logger->info(
"Glob '{}': {} matches (after ignore filtering)",
1083 pattern, matches.size());
1084 json result = matches;
1085 return {result.dump(), {}};
1105 "grep",
"filesystem",
1106 data_dir +
"/tools")),
1145 return std::regex(pattern);
1146 }
catch (
const std::regex_error& e) {
1147 err = make_error(
"invalid_regex", e.what());
1148 return std::regex(
"(?!)");
1173 const fs::path& root,
const std::vector<std::string>& file_patterns,
1175 constexpr int MAX_GREP_RESULTS = 100;
1176 std::vector<json> matches;
1177 auto it = fs::recursive_directory_iterator(
1178 root, fs::directory_options::skip_permission_denied);
1179 for (
auto& entry : it) {
1180 if (
static_cast<int>(matches.size()) >= MAX_GREP_RESULTS) {
1183 auto action = classify_glob_entry(entry, root, file_patterns,
1185 if (action == EntryAction::kSkipPrune) {
1186 it.disable_recursion_pending();
1187 }
else if (action == EntryAction::kTake) {
1188 grep_file(entry.path(), re, matches, MAX_GREP_RESULTS);
1200 auto args = json::parse(args_json);
1201 auto pattern = args.at(
"pattern").get<std::string>();
1202 auto file_glob = args.value(
"glob", std::string(
"*"));
1206 if (!err.empty()) {
return {err, {}}; }
1208 auto file_patterns = expand_braces(file_glob);
1212 logger->info(
"Grep '{}': {} matches (after ignore filtering)",
1213 pattern, matches.size());
1214 json result = matches;
1215 return {result.dump(), {}};
1235 const std::string& data_dir)
1237 "list_directory",
"filesystem",
1238 data_dir +
"/tools")),
1272 const std::string& args_json) {
1274 auto args = json::parse(args_json);
1275 auto requested = args.at(
"path").get<std::string>();
1276 auto recursive = args.value(
"recursive",
false);
1277 auto max_depth = args.value(
"max_depth", 3);
1280 if (!fs::is_directory(resolved)) {
1281 return {make_error(
"not_directory",
1282 "Not a directory: " + resolved.string()), {}};
1285 auto entries = collect_entries(
1286 resolved, recursive, max_depth);
1288 logger->info(
"Listed {}: {} entries",
1289 resolved.string(), entries.size());
1290 json result = entries;
1291 return {result.dump(), {}};
1305 int model_context_bytes) {
1309 if (model_context_bytes <= 0) {
1315 return static_cast<int>(
1333 const fs::path& root_dir,
1335 const std::string& data_dir,
1336 int model_context_bytes)
1338 root_dir_(fs::weakly_canonical(root_dir)),
1341 config, model_context_bytes)) {
1343 create_fs_tools(data_dir);
1344 register_fs_tools();
1349 ignore_.
load(root_dir_);
1351 logger->info(
"FilesystemServer initialized: root={}, "
1352 "max_read_bytes={}, ignore_rules={}",
1364void FilesystemServer::create_fs_tools(
const std::string& data_dir) {
1365 read_file_ = std::make_unique<ReadFileTool>(*
this, data_dir);
1366 write_file_ = std::make_unique<WriteFileTool>(*
this, data_dir);
1367 edit_file_ = std::make_unique<EditFileTool>(*
this, data_dir);
1368 glob_ = std::make_unique<GlobTool>(*
this, data_dir);
1369 grep_ = std::make_unique<GrepTool>(*
this, data_dir);
1370 list_dir_ = std::make_unique<ListDirectoryTool>(*
this, data_dir);
1378void FilesystemServer::register_fs_tools() {
1402 const std::string& tool_name)
const {
1403 return tool_name ==
"read_file";
1418 auto canonical = fs::weakly_canonical(path);
1419 if (!fs::is_directory(canonical)) {
1420 logger->error(
"set_working_dir: not a directory: {}",
1424 root_dir_ = canonical;
1426 ignore_.
load(root_dir_);
1427 logger->info(
"Working directory changed to: {} (ignore_rules={})",
1479 return max_read_bytes_;
1499 const std::string& requested)
const {
1501 fs::path req_path(requested);
1502 fs::path resolved = req_path.is_absolute()
1503 ? fs::weakly_canonical(req_path)
1504 : fs::weakly_canonical(root_dir_ / req_path);
1506 fs::path rel = resolved.lexically_relative(root_dir_);
1507 bool under_root = !rel.empty()
1508 && *rel.begin() != fs::path(
"..")
1509 && rel != fs::path(
"..");
1512 logger->error(
"Path escape blocked: {} (root: {})",
1513 resolved.string(), root_dir_.string());
1514 throw std::runtime_error(
1515 "Path escapes project root: " + resolved.string());
Tracks file read state for read-before-write enforcement.
bool was_read(const std::string &path) const
Check if a file was ever read.
void record_read(const std::string &path, size_t hash)
Record that a file was read.
bool was_read_unchanged(const std::string &path, size_t current_hash) const
Check if a file was read and content unchanged.
Filesystem MCP server with read-before-write enforcement.
int max_read_bytes() const
Get max read bytes (size gate).
const IgnoreMatcher & ignore() const
Get the ignore matcher (#15, v2.1.4).
bool skip_duplicate_check(const std::string &tool_name) const override
read_file must always execute (updates FileAccessTracker).
bool set_working_dir(const std::string &path) override
Set working directory (changes root_dir).
~FilesystemServer() override
Destructor (default, unique_ptr cleanup).
std::filesystem::path resolve_path(const std::string &requested) const
Resolve and validate a path against root.
const FilesystemConfig & config() const
Get the filesystem config.
FileAccessTracker & tracker()
Get the file access tracker.
FilesystemServer(const std::filesystem::path &root_dir, const FilesystemConfig &config, const std::string &data_dir, int model_context_bytes=0)
Construct with root directory, config, and data dir.
const std::filesystem::path & root_dir() const
Get the root directory.
gitignore-style path matcher (#15, v2.1.4).
void load(const std::filesystem::path &root)
Load gitignore + explorerignore from a workspace root.
bool is_ignored(const std::string &rel_path, bool is_dir) const
Test whether a path is ignored.
std::size_t rule_count() const
Number of compiled rules (test surface).
Concrete base class for MCP servers (80% logic).
void register_tool(ToolBase *tool)
Register a tool with this server.
Filesystem MCP server — read/write/edit/glob/grep/list_directory.
spdlog initialization and logger access.
ENTROPIC_EXPORT std::shared_ptr< spdlog::logger > get(const std::string &name)
Get or create a named logger.
Activate model on GPU (WARM → ACTIVE).
ToolDefinition load_tool_definition(const std::string &tool_name, const std::string &server_prefix, const std::string &data_dir)
Load a tool definition from a JSON file.
static int compute_max_read_bytes(const FilesystemConfig &config, int model_context_bytes)
Compute max read bytes from config and model context.
std::regex compile_grep_or_error(const std::string &pattern, std::string &err)
Compile a regex or return a structured tool error.
MCPAccessLevel
MCP tool access level for per-identity authorization.
@ READ
Read-only operations (e.g., read_file, list_directory)
static std::vector< json > grep_search(const fs::path &root, const std::vector< std::string > &file_patterns, const std::regex &re, const IgnoreMatcher &ignore)
Execute grep: brace-expand the file glob, compile the content regex (error-safe), iterate the tree ap...
std::string check_read_gates(FilesystemServer &server, const fs::path &resolved, const std::string &path_str)
Pre-read gate checks for read_file: existence, ignore rules, and size limit; yields a structured error string when a gate fails.
MCPServerBase concrete base class + ServerResponse.
Filesystem MCP server configuration.
bool allow_outside_root
Allow file ops outside workspace root.
std::optional< int > max_read_bytes
Max file read size (nullopt = derive from context)
float max_read_context_pct
Max context % for single file read.
Structured result from tool execution.
std::string result
Human-readable result.