diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..bcd0fbc --- /dev/null +++ b/.gitattributes @@ -0,0 +1,10 @@ +* text=auto eol=lf + +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.pdf binary +*.so binary +*.dll binary +*.exe binary diff --git a/.gitignore b/.gitignore index 1722478..a376bd0 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,8 @@ **/.idea/* .cache/ bench/ -experiment/ \ No newline at end of file +experiment/ +**/results +**.pyc +**/__pycache__ +.artifacts/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b98cda..8851525 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,19 @@ project(dsr include(GNUInstallDirs) +# -DTSAN=ON enables ThreadSanitizer across the whole build (library + tests/benchmarks). +# Must be applied before any add_subdirectory so every TU is instrumented. +# Incompatible with SANITIZER (ASan/UBSan) — enforce that here. +option(TSAN "Enable ThreadSanitizer" OFF) +if (TSAN) + if (SANITIZER) + message(FATAL_ERROR "TSAN and SANITIZER (ASan+UBSan) are mutually exclusive") + endif() + message(STATUS "ThreadSanitizer enabled") + add_compile_options(-fsanitize=thread -fno-omit-frame-pointer) + add_link_options(-fsanitize=thread) +endif() + add_definitions(-I/usr/include/x86_64-linux-gnu/qt6/QtOpenGLWidgets/) include_directories(/home/robocomp/robocomp/classes) @@ -27,3 +40,9 @@ if (WITH_TESTS) add_subdirectory(tests) endif() + +if (WITH_BENCHMARKS) + +add_subdirectory(benchmarks) + +endif() diff --git a/api/dsr_api.cpp b/api/dsr_api.cpp index 42207a9..a97a144 100644 --- a/api/dsr_api.cpp +++ b/api/dsr_api.cpp @@ -75,14 +75,14 @@ DSRGraph::DSRGraph(GraphSettings settings) : // RTPS Initialize publisher with general topic - auto [res, pub, writer] = dsrpub_node.init(participant_handle, dsrparticipant.getNodeTopic()); - auto [res2, pub2, writer2] = dsrpub_node_attrs.init(participant_handle, dsrparticipant.getAttNodeTopic()); + auto [res, pub, writer] = 
dsrpub_node.init(participant_handle, dsrparticipant.getNodeTopic(), dsrparticipant.get_domain_id()); + auto [res2, pub2, writer2] = dsrpub_node_attrs.init(participant_handle, dsrparticipant.getAttNodeTopic(), dsrparticipant.get_domain_id()); - auto [res3, pub3, writer3] = dsrpub_edge.init(participant_handle, dsrparticipant.getEdgeTopic()); - auto [res4, pub4, writer4] = dsrpub_edge_attrs.init(participant_handle, dsrparticipant.getAttEdgeTopic()); + auto [res3, pub3, writer3] = dsrpub_edge.init(participant_handle, dsrparticipant.getEdgeTopic(), dsrparticipant.get_domain_id()); + auto [res4, pub4, writer4] = dsrpub_edge_attrs.init(participant_handle, dsrparticipant.getAttEdgeTopic(), dsrparticipant.get_domain_id()); - auto [res5, pub5, writer5] = dsrpub_graph_request.init(participant_handle, dsrparticipant.getGraphRequestTopic()); - auto [res6, pub6, writer6] = dsrpub_request_answer.init(participant_handle, dsrparticipant.getGraphTopic()); + auto [res5, pub5, writer5] = dsrpub_graph_request.init(participant_handle, dsrparticipant.getGraphRequestTopic(), dsrparticipant.get_domain_id()); + auto [res6, pub6, writer6] = dsrpub_request_answer.init(participant_handle, dsrparticipant.getGraphTopic(), dsrparticipant.get_domain_id()); dsrparticipant.add_publisher(dsrparticipant.getNodeTopic()->get_name(), {pub, writer}); dsrparticipant.add_publisher(dsrparticipant.getAttNodeTopic()->get_name(), {pub2, writer2}); @@ -272,21 +272,18 @@ std::tuple>> DSRGraph::updat if (!deleted.contains(node.id())) { - if (nodes.contains(node.id()) and !nodes.at(node.id()).empty()) + auto nit = nodes.find(node.id()); + if (nit != nodes.end() && !nit->second.empty()) { - std::vector atts_deltas; - auto &iter = nodes.at(node.id()).read_reg().attrs(); + auto &iter = nit->second.read_reg().attrs(); //New attributes and updates. 
for (auto &[k, att]: node.attrs()) { - if (!iter.contains(k)) { - iter.emplace(k, mvreg()); - } - if (iter.at(k).empty() or att.read_reg() != iter.at(k).read_reg()) { - auto delta = iter.at(k).write(std::move(att.read_reg())); + auto &attr_reg = iter.try_emplace(k, mvreg()).first->second; + if (attr_reg.empty() or att.read_reg() != attr_reg.read_reg()) { + auto delta = attr_reg.write(std::move(att.read_reg())); atts_deltas.emplace_back( CRDTNodeAttr_to_IDL(agent_id, node.id(), node.id(), k, delta)); - } } //Remove old attributes. @@ -296,7 +293,7 @@ std::tuple>> DSRGraph::updat if (ignored_attributes.contains(k)) { it_a = iter.erase(it_a); } else if (!node.attrs().contains(k)) { - auto delta = iter.at(k).reset(); + auto delta = it_a->second.reset(); atts_deltas.emplace_back( CRDTNodeAttr_to_IDL(node.agent_id(), node.id(), node.id(), k, delta)); it_a = iter.erase(it_a); @@ -378,27 +375,23 @@ DSRGraph::delete_node_(uint64_t id) { // Get remove delta. auto delta = nodes[id].reset(); IDL::MvregNode delta_remove = CRDTNode_to_IDL(agent_id, id, delta); - //search and remove edges. - //For each node check if there is an edge to remove. - //TODO: use to_edges. - for (auto &[k, v] : nodes) + // Search and remove incoming edges using to_edges cache: O(k) instead of O(n). 
{ - std::shared_lock lck_cache(_mutex_cache_maps); - if (!edges.contains({k, id})) continue; - // Remove all edges between them - auto &visited_node = v.read_reg(); - auto keys = deleted_edges.size(); - for (const auto &key : edges.at({k, id})) + decltype(to_edges)::mapped_type incoming; { - deleted_edges.emplace_back(visited_node.fano().at({id, key}).read_reg()); - auto delta_fano = visited_node.fano().at({id, key}).reset(); - delta_vec.emplace_back(CRDTEdge_to_IDL(agent_id, k, id, key, delta_fano)); - visited_node.fano().erase({id, key}); + std::shared_lock lck_cache(_mutex_cache_maps); + if (to_edges.contains(id)) + incoming = to_edges.at(id); } - lck_cache.unlock(); - //Remove all from cache - for (auto i = keys; i < deleted_edges.size(); i++) { - update_maps_edge_delete(k, id, deleted_edges[i].type()); + for (const auto &[from, type] : incoming) + { + if (!nodes.contains(from)) continue; + auto &visited_node = nodes.at(from).read_reg(); + deleted_edges.emplace_back(visited_node.fano().at({id, type}).read_reg()); + auto delta_fano = visited_node.fano().at({id, type}).reset(); + delta_vec.emplace_back(CRDTEdge_to_IDL(agent_id, from, id, type, delta_fano)); + visited_node.fano().erase({id, type}); + update_maps_edge_delete(from, id, type); } } update_maps_node_delete(id, node.value()); @@ -495,6 +488,7 @@ std::vector DSRGraph::get_nodes_by_type(const std::string &type) std::vector nodes_; if (nodeType.contains(type)) { + nodes_.reserve(nodeType.at(type).size()); for (auto &id: nodeType.at(type)) { std::optional n = get_(id); @@ -527,6 +521,12 @@ std::vector DSRGraph::get_nodes_by_types(const std::vector lck(_mutex_cache_maps); std::vector nodes_; + { + size_t total = 0; + for (const auto &type : types) + if (nodeType.contains(type)) total += nodeType.at(type).size(); + nodes_.reserve(total); + } for (auto &type : types) { if (nodeType.contains(type)) @@ -547,17 +547,17 @@ std::vector DSRGraph::get_nodes_by_types(const std::vector DSRGraph::get_edge_(uint64_t 
from, uint64_t to, const std::string &key) { - //std::shared_lock lock(_mutex); - if (nodes.contains(from) && nodes.contains(to)) - { - auto n = get_(from); - if (n.has_value()) { - auto edge = n.value().fano().find({to, key}); - if (edge != n.value().fano().end()) { - return edge->second.read_reg(); - } - } + auto from_it = nodes.find(from); + if (from_it == nodes.end() || from_it->second.empty() || !nodes.contains(to)) { + return {}; + } + + auto& fano = from_it->second.read_reg().fano(); + auto edge = fano.find({to, key}); + if (edge != fano.end() && !edge->second.empty()) { + return edge->second.read_reg(); } + return {}; } @@ -587,18 +587,17 @@ std::optional DSRGraph::get_edge(const Node &n, const std::string &to, con std::optional id_to = get_id_from_name(to); if (id_to.has_value()) { - return (n.fano().contains({id_to.value(), key})) ? - std::make_optional(n.fano().find({id_to.value(), key})->second) : - std::nullopt; + auto it = n.fano().find({id_to.value(), key}); + if (it != n.fano().end()) return it->second; } return {}; } std::optional DSRGraph::get_edge(const Node &n, uint64_t to, const std::string &key) { - return (n.fano().contains({to, key})) ? - std::make_optional(n.fano().find({to, key})->second) : - std::nullopt; + auto it = n.fano().find({to, key}); + if (it != n.fano().end()) return it->second; + return {}; } @@ -613,43 +612,33 @@ DSRGraph::insert_or_assign_edge_(CRDTEdge &&attrs, uint64_t from, uint64_t to) { auto &node = nodes.at(from).read_reg(); //check if we are creating an edge or we are updating it. 
- //Update - if (node.fano().contains({to, attrs.type()})) + auto fano_it = node.fano().find({to, attrs.type()}); + if (fano_it != node.fano().end()) { - auto iter = nodes.at(from).read_reg().fano().find({attrs.to(), attrs.type()}); - auto end = nodes.at(from).read_reg().fano().end(); - if (iter != end) { - std::vector atts_deltas; - auto &iter_edge = iter->second.read_reg().attrs(); - for (auto &[k, att]: attrs.attrs()) { - //comparar igualdad o inexistencia - if (!iter_edge.contains(k)) { - iter_edge.emplace(k, mvreg()); - } - if (iter_edge.at(k).empty() or - att.read_reg() != - iter_edge.at(k).read_reg()) { - auto delta = iter_edge.at(k).write(std::move(att.read_reg())); - atts_deltas.emplace_back( - CRDTEdgeAttr_to_IDL(agent_id, from, from, to, attrs.type(), k, delta)); - - } + //Update + std::vector atts_deltas; + auto &iter_edge = fano_it->second.read_reg().attrs(); + for (auto &[k, att]: attrs.attrs()) { + auto &attr_reg = iter_edge.try_emplace(k, mvreg()).first->second; + if (attr_reg.empty() or att.read_reg() != attr_reg.read_reg()) { + auto delta = attr_reg.write(std::move(att.read_reg())); + atts_deltas.emplace_back( + CRDTEdgeAttr_to_IDL(agent_id, from, from, to, attrs.type(), k, delta)); } - auto it = iter_edge.begin(); - while (it != iter_edge.end()) { - if (!attrs.attrs().contains(it->first)) { - std::string att = it->first; - auto delta = iter_edge.at(it->first).reset(); - it = iter_edge.erase(it); - atts_deltas.emplace_back( - CRDTEdgeAttr_to_IDL(agent_id, from, from, to, attrs.type(), att, delta)); - - } else { - ++it; - } + } + auto it = iter_edge.begin(); + while (it != iter_edge.end()) { + if (!attrs.attrs().contains(it->first)) { + std::string att = it->first; + auto delta = it->second.reset(); + it = iter_edge.erase(it); + atts_deltas.emplace_back( + CRDTEdgeAttr_to_IDL(agent_id, from, from, to, attrs.type(), att, delta)); + } else { + ++it; } - return {true, {}, std::move(atts_deltas)}; } + return {true, {}, std::move(atts_deltas)}; } else { 
// Insert //node.fano().insert({{to, attrs.type()}, mvreg()}); @@ -799,10 +788,18 @@ std::vector DSRGraph::get_edges_by_type(const std::string &type) std::shared_lock lock_cache(_mutex_cache_maps); std::vector edges_; if (edgeType.contains(type)) { + edges_.reserve(edgeType.at(type).size()); for (auto &[from, to] : edgeType.at(type)) { - auto n = get_edge_(from, to, type); - if (n.has_value()) - edges_.emplace_back(std::move(n.value())); + auto node_it = nodes.find(from); + if (node_it == nodes.end() || node_it->second.empty()) { + continue; + } + + auto &fano = node_it->second.read_reg().fano(); + auto edge_it = fano.find({to, type}); + if (edge_it != fano.end()) { + edges_.emplace_back(edge_it->second.read_reg()); + } } } return edges_; @@ -814,6 +811,7 @@ std::vector DSRGraph::get_edges_to_id(uint64_t id) std::shared_lock lock_cache(_mutex_cache_maps); std::vector edges_; if (to_edges.contains(id)) { + edges_.reserve(to_edges.at(id).size()); for (const auto &[k, v] : to_edges.at(id)) { auto n = get_edge_(k, id, v); if (n.has_value()) @@ -826,12 +824,16 @@ std::vector DSRGraph::get_edges_to_id(uint64_t id) std::optional, DSR::Edge>> DSRGraph::get_edges(uint64_t id) { std::shared_lock lock(_mutex); - std::optional n = get_node(id); - if (n.has_value()) - { - return n->fano(); + auto node_it = nodes.find(id); + if (node_it == nodes.end() || node_it->second.empty()) { + return std::nullopt; } - return std::nullopt; + + std::map, DSR::Edge> edges_; + for (const auto &[key, edge_reg] : node_it->second.read_reg().fano()) { + edges_.emplace(key, DSR::Edge(edge_reg.read_reg())); + } + return edges_; } @@ -952,7 +954,7 @@ inline void DSRGraph::update_maps_edge_delete(uint64_t from, uint64_t to, const std::unique_lock lck(_mutex_cache_maps); if (const auto tuple = std::pair{from, to}; edges.contains(tuple)) { edges.at(tuple).erase(key); - edges.erase({from, to}); + if (edges.at(tuple).empty()) edges.erase(tuple); } if (to_edges.contains(to)) { @@ -1102,11 +1104,20 @@ void 
DSRGraph::join_delta_node(IDL::MvregNode &&mvreg) }; std::optional,hash_tuple>> cache_map_to_edges = {}; + // Snapshot the data needed for signal emission while the lock is held. + // nodes.at(id) must NOT be accessed after the lock is released: a concurrent + // insert_node_/update_node call on the same id runs nodes[id].write() which + // calls dk.rmv() (clears dk.ds) followed by dk.add(), leaving a window where + // read_reg()'s assert(dk.ds.size() >= 1) would fire. + std::string node_type_snapshot; + std::vector> from_edges_snapshot; { std::unique_lock lock(_mutex); if (!deleted.contains(id)) { joined = true; - maybe_deleted_node = (nodes[id].empty()) ? std::nullopt : std::make_optional(nodes.at(id).read_reg()); + if (auto it = nodes.find(id); it != nodes.end() && !it->second.empty()) { + maybe_deleted_node = it->second.read_reg(); + } nodes[id].join(std::move(crdt_delta)); if (nodes.at(id).empty() or d_empty) { nodes.erase(id); @@ -1115,8 +1126,14 @@ void DSRGraph::join_delta_node(IDL::MvregNode &&mvreg) delete_unprocessed_deltas(); } else { signal = true; - update_maps_node_insert(id, nodes.at(id).read_reg()); + const auto& reg = nodes.at(id).read_reg(); + update_maps_node_insert(id, reg); consume_unprocessed_deltas(); + // Snapshot type and outgoing edges before the lock is released. 
+ node_type_snapshot = reg.type(); + for (const auto &[k, v] : reg.fano()) { + from_edges_snapshot.emplace_back(k.first, k.second); + } } } else { delete_unprocessed_deltas(); @@ -1125,11 +1142,11 @@ void DSRGraph::join_delta_node(IDL::MvregNode &&mvreg) if (joined) { if (signal) { - DSR_LOG_DEBUG("[JOIN_NODE] node inserted/updated:", id, nodes.at(id).read_reg().type()); - emitter.update_node_signal(id, nodes.at(id).read_reg().type(), SignalInfo{ mvreg.agent_id() }); - for (const auto &[k, v] : nodes.at(id).read_reg().fano()) { - DSR_LOG_DEBUG("[JOIN_NODE] add edge FROM:", id, k.first, k.second); - emitter.update_edge_signal(id, k.first, k.second, SignalInfo{ mvreg.agent_id() }); + DSR_LOG_DEBUG("[JOIN_NODE] node inserted/updated:", id, node_type_snapshot); + emitter.update_node_signal(id, node_type_snapshot, SignalInfo{ mvreg.agent_id() }); + for (const auto &[to_id, edge_type] : from_edges_snapshot) { + DSR_LOG_DEBUG("[JOIN_NODE] add edge FROM:", id, to_id, edge_type); + emitter.update_edge_signal(id, to_id, edge_type, SignalInfo{ mvreg.agent_id() }); } for (const auto &[k, v]: map_new_to_edges) @@ -1452,7 +1469,10 @@ std::optional DSRGraph::join_delta_edge_attr(IDL::MvregEdgeAttr &&m void DSRGraph::join_full_graph(IDL::OrMap &&full_graph) { - std::vector>> updates; + // 5th element: post-join node snapshot captured inside the lock, used for + // signal emission after the lock is released to avoid racing with + // insert_node_/update_node (same pattern as join_delta_node). + std::vector, std::optional>> updates; uint64_t id{0}, timestamp{0}; uint32_t agent_id_ch{0}; @@ -1539,30 +1559,37 @@ void DSRGraph::join_full_graph(IDL::OrMap &&full_graph) auto mv = IDLNode_to_CRDT(std::move(val)); bool mv_empty = mv.empty(); agent_id_ch = val.agent_id(); - std::optional nd = (nodes[k].empty()) ? std::nullopt : std::make_optional(nodes[k].read_reg()); + auto it = nodes.find(k); + std::optional nd = + (it != nodes.end() and !it->second.empty()) ? 
std::make_optional(it->second.read_reg()) : std::nullopt; id = k; if (!deleted.contains(k)) { - nodes[k].join(std::move(mv)); - if (mv_empty or nodes.at(k).empty()) { + if (it == nodes.end()) { + it = nodes.emplace(k, mvreg{}).first; + } + it->second.join(std::move(mv)); + if (mv_empty or it->second.empty()) { update_maps_node_delete(k, nd); - updates.emplace_back(false, k, "", std::nullopt); + updates.emplace_back(false, k, "", std::nullopt, std::nullopt); delete_unprocessed_deltas(); } else { - update_maps_node_insert(k, nodes.at(k).read_reg()); - updates.emplace_back(true, k, nodes.at(k).read_reg().type(), nd); + const auto& reg = it->second.read_reg(); + update_maps_node_insert(k, reg); + updates.emplace_back(true, k, reg.type(), nd, reg); consume_unprocessed_deltas(); } } } } - for (auto &[signal, id, type, nd] : updates) + for (auto &[signal, id, type, nd, current_nd] : updates) if (signal) { - //check what change is joined - if (!nd.has_value() || nd->attrs() != nodes[id].read_reg().attrs()) { - emitter.update_node_signal(id, nodes[id].read_reg().type(), SignalInfo{ agent_id_ch }); - } else if (nd.value() != nodes[id].read_reg()) { - auto iter = nodes[id].read_reg().fano(); + //check what change is joined — use the snapshot captured inside the lock, + //not nodes[id], which races with concurrent insert_node_/update_node calls. 
+ if (!nd.has_value() || nd->attrs() != current_nd->attrs()) { + emitter.update_node_signal(id, type, SignalInfo{ agent_id_ch }); + } else if (nd.value() != *current_nd) { + const auto& iter = current_nd->fano(); for (const auto &[k, v] : nd->fano()) { if (!iter.contains(k)) { emitter.del_edge_signal(id, k.first, k.second, SignalInfo{ agent_id_ch }); @@ -1681,7 +1708,7 @@ void DSRGraph::node_subscription_thread() catch (const std::exception &ex) { std::cerr << ex.what() << std::endl; } }; dsrpub_call_node = NewMessageFunctor(this, lambda_general_topic); - auto [res, sub, reader] = dsrsub_node.init(dsrparticipant.getParticipant(), dsrparticipant.getNodeTopic(), dsrpub_call_node, mtx_entity_creation); + auto [res, sub, reader] = dsrsub_node.init(dsrparticipant.getParticipant(), dsrparticipant.getNodeTopic(), dsrparticipant.get_domain_id(), dsrpub_call_node, mtx_entity_creation); dsrparticipant.add_subscriber(dsrparticipant.getNodeTopic()->get_name(), {sub, reader}); } @@ -1715,7 +1742,7 @@ void DSRGraph::edge_subscription_thread() catch (const std::exception &ex) { std::cerr << ex.what() << std::endl; } }; dsrpub_call_edge = NewMessageFunctor(this, lambda_general_topic); - auto [res, sub, reader] = dsrsub_edge.init(dsrparticipant.getParticipant(), dsrparticipant.getEdgeTopic(), dsrpub_call_edge, mtx_entity_creation); + auto [res, sub, reader] = dsrsub_edge.init(dsrparticipant.getParticipant(), dsrparticipant.getEdgeTopic(), dsrparticipant.get_domain_id(), dsrpub_call_edge, mtx_entity_creation); dsrparticipant.add_subscriber(dsrparticipant.getEdgeTopic()->get_name(), {sub, reader}); } @@ -1781,7 +1808,7 @@ void DSRGraph::edge_attrs_subscription_thread() }; dsrpub_call_edge_attrs = NewMessageFunctor(this, lambda_general_topic); - auto [res, sub, reader] = dsrsub_edge_attrs.init(dsrparticipant.getParticipant(), dsrparticipant.getAttEdgeTopic(), + auto [res, sub, reader] = dsrsub_edge_attrs.init(dsrparticipant.getParticipant(), dsrparticipant.getAttEdgeTopic(), 
dsrparticipant.get_domain_id(), dsrpub_call_edge_attrs, mtx_entity_creation); dsrparticipant.add_subscriber(dsrparticipant.getAttEdgeTopic()->get_name(), {sub, reader}); //dsrsub_edge_attrs_stream.init(dsrparticipant.getParticipant(), "DSR_EDGE_ATTRS_STREAM", dsrparticipant.getEdgeAttrTopicName(), @@ -1850,7 +1877,7 @@ void DSRGraph::node_attrs_subscription_thread() }; dsrpub_call_node_attrs = NewMessageFunctor(this, lambda_general_topic); - auto [res, sub, reader] = dsrsub_node_attrs.init(dsrparticipant.getParticipant(), dsrparticipant.getAttNodeTopic(), + auto [res, sub, reader] = dsrsub_node_attrs.init(dsrparticipant.getParticipant(), dsrparticipant.getAttNodeTopic(), dsrparticipant.get_domain_id(), dsrpub_call_node_attrs, mtx_entity_creation); dsrparticipant.add_subscriber(dsrparticipant.getAttNodeTopic()->get_name(), {sub, reader}); @@ -1904,7 +1931,7 @@ void DSRGraph::fullgraph_server_thread() } }; dsrpub_graph_request_call = NewMessageFunctor(this, lambda_graph_request); - auto [res, sub, reader] = dsrsub_graph_request.init(dsrparticipant.getParticipant(), dsrparticipant.getGraphRequestTopic(), + auto [res, sub, reader] = dsrsub_graph_request.init(dsrparticipant.getParticipant(), dsrparticipant.getGraphRequestTopic(), dsrparticipant.get_domain_id(), dsrpub_graph_request_call, mtx_entity_creation); dsrparticipant.add_subscriber(dsrparticipant.getGraphRequestTopic()->get_name(), {sub, reader}); @@ -1912,8 +1939,8 @@ void DSRGraph::fullgraph_server_thread() std::pair DSRGraph::fullgraph_request_thread() { - bool sync = false; - bool repeated = false; + std::atomic sync{false}; + std::atomic repeated{false}; auto lambda_request_answer = [&](eprosima::fastdds::dds::DataReader *reader, DSR::DSRGraph *graph) { while (true) @@ -1946,7 +1973,7 @@ std::pair DSRGraph::fullgraph_request_thread() }; dsrpub_request_answer_call = NewMessageFunctor(this, lambda_request_answer); - auto [res, sub, reader] = dsrsub_request_answer.init(dsrparticipant.getParticipant(), 
dsrparticipant.getGraphTopic(), + auto [res, sub, reader] = dsrsub_request_answer.init(dsrparticipant.getParticipant(), dsrparticipant.getGraphTopic(), dsrparticipant.get_domain_id(), dsrpub_request_answer_call, mtx_entity_creation); dsrparticipant.add_subscriber(dsrparticipant.getGraphTopic()->get_name(), {sub, reader}); diff --git a/api/dsr_inner_eigen_api.cpp b/api/dsr_inner_eigen_api.cpp index 97e8c0c..d75052e 100644 --- a/api/dsr_inner_eigen_api.cpp +++ b/api/dsr_inner_eigen_api.cpp @@ -123,10 +123,8 @@ std::optional InnerEigenAPI::get_transformation_matrix(const std::st } } // update node cache reference - uint64_t dst_id = G->get_node(dest).value().id(); - node_map[dst_id].push_back(key); - uint64_t orig_id = G->get_node(orig).value().id(); - node_map[orig_id].push_back(key); + node_map[bn.value().id()].push_back(key); + node_map[an.value().id()].push_back(key); // update cache auto ret = btotal.inverse() * atotal; @@ -212,7 +210,6 @@ std::optional InnerEigenAPI::transform_axis(const std::string &de std::optional InnerEigenAPI::transform_axis( const std::string &dest, const std::string & orig, std::uint64_t timestamp) { - Mat::Vector6d v; return transform_axis(dest, Mat::Vector6d::Zero(), orig, timestamp); } diff --git a/api/include/dsr/api/dsr_api.h b/api/include/dsr/api/dsr_api.h index d2c2ea6..6fbc18e 100644 --- a/api/include/dsr/api/dsr_api.h +++ b/api/include/dsr/api/dsr_api.h @@ -57,6 +57,7 @@ namespace DSR class DSRGraph : public QObject { friend RT_API; + friend class DSRGraphTestAccess; public: size_t size() const; @@ -584,7 +585,6 @@ namespace DSR const bool copy; std::unique_ptr utils; std::unordered_set ignored_attributes; - ThreadPool tp, tp_delta_attr; bool same_host; id_generator generator; GraphSettings::LOGLEVEL log_level; @@ -677,6 +677,11 @@ namespace DSR std::unordered_multimap, uint64_t>> unprocessed_delta_edge_to; std::unordered_multimap, std::tuple, uint64_t>, hash_tuple> unprocessed_delta_edge_att; + // ThreadPools are declared after 
all data they access so that their + // destructors (which join worker threads) run before the data members + // are destroyed, preventing use-after-free data races on shutdown. + ThreadPool tp, tp_delta_attr; + //Custom function for each rtps topic class NewMessageFunctor { public: diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt new file mode 100644 index 0000000..a72bfd7 --- /dev/null +++ b/benchmarks/CMakeLists.txt @@ -0,0 +1,160 @@ +cmake_minimum_required(VERSION 3.10) +project(dsr_benchmarks + VERSION 2024.12.01 + DESCRIPTION "DSR Benchmarking Suite" + LANGUAGES CXX) + +# Fetch Catch2 if not already available +Include(FetchContent) + +FetchContent_Declare( + Catch2 + GIT_REPOSITORY https://github.com/catchorg/Catch2.git + GIT_TAG v3.8.0 +) + +FetchContent_Declare( + nanobench + GIT_REPOSITORY https://github.com/martinus/nanobench.git + GIT_TAG v4.3.11 +) + +FetchContent_MakeAvailable(Catch2 nanobench) + +# Find required packages +find_package(Boost REQUIRED) +find_package(Qt6 COMPONENTS Core REQUIRED) +find_package(Eigen3 3.3 REQUIRED NO_MODULE) + +# Collect source files +set(BENCHMARK_SOURCES + benchmark_main.cpp + + # Latency benchmarks + latency/delta_propagation_bench.cpp + latency/signal_latency_bench.cpp + latency/crdt_join_bench.cpp + + # Throughput benchmarks + throughput/single_agent_ops_bench.cpp + throughput/concurrent_writers_bench.cpp + throughput/single_agent_ops_with_latency_bench.cpp + throughput/query_ops_bench.cpp + + # Scalability benchmarks + scalability/multi_agent_sync_bench.cpp + scalability/graph_size_impact_bench.cpp + scalability/thread_scaling_bench.cpp + scalability/graph_size_scaling_bench.cpp + scalability/agent_scaling_bench.cpp + + # Consistency benchmarks + consistency/convergence_time_bench.cpp + consistency/conflict_rate_bench.cpp +) + +# Header files for IDE integration +set(BENCHMARK_HEADERS + core/benchmark_config.h + core/timing_utils.h + core/metrics_collector.h + core/nanobench_adapter.h + 
core/report_generator.h + fixtures/multi_agent_fixture.h + fixtures/graph_generator.h +) + +# Create benchmark executable +add_executable(dsr_benchmarks + ${BENCHMARK_SOURCES} + ${BENCHMARK_HEADERS} +) + +# Set C++ standard +set_target_properties(dsr_benchmarks PROPERTIES + CXX_STANDARD 23 + CXX_STANDARD_REQUIRED ON + CXX_EXTENSIONS ON +) + +target_compile_options(dsr_benchmarks PUBLIC -g -std=c++23) + +# -DTSAN=ON enables ThreadSanitizer. Requires the dsr_api/dsr_core libraries +# to also be built with TSAN=ON (via the root CMakeLists.txt), otherwise TSan +# will report false positives from uninstrumented library code. +option(TSAN "Enable ThreadSanitizer" OFF) +if (TSAN) + message(STATUS "ThreadSanitizer enabled for benchmarks") + target_compile_options(dsr_benchmarks PRIVATE -fsanitize=thread -fno-omit-frame-pointer) + target_link_options(dsr_benchmarks PRIVATE -fsanitize=thread) +endif() + +# Include directories +target_include_directories(dsr_benchmarks PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/core + ${CMAKE_CURRENT_SOURCE_DIR}/fixtures +) + +# Link libraries +target_link_libraries(dsr_benchmarks PRIVATE + Catch2::Catch2 + nanobench + dsr_api + dsr_core + Qt6::Core + Eigen3::Eigen + fastdds + fastcdr +) + +# Create results directory +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/results) + +# Copy results directory structure +add_custom_command(TARGET dsr_benchmarks POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:dsr_benchmarks>/results + COMMENT "Creating results directory" +) + +# Flamegraph target — generates one SVG per benchmark test case via perf. +# Requires: perf, and FlameGraph scripts (flamegraph.pl + stackcollapse-perf.pl). +# Set FG_DIR to the FlameGraph checkout if the scripts aren't on PATH, e.g.: +# cmake --build . --target flamegraph -j1 -- FG_DIR=/opt/FlameGraph +# Or pass a Catch2 filter to profile a subset: +# cmake --build . 
--target flamegraph -j1 -- BENCH_FILTER=[LATENCY] +set(FLAMEGRAPH_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/flamegraph.sh) +set(FLAMEGRAPH_OUTDIR ${CMAKE_CURRENT_BINARY_DIR}/results/flamegraphs) +add_custom_target(flamegraph + COMMAND ${CMAKE_COMMAND} -E make_directory ${FLAMEGRAPH_OUTDIR} + COMMAND env + FG_DIR=$ENV{FG_DIR} + ${FLAMEGRAPH_SCRIPT} + -b $ + -o ${FLAMEGRAPH_OUTDIR} + $ENV{BENCH_FILTER} + DEPENDS dsr_benchmarks + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Generating per-benchmark flamegraphs in ${FLAMEGRAPH_OUTDIR}" + USES_TERMINAL +) + +# Register tests with CTest (optional) +# Disabled auto-discovery as it requires running the binary at build time +# which may fail if libraries are not in LD_LIBRARY_PATH +# include(Catch) +# catch_discover_tests(dsr_benchmarks) + +# Installation (optional) +install(TARGETS dsr_benchmarks + RUNTIME DESTINATION bin +) + +# Print configuration summary +message(STATUS "") +message(STATUS "DSR Benchmarks Configuration:") +message(STATUS " Build type: ${CMAKE_BUILD_TYPE}") +message(STATUS " C++ Standard: C++23") +message(STATUS " Catch2 version: 3.8.0") +message(STATUS " nanobench version: 4.3.11") +message(STATUS "") diff --git a/benchmarks/benchmark_main.cpp b/benchmarks/benchmark_main.cpp new file mode 100644 index 0000000..7321fcc --- /dev/null +++ b/benchmarks/benchmark_main.cpp @@ -0,0 +1,131 @@ +// DSR Benchmarking Suite +// Main entry point using Catch2 + +#define CATCH_CONFIG_RUNNER +#include +#include +#include +#include +#include + +// Custom Qt message handler to filter debug output during benchmarks +static bool g_verbose = false; + +namespace { + +bool hasCliFlag(int argc, char* argv[], const char* flag) { + for (int i = 1; i < argc; ++i) { + if (std::string(argv[i]) == flag) { + return true; + } + } + return false; +} + +bool shouldPrintBenchmarkPreamble(int argc, char* argv[]) { + return !hasCliFlag(argc, argv, "--help") + && !hasCliFlag(argc, argv, "-?") + && !hasCliFlag(argc, argv, "--list-tests") 
+ && !hasCliFlag(argc, argv, "--list-tags") + && !hasCliFlag(argc, argv, "--list-reporters") + && !hasCliFlag(argc, argv, "--list-listeners"); +} + +} // namespace + +void benchmarkMessageHandler(QtMsgType type, const QMessageLogContext& context, const QString& msg) { + // In non-verbose mode, only show warnings and above + if (!g_verbose) { + switch (type) { + case QtDebugMsg: + case QtInfoMsg: + return; // Suppress debug and info messages + default: + break; + } + } + + // Format and output remaining messages + QByteArray localMsg = msg.toLocal8Bit(); + switch (type) { + case QtDebugMsg: + std::cout << "[DEBUG] " << localMsg.constData() << std::endl; + break; + case QtInfoMsg: + std::cout << "[INFO] " << localMsg.constData() << std::endl; + break; + case QtWarningMsg: + std::cout << "[WARNING] " << localMsg.constData() << std::endl; + break; + case QtCriticalMsg: + std::cout << "[CRITICAL] " << localMsg.constData() << std::endl; + break; + case QtFatalMsg: + std::cout << "[FATAL] " << localMsg.constData() << std::endl; + // Throw instead of abort() so the fixture's try/catch can catch it, + // mark the test as failed, and let Catch2 continue to the next test. 
+ throw std::runtime_error(localMsg.constData()); + } +} + +int main(int argc, char* argv[]) { + // Install custom message handler before QCoreApplication + qInstallMessageHandler(benchmarkMessageHandler); + + // Check for verbose flag + for (int i = 1; i < argc; ++i) { + if (std::string(argv[i]) == "--verbose" || std::string(argv[i]) == "-v") { + g_verbose = true; + break; + } + } + + // Initialize Qt (required for signals/slots) + QCoreApplication app(argc, argv); + // Initialize Catch2 + Catch::Session session; + + // Set default reporter to console with colors + session.configData().showDurations = Catch::ShowDurations::Always; + + // Apply command line arguments + int returnCode = session.applyCommandLine(argc, argv); + if (returnCode != 0) { + return returnCode; + } + + if (shouldPrintBenchmarkPreamble(argc, argv)) { + std::cout << "=================================\n"; + std::cout << " DSR Benchmarking Suite\n"; + std::cout << "=================================\n\n"; + std::cout << "Available benchmark categories:\n"; + std::cout << " [BASELINE] - Curated low-noise regression baseline\n"; + std::cout << " [EXTENDED] - Slower supplementary baseline coverage\n"; + std::cout << " [LATENCY] - Signal emission, CRDT operations\n"; + std::cout << " [THROUGHPUT] - Single agent insert/read/update/delete, concurrent writers\n"; + std::cout << " [CRDT] - mvreg and dot_context micro-benchmarks\n"; + std::cout << " [SCALABILITY] - Thread scaling, graph size impact\n"; + std::cout << " [CONSISTENCY] - Convergence time, conflict rates\n"; + std::cout << " [PROFILE] - Expensive profiling-focused cases\n"; + std::cout << " [LOAD] - Work-under-load and concurrency-heavy cases\n"; + std::cout << " [MULTIAGENT] - Multi-agent synchronization/consistency cases\n"; + std::cout << "\n"; + std::cout << "Usage examples:\n"; + std::cout << " ./dsr_benchmarks # Run all non-hidden benchmarks\n"; + std::cout << " ./dsr_benchmarks \"[BASELINE]\" # Run curated baseline benchmarks\n"; + 
std::cout << " ./dsr_benchmarks \"[EXTENDED]\" # Run slower supplementary coverage\n"; + std::cout << " ./dsr_benchmarks \"[LATENCY]\" # Run latency benchmarks\n"; + std::cout << " ./dsr_benchmarks \"[THROUGHPUT]\" # Run throughput benchmarks\n"; + std::cout << " ./dsr_benchmarks \"[CRDT]\" # Run CRDT micro-benchmarks\n"; + std::cout << " ./dsr_benchmarks \"[PROFILE][LOAD]\" # Run long load-heavy cases\n"; + std::cout << " ./dsr_benchmarks \"[PROFILE][MULTIAGENT]\" # Run multi-agent profiling cases\n"; + std::cout << " ./dsr_benchmarks \"[.multi]\" # Run multi-agent tests (may timeout)\n"; + std::cout << " ./dsr_benchmarks -r json::out=x.json # Export to JSON\n"; + std::cout << " ./dsr_benchmarks --verbose # Show Qt debug messages\n"; + std::cout << "\n"; + std::cout << "Note: [.multi] and [.extended] tests are hidden by default.\n"; + std::cout << "\n"; + } + + return session.run(); +} diff --git a/benchmarks/consistency/conflict_rate_bench.cpp b/benchmarks/consistency/conflict_rate_bench.cpp new file mode 100644 index 0000000..fd76cb7 --- /dev/null +++ b/benchmarks/consistency/conflict_rate_bench.cpp @@ -0,0 +1,354 @@ +#include +#include +#include +#include +#include + +#include "../core/timing_utils.h" +#include "../core/metrics_collector.h" +#include "../core/report_generator.h" +#include "../fixtures/multi_agent_fixture.h" +#include "../fixtures/graph_generator.h" + +using namespace DSR; +using namespace DSR::Benchmark; + +TEST_CASE("Conflict rate benchmarks", "[CONSISTENCY][conflict][.multi][PROFILE][MULTIAGENT]") { + GraphGenerator generator; + MetricsCollector collector("conflict_rate"); + + SECTION("Concurrent attribute updates - same node") { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(4, config_file)); + fixture.wait_for_sync(); + + // Create shared node and capture actual ID + auto* agent_0 = fixture.get_agent(0); + auto shared_node = GraphGenerator::create_test_node( + 0, 
agent_0->get_agent_id(), "conflict_test"); + auto insert_result = agent_0->insert_node(shared_node); + REQUIRE(insert_result.has_value()); + uint64_t shared_node_id = insert_result.value(); + + fixture.wait_for_sync(); + REQUIRE(fixture.verify_convergence()); + + constexpr int NUM_ROUNDS = 50; + constexpr int UPDATES_PER_AGENT = 10; + constexpr size_t NUM_AGENTS = 4; + + std::atomic total_updates{0}; + uint64_t conflicts_detected = 0; + + std::barrier sync_point(NUM_AGENTS); + + for (int round = 0; round < NUM_ROUNDS; ++round) { + std::vector threads; + threads.reserve(NUM_AGENTS); + + // Record initial values before concurrent updates + std::vector expected_values(NUM_AGENTS); + for (size_t i = 0; i < NUM_AGENTS; ++i) { + expected_values[i] = static_cast(round * 1000 + i * 100); + } + + for (size_t agent_idx = 0; agent_idx < NUM_AGENTS; ++agent_idx) { + threads.emplace_back([&, agent_idx, node_id = shared_node_id]() { + auto* agent = fixture.get_agent(agent_idx); + sync_point.arrive_and_wait(); + + for (int i = 0; i < UPDATES_PER_AGENT; ++i) { + auto node = agent->get_node(node_id); + if (node) { + int32_t value = static_cast( + round * 1000 + agent_idx * 100 + i); + agent->add_or_modify_attrib_local(*node, value); + agent->update_node(*node); + total_updates.fetch_add(1, std::memory_order_relaxed); + } + } + }); + } + + for (auto& t : threads) { + t.join(); + } + + // Wait for convergence + fixture.wait_for_sync(std::chrono::milliseconds(500)); + + // Check if all agents converged to the same value + std::set final_values; + for (size_t i = 0; i < NUM_AGENTS; ++i) { + auto* agent = fixture.get_agent(i); + auto node = agent->get_node(shared_node_id); + if (node) { + auto attr = agent->get_attrib_by_name(*node); + if (attr.has_value()) { + final_values.insert(attr.value()); + } + } + } + + // If agents have different values, conflict resolution may still be in progress + // or there was a conflict that resolved differently + if (final_values.size() > 1) { + 
conflicts_detected++; + } + } + + double conflict_rate = static_cast(conflicts_detected) / + static_cast(NUM_ROUNDS) * 100.0; + + collector.record_consistency("concurrent_update_conflict_rate", + conflict_rate, "%", + {{"num_agents", std::to_string(NUM_AGENTS)}, + {"updates_per_round", std::to_string(UPDATES_PER_AGENT * NUM_AGENTS)}}); + + INFO("Conflict rate: " << conflict_rate << "% (" << conflicts_detected + << "/" << NUM_ROUNDS << " rounds)"); + INFO("Total updates: " << total_updates.load()); + + // Verify final convergence + fixture.wait_for_sync(std::chrono::milliseconds(1000)); + CHECK(fixture.verify_convergence()); + } + + SECTION("Concurrent node creations - potential ID conflicts") { + // This tests CRDT behavior when multiple agents create nodes + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(4, config_file)); + fixture.wait_for_sync(); + + constexpr int NODES_PER_AGENT = 100; + constexpr size_t NUM_AGENTS = 4; + + std::atomic total_created{0}; + std::atomic creation_failures{0}; + + std::barrier sync_point(NUM_AGENTS); + std::vector threads; + threads.reserve(NUM_AGENTS); + + for (size_t agent_idx = 0; agent_idx < NUM_AGENTS; ++agent_idx) { + threads.emplace_back([&, agent_idx]() { + auto* agent = fixture.get_agent(agent_idx); + sync_point.arrive_and_wait(); + + for (int i = 0; i < NODES_PER_AGENT; ++i) { + // Each agent uses unique IDs in its range + uint64_t node_id = 8500000 + agent_idx * 10000 + i; + auto node = GraphGenerator::create_test_node( + node_id, agent->get_agent_id(), + "agent" + std::to_string(agent_idx) + "_node" + std::to_string(i)); + + auto result = agent->insert_node(node); + if (result.has_value()) { + total_created.fetch_add(1, std::memory_order_relaxed); + } else { + creation_failures.fetch_add(1, std::memory_order_relaxed); + } + } + }); + } + + for (auto& t : threads) { + t.join(); + } + + // Wait for convergence + 
fixture.wait_for_sync(std::chrono::milliseconds(2000)); + + // Verify all agents have the same nodes + auto* agent_0 = fixture.get_agent(0); + size_t expected_node_count = agent_0->get_nodes().size(); + + bool all_match = true; + for (size_t i = 1; i < NUM_AGENTS; ++i) { + auto* agent = fixture.get_agent(i); + if (agent->get_nodes().size() != expected_node_count) { + all_match = false; + } + } + + collector.record_consistency("node_creation_success_rate", + static_cast(total_created.load()) / + static_cast(NODES_PER_AGENT * NUM_AGENTS) * 100.0, "%"); + + collector.record_consistency("final_convergence", + all_match ? 100.0 : 0.0, "%"); + + INFO("Created: " << total_created.load() << "/" << NODES_PER_AGENT * NUM_AGENTS); + INFO("Failures: " << creation_failures.load()); + INFO("All agents converged: " << (all_match ? "yes" : "no")); + + CHECK(fixture.verify_convergence()); + } + + SECTION("Edge conflict resolution") { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(2, config_file)); + fixture.wait_for_sync(); + + auto* agent_a = fixture.get_agent(0); + auto* agent_b = fixture.get_agent(1); + + // Create shared nodes and capture actual IDs + auto node1 = GraphGenerator::create_test_node(0, agent_a->get_agent_id(), "edge_node_1"); + auto node2 = GraphGenerator::create_test_node(0, agent_a->get_agent_id(), "edge_node_2"); + auto result1 = agent_a->insert_node(node1); + auto result2 = agent_a->insert_node(node2); + REQUIRE(result1.has_value()); + REQUIRE(result2.has_value()); + uint64_t node1_id = result1.value(); + uint64_t node2_id = result2.value(); + + fixture.wait_for_sync(); + REQUIRE(fixture.verify_convergence()); + + uint64_t conflicts = 0; + constexpr int NUM_ROUNDS = 50; + + for (int round = 0; round < NUM_ROUNDS; ++round) { + // Both agents try to create the same edge simultaneously + auto edge_a = GraphGenerator::create_test_edge( + node1_id, node2_id, agent_a->get_agent_id(), "test_edge"); 
+ auto edge_b = GraphGenerator::create_test_edge( + node1_id, node2_id, agent_b->get_agent_id(), "test_edge"); + + std::thread ta([&]() { agent_a->insert_or_assign_edge(edge_a); }); + std::thread tb([&]() { agent_b->insert_or_assign_edge(edge_b); }); + + ta.join(); + tb.join(); + + fixture.wait_for_sync(std::chrono::milliseconds(200)); + + // Check both agents see the edge + auto edge_on_a = agent_a->get_edge(node1_id, node2_id, "test_edge"); + auto edge_on_b = agent_b->get_edge(node1_id, node2_id, "test_edge"); + + if (!edge_on_a.has_value() || !edge_on_b.has_value()) { + conflicts++; + } + + // Delete edge for next round + agent_a->delete_edge(node1_id, node2_id, "test_edge"); + fixture.wait_for_sync(std::chrono::milliseconds(100)); + } + + double conflict_rate = static_cast(conflicts) / + static_cast(NUM_ROUNDS) * 100.0; + + collector.record_consistency("edge_conflict_rate", + conflict_rate, "%"); + + INFO("Edge conflict rate: " << conflict_rate << "%"); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "conflict_rate"); +} + +TEST_CASE("CRDT eventual consistency verification", "[CONSISTENCY][eventual][.multi][PROFILE][MULTIAGENT]") { + GraphGenerator generator; + MetricsCollector collector("eventual_consistency"); + + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(4, config_file)); + fixture.wait_for_sync(); + + SECTION("All agents eventually converge after chaos") { + constexpr size_t NUM_AGENTS = 4; + constexpr int OPS_PER_AGENT = 50; + + std::barrier sync_point(NUM_AGENTS); + std::atomic stop_flag{false}; + + // Each agent performs random operations + std::vector threads; + for (size_t agent_idx = 0; agent_idx < NUM_AGENTS; ++agent_idx) { + threads.emplace_back([&, agent_idx]() { + auto* agent = fixture.get_agent(agent_idx); + uint64_t base_id = 8700000 + agent_idx * 10000; + + sync_point.arrive_and_wait(); + + for (int i = 0; i < 
OPS_PER_AGENT && !stop_flag.load(); ++i) { + int op = i % 3; + + if (op == 0) { + // Insert node + auto node = GraphGenerator::create_test_node( + base_id + i, agent->get_agent_id()); + agent->insert_node(node); + } else if (op == 1) { + // Update existing node + auto node = agent->get_node(base_id + (i % (std::max(1, i / 2)))); + if (node) { + agent->add_or_modify_attrib_local( + *node, static_cast(i)); + agent->update_node(*node); + } + } else { + // Insert edge + auto root = agent->get_node_root(); + if (root) { + auto existing = agent->get_node(base_id + (i % (std::max(1, i / 2)))); + if (existing) { + auto edge = GraphGenerator::create_test_edge( + root->id(), existing->id(), agent->get_agent_id()); + agent->insert_or_assign_edge(edge); + } + } + } + + // Small delay between operations + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + }); + } + + for (auto& t : threads) { + t.join(); + } + + // Wait for eventual consistency + INFO("Waiting for eventual consistency..."); + + auto start = std::chrono::steady_clock::now(); + bool converged = fixture.verify_convergence(std::chrono::seconds(30)); + auto duration = std::chrono::duration_cast( + std::chrono::steady_clock::now() - start); + + collector.record_consistency("eventual_consistency_achieved", + converged ? 100.0 : 0.0, "%"); + collector.record_consistency("convergence_duration_after_chaos", + static_cast(duration.count()), "ms"); + + INFO("Convergence " << (converged ? 
"achieved" : "FAILED") + << " in " << duration.count() << " ms"); + + CHECK(converged); + + if (converged) { + // Verify all agents have same node count + auto* agent_0 = fixture.get_agent(0); + size_t node_count = agent_0->get_nodes().size(); + + for (size_t i = 1; i < NUM_AGENTS; ++i) { + auto* agent = fixture.get_agent(i); + CHECK(agent->get_nodes().size() == node_count); + } + } + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "eventual_consistency"); +} diff --git a/benchmarks/consistency/convergence_time_bench.cpp b/benchmarks/consistency/convergence_time_bench.cpp new file mode 100644 index 0000000..fa77784 --- /dev/null +++ b/benchmarks/consistency/convergence_time_bench.cpp @@ -0,0 +1,253 @@ +#include +#include +#include +#include +#include + +#include "../core/timing_utils.h" +#include "../core/metrics_collector.h" +#include "../core/report_generator.h" +#include "../fixtures/multi_agent_fixture.h" +#include "../fixtures/graph_generator.h" + +using namespace DSR; +using namespace DSR::Benchmark; + +TEST_CASE("Convergence time benchmarks", "[CONSISTENCY][convergence][.multi][PROFILE][MULTIAGENT]") { + GraphGenerator generator; + MetricsCollector collector("convergence_time"); + + SECTION("Single update convergence") { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(2, config_file)); + fixture.wait_for_sync(); + + auto* agent_a = fixture.get_agent(0); + auto* agent_b = fixture.get_agent(1); + REQUIRE(agent_a != nullptr); + REQUIRE(agent_b != nullptr); + + LatencyTracker tracker(100); + + for (int i = 0; i < 100; ++i) { + auto node = GraphGenerator::create_test_node( + 0, agent_a->get_agent_id(), + "conv_node_" + std::to_string(i)); + + uint64_t start = get_unix_timestamp(); + auto result = agent_a->insert_node(node); + if (!result.has_value()) continue; + uint64_t node_id = result.value(); + + // Poll until agent B sees the 
node + auto poll_start = std::chrono::steady_clock::now(); + while (std::chrono::steady_clock::now() - poll_start < std::chrono::seconds(5)) { + fixture.process_events(1); + auto b_node = agent_b->get_node(node_id); + if (b_node.has_value()) { + uint64_t conv_time = get_unix_timestamp() - start; + tracker.record(conv_time); + break; + } + } + } + + auto stats = tracker.stats(); + collector.record_latency_stats("single_node_convergence", stats); + collector.record_consistency("convergence_success_rate", + (static_cast(tracker.count()) / 100.0) * 100, "%"); + + INFO("Single node convergence - Mean: " << stats.mean_us() << " us, " + << "P99: " << stats.p99_us() << " us"); + INFO("Success rate: " << tracker.count() << "/100"); + } + + SECTION("Batch convergence time") { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(2, config_file)); + fixture.wait_for_sync(); + + auto* agent_a = fixture.get_agent(0); + auto* agent_b = fixture.get_agent(1); + + LatencyTracker tracker(20); + + for (int batch = 0; batch < 20; ++batch) { + // Insert batch of 10 nodes and capture actual IDs + std::vector node_ids; + node_ids.reserve(10); + + uint64_t start = get_unix_timestamp(); + + for (int i = 0; i < 10; ++i) { + auto node = GraphGenerator::create_test_node( + 0, agent_a->get_agent_id()); + auto result = agent_a->insert_node(node); + if (result.has_value()) { + node_ids.push_back(result.value()); + } + } + + // Wait for all nodes to converge + auto poll_start = std::chrono::steady_clock::now(); + while (std::chrono::steady_clock::now() - poll_start < std::chrono::seconds(10)) { + fixture.process_events(1); + + bool all_converged = true; + for (auto id : node_ids) { + if (!agent_b->get_node(id).has_value()) { + all_converged = false; + break; + } + } + + if (all_converged) { + uint64_t conv_time = get_unix_timestamp() - start; + tracker.record(conv_time); + break; + } + } + } + + auto stats = tracker.stats(); + 
collector.record_latency_stats("batch_convergence_10_nodes", stats); + + INFO("Batch convergence (10 nodes) - Mean: " << stats.mean_ms() << " ms"); + } + + SECTION("Convergence under concurrent updates") { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(4, config_file)); + fixture.wait_for_sync(); + + LatencyTracker tracker(50); + + // Each agent creates nodes concurrently + for (int round = 0; round < 50; ++round) { + std::vector all_node_ids; + std::mutex ids_mutex; + + uint64_t start = get_unix_timestamp(); + + // Each agent creates 5 nodes in parallel + std::vector threads; + for (size_t agent_idx = 0; agent_idx < 4; ++agent_idx) { + threads.emplace_back([&, agent_idx]() { + auto* agent = fixture.get_agent(agent_idx); + for (int i = 0; i < 5; ++i) { + auto node = GraphGenerator::create_test_node( + 0, agent->get_agent_id()); + auto result = agent->insert_node(node); + if (result.has_value()) { + std::lock_guard lock(ids_mutex); + all_node_ids.push_back(result.value()); + } + } + }); + } + for (auto& t : threads) t.join(); + + // Wait for all agents to see all nodes + auto poll_start = std::chrono::steady_clock::now(); + while (std::chrono::steady_clock::now() - poll_start < std::chrono::seconds(15)) { + fixture.process_events(5); + + bool all_converged = true; + for (size_t agent_idx = 0; agent_idx < 4 && all_converged; ++agent_idx) { + auto* agent = fixture.get_agent(agent_idx); + for (auto id : all_node_ids) { + if (!agent->get_node(id).has_value()) { + all_converged = false; + break; + } + } + } + + if (all_converged) { + uint64_t conv_time = get_unix_timestamp() - start; + tracker.record(conv_time); + break; + } + } + } + + auto stats = tracker.stats(); + collector.record_latency_stats("concurrent_convergence_4_agents", stats); + + INFO("Concurrent convergence (4 agents) - Mean: " << stats.mean_ms() << " ms, " + << "P99: " << stats.p99_ms() << " ms"); + + // Check against timeout + 
CHECK(stats.p99_ms() < 1000); // Should converge within 1 second p99 + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "convergence_time"); +} + +TEST_CASE("Attribute convergence", "[CONSISTENCY][convergence][attributes][.multi][PROFILE][MULTIAGENT]") { + GraphGenerator generator; + MetricsCollector collector("attribute_convergence"); + + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(2, config_file)); + fixture.wait_for_sync(); + + auto* agent_a = fixture.get_agent(0); + auto* agent_b = fixture.get_agent(1); + + // Create shared test node and capture actual ID + auto test_node = GraphGenerator::create_test_node( + 0, agent_a->get_agent_id(), "attr_conv_test"); + auto insert_result = agent_a->insert_node(test_node); + REQUIRE(insert_result.has_value()); + uint64_t shared_node_id = insert_result.value(); + + fixture.wait_for_sync(); + REQUIRE(fixture.verify_convergence()); + + SECTION("Attribute update convergence") { + LatencyTracker tracker(100); + + for (int i = 0; i < 100; ++i) { + auto node = agent_a->get_node(shared_node_id); + REQUIRE(node.has_value()); + + int32_t new_value = 1000 + i; + agent_a->add_or_modify_attrib_local(*node, new_value); + + uint64_t start = get_unix_timestamp(); + agent_a->update_node(*node); + + // Wait for attribute to converge + auto poll_start = std::chrono::steady_clock::now(); + while (std::chrono::steady_clock::now() - poll_start < std::chrono::seconds(5)) { + fixture.process_events(1); + + auto b_node = agent_b->get_node(shared_node_id); + if (b_node.has_value()) { + auto attr = agent_b->get_attrib_by_name(*b_node); + if (attr.has_value() && attr.value() == new_value) { + uint64_t conv_time = get_unix_timestamp() - start; + tracker.record(conv_time); + break; + } + } + } + } + + auto stats = tracker.stats(); + collector.record_latency_stats("attribute_update_convergence", stats); + + INFO("Attribute 
convergence - Mean: " << stats.mean_us() << " us"); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "attribute_convergence"); +} diff --git a/benchmarks/core/benchmark_config.h b/benchmarks/core/benchmark_config.h new file mode 100644 index 0000000..0734131 --- /dev/null +++ b/benchmarks/core/benchmark_config.h @@ -0,0 +1,55 @@ +#ifndef DSR_BENCHMARK_CONFIG_H +#define DSR_BENCHMARK_CONFIG_H + +#include +#include +#include + +namespace DSR::Benchmark { + +struct BenchmarkConfig { + // Timing configuration + uint32_t warmup_iterations = 10; + uint32_t measurement_iterations = 100; + std::chrono::milliseconds sync_wait_time{200}; + std::chrono::seconds max_convergence_timeout{10}; + + // Multi-agent configuration + uint32_t default_agent_count = 2; + uint32_t max_agent_count = 16; + + // Graph generation + uint32_t small_graph_nodes = 100; + uint32_t medium_graph_nodes = 1000; + uint32_t large_graph_nodes = 10000; + + // Throughput settings + uint32_t throughput_duration_seconds = 5; + uint32_t concurrent_writer_threads = 4; + + // Output settings + std::string results_directory = "results"; + bool export_json = true; + bool export_csv = true; + bool verbose = false; +}; + +// Default configuration singleton +inline BenchmarkConfig& default_config() { + static BenchmarkConfig config; + return config; +} + +// Percentile levels for latency statistics +constexpr double PERCENTILE_P50 = 0.50; +constexpr double PERCENTILE_P90 = 0.90; +constexpr double PERCENTILE_P95 = 0.95; +constexpr double PERCENTILE_P99 = 0.99; + +// Threshold constants for validation +constexpr uint64_t MAX_EXPECTED_LATENCY_NS = 100'000'000; // 100ms +constexpr uint64_t MIN_EXPECTED_THROUGHPUT_OPS = 1000; // 1000 ops/sec + +} // namespace DSR::Benchmark + +#endif // DSR_BENCHMARK_CONFIG_H diff --git a/benchmarks/core/metrics_collector.h b/benchmarks/core/metrics_collector.h new file mode 100644 index 0000000..cf08f60 --- /dev/null +++ 
b/benchmarks/core/metrics_collector.h @@ -0,0 +1,239 @@ +#ifndef DSR_METRICS_COLLECTOR_H +#define DSR_METRICS_COLLECTOR_H + +#include +#include +#include +#include +#include +#include +#include "timing_utils.h" +#include "benchmark_config.h" + +namespace DSR::Benchmark { + +// Categories of benchmark metrics +enum class MetricCategory { + Latency, + Throughput, + Scalability, + Consistency +}; + +inline std::string to_string(MetricCategory cat) { + switch (cat) { + case MetricCategory::Latency: return "latency"; + case MetricCategory::Throughput: return "throughput"; + case MetricCategory::Scalability: return "scalability"; + case MetricCategory::Consistency: return "consistency"; + } + return "unknown"; +} + + +// Individual metric measurement +struct Metric { + std::string name; + MetricCategory category; + std::string unit; + double value; + std::map additional_values; // For percentiles, etc. + std::map tags; // For categorization +}; + + +// Result of a complete benchmark run +struct BenchmarkResult { + std::string benchmark_name; + std::string timestamp; + std::chrono::milliseconds total_duration; + std::vector metrics; + std::map metadata; +}; + + +// Thread-safe collector for benchmark metrics +class MetricsCollector { +public: + MetricsCollector() = default; + + explicit MetricsCollector(std::string benchmark_name) + : benchmark_name_(std::move(benchmark_name)) + , start_time_(std::chrono::steady_clock::now()) + {} + + // Set benchmark name + void set_benchmark_name(const std::string& name) { + std::lock_guard lock(mutex_); + benchmark_name_ = name; + } + + // Add metadata + void add_metadata(const std::string& key, const std::string& value) { + std::lock_guard lock(mutex_); + metadata_[key] = value; + } + + // Record a simple metric + void record(const std::string& name, MetricCategory category, + double value, const std::string& unit = "") { + Metric m; + m.name = name; + m.category = category; + m.value = value; + m.unit = unit; + + std::lock_guard 
lock(mutex_); + metrics_.push_back(std::move(m)); + } + + // Record a metric with tags + void record(const std::string& name, MetricCategory category, + double value, const std::string& unit, + const std::map& tags) { + Metric m; + m.name = name; + m.category = category; + m.value = value; + m.unit = unit; + m.tags = tags; + + std::lock_guard lock(mutex_); + metrics_.push_back(std::move(m)); + } + + // Record latency statistics from a LatencyTracker + void record_latency_stats(const std::string& name, LatencyStats stats, + const std::map& tags = {}) { + Metric m; + m.name = name; + m.category = MetricCategory::Latency; + m.value = stats.mean_ns; + m.unit = "ns"; + m.tags = tags; + m.additional_values["count"] = static_cast(stats.count); + m.additional_values["mean_ns"] = stats.mean_ns; + m.additional_values["stddev_ns"] = stats.stddev_ns; + m.additional_values["min_ns"] = static_cast(stats.min_ns); + m.additional_values["max_ns"] = static_cast(stats.max_ns); + m.additional_values["p50_ns"] = static_cast(stats.p50_ns); + m.additional_values["p90_ns"] = static_cast(stats.p90_ns); + m.additional_values["p95_ns"] = static_cast(stats.p95_ns); + m.additional_values["p99_ns"] = static_cast(stats.p99_ns); + + std::lock_guard lock(mutex_); + metrics_.push_back(std::move(m)); + } + + // Record throughput + void record_throughput(const std::string& name, uint64_t operations, + std::chrono::milliseconds duration, + const std::map& tags = {}) { + double ops_per_sec = static_cast(operations) / + (static_cast(duration.count()) / 1000.0); + + Metric m; + m.name = name; + m.category = MetricCategory::Throughput; + m.value = ops_per_sec; + m.unit = "ops/sec"; + m.tags = tags; + m.additional_values["total_operations"] = static_cast(operations); + m.additional_values["duration_ms"] = static_cast(duration.count()); + + std::lock_guard lock(mutex_); + metrics_.push_back(std::move(m)); + } + + // Record scalability metric + void record_scalability(const std::string& name, uint32_t 
scale_factor, + double metric_value, const std::string& unit, + const std::map& tags = {}) { + Metric m; + m.name = name; + m.category = MetricCategory::Scalability; + m.value = metric_value; + m.unit = unit; + m.tags = tags; + m.additional_values["scale_factor"] = static_cast(scale_factor); + + std::lock_guard lock(mutex_); + metrics_.push_back(std::move(m)); + } + + // Record consistency metric + void record_consistency(const std::string& name, double value, + const std::string& unit, + const std::map& tags = {}) { + Metric m; + m.name = name; + m.category = MetricCategory::Consistency; + m.value = value; + m.unit = unit; + m.tags = tags; + + std::lock_guard lock(mutex_); + metrics_.push_back(std::move(m)); + } + + // Get all metrics by category + [[nodiscard]] std::vector get_metrics(MetricCategory category) const { + std::lock_guard lock(mutex_); + std::vector result; + for (const auto& m : metrics_) { + if (m.category == category) { + result.push_back(m); + } + } + return result; + } + + // Get all metrics + [[nodiscard]] std::vector get_all_metrics() const { + std::lock_guard lock(mutex_); + return metrics_; + } + + // Generate final result + [[nodiscard]] BenchmarkResult finalize() { + auto end_time = std::chrono::steady_clock::now(); + auto duration = std::chrono::duration_cast( + end_time - start_time_); + + // Generate timestamp + auto now = std::chrono::system_clock::now(); + auto time_t_now = std::chrono::system_clock::to_time_t(now); + char timestamp_buf[64]; + std::strftime(timestamp_buf, sizeof(timestamp_buf), "%Y-%m-%dT%H:%M:%S", + std::localtime(&time_t_now)); + + std::lock_guard lock(mutex_); + BenchmarkResult result; + result.benchmark_name = benchmark_name_; + result.timestamp = timestamp_buf; + result.total_duration = duration; + result.metrics = metrics_; + result.metadata = metadata_; + + return result; + } + + // Clear all collected metrics + void clear() { + std::lock_guard lock(mutex_); + metrics_.clear(); + metadata_.clear(); + 
start_time_ = std::chrono::steady_clock::now(); + } + +private: + mutable std::mutex mutex_; + std::string benchmark_name_; + std::chrono::steady_clock::time_point start_time_; + std::vector metrics_; + std::map metadata_; +}; + +} // namespace DSR::Benchmark + +#endif // DSR_METRICS_COLLECTOR_H diff --git a/benchmarks/core/nanobench_adapter.h b/benchmarks/core/nanobench_adapter.h new file mode 100644 index 0000000..e7ff1a4 --- /dev/null +++ b/benchmarks/core/nanobench_adapter.h @@ -0,0 +1,176 @@ +#ifndef DSR_NANOBENCH_ADAPTER_H +#define DSR_NANOBENCH_ADAPTER_H + +// Bridge between ankerl::nanobench and the MetricsCollector/LatencyStats pipeline. +// +// Usage pattern: +// +// auto bench = make_latency_bench(1000); // 1000 samples, 100 warmup +// bench.run("op_name", [&] { +// auto result = graph->some_op(); +// ankerl::nanobench::doNotOptimizeAway(result); +// }); +// collector.record_latency_stats("op_name", nb_to_stats(bench)); +// collector.record("op_name", MetricCategory::Throughput, +// nb_throughput(bench), "ops/sec", tags); +// +// make_latency_bench() is intended for steady-state operations and allows the +// call sites to raise minEpochIterations() for very fast paths. For destructive +// or state-mutating workloads, use make_single_op_latency_bench() so each epoch +// stays a single operation and the benchmarked state does not drift with +// nanobench's adaptive iteration counts. + +#include +#include +#include +#include +#include +#include +#include +#include "timing_utils.h" // LatencyStats + +namespace DSR::Benchmark { + +// --------------------------------------------------------------------------- +// TeeBuf / nb_report_stream +// +// Writes nanobench table output to both stdout and results/nanobench_report.md +// so the full table is available for offline inspection. +// The file is created/truncated once on the first call; all test cases in the +// same process run append naturally via the shared static ofstream. 
+// ---------------------------------------------------------------------------
+// Duplicates every write to two underlying stream buffers (stdout + report
+// file). A write is considered successful only if BOTH sinks accept it.
+class TeeBuf : public std::streambuf {
+public:
+    TeeBuf(std::streambuf* a, std::streambuf* b) : a_(a), b_(b) {}
+protected:
+    // Single-character overflow path: push to both sinks, fail if either fails.
+    int overflow(int c) override {
+        if (c == traits_type::eof()) return traits_type::not_eof(c);
+        if (a_->sputc(static_cast<char_type>(c)) == traits_type::eof()) return traits_type::eof();
+        if (b_->sputc(static_cast<char_type>(c)) == traits_type::eof()) return traits_type::eof();
+        return c;
+    }
+    // Bulk write path. Report the smaller of the two counts so a short write
+    // on either sink is visible to the caller instead of being silently
+    // swallowed (the original returned only b_'s count).
+    std::streamsize xsputn(const char* s, std::streamsize n) override {
+        const std::streamsize wrote_a = a_->sputn(s, n);
+        const std::streamsize wrote_b = b_->sputn(s, n);
+        return std::min(wrote_a, wrote_b);
+    }
+    // Forward flush requests (std::flush / std::endl) to both sinks; without
+    // this override the default no-op sync() means the report file is only
+    // flushed when the static ofstream in nb_report_stream() is destroyed.
+    int sync() override {
+        const int ra = a_->pubsync();
+        const int rb = b_->pubsync();
+        return (ra == 0 && rb == 0) ? 0 : -1;
+    }
+private:
+    std::streambuf *a_, *b_;
+};
+
+inline std::ostream& nb_report_stream() {
+    static std::ofstream file = []() {
+        std::filesystem::create_directories("results");
+        return std::ofstream("results/nanobench_report.md");
+    }();
+    static TeeBuf tee(std::cout.rdbuf(), file.rdbuf());
+    static std::ostream stream(&tee);
+    return stream;
+}
+
+// ---------------------------------------------------------------------------
+// nb_to_stats
+//
+// Extracts per-epoch elapsed times from the last benchmark run, sorts them,
+// and returns a LatencyStats compatible with MetricsCollector::record_latency_stats().
+// Note: nanobench stores elapsed as average time per iteration within each
+// epoch, not total epoch time. If a benchmark uses minEpochIterations() > 1,
+// the returned distribution is still useful for steady-state throughput/latency
+// summaries, but it is not a raw single-operation percentile distribution.
+// --------------------------------------------------------------------------- +inline LatencyStats nb_to_stats(const ankerl::nanobench::Bench& bench) { + using Measure = ankerl::nanobench::Result::Measure; + + if (bench.results().empty()) return {}; + + const auto& r = bench.results().back(); + const size_t n = r.size(); + if (n == 0) return {}; + + // Collect per-epoch elapsed times in nanoseconds + std::vector ns(n); + for (size_t i = 0; i < n; ++i) + ns[i] = r.get(i, Measure::elapsed) * 1e9; + + std::sort(ns.begin(), ns.end()); + + // Percentile helper: nearest-rank + auto pct = [&](double p) -> uint64_t { + const size_t idx = static_cast(p / 100.0 * static_cast(n - 1) + 0.5); + return static_cast(ns[std::min(idx, n - 1)]); + }; + + double sum = 0.0; + for (double v : ns) sum += v; + const double mean = sum / static_cast(n); + + double var = 0.0; + for (double v : ns) var += (v - mean) * (v - mean); + + LatencyStats s{}; + s.count = n; + s.mean_ns = mean; + s.stddev_ns = (n > 1) ? std::sqrt(var / static_cast(n - 1)) : 0.0; + s.min_ns = static_cast(ns.front()); + s.max_ns = static_cast(ns.back()); + s.p50_ns = pct(50); + s.p90_ns = pct(90); + s.p95_ns = pct(95); + s.p99_ns = pct(99); + return s; +} + +// --------------------------------------------------------------------------- +// nb_throughput +// +// Derives single-operation throughput (ops/sec) from the mean latency of the +// last benchmark run. +// --------------------------------------------------------------------------- +inline double nb_throughput(const ankerl::nanobench::Bench& bench) { + if (bench.results().empty()) return 0.0; + using Measure = ankerl::nanobench::Result::Measure; + const double mean_s = bench.results().back().average(Measure::elapsed); + return (mean_s > 0.0) ? 
1.0 / mean_s : 0.0; +} + +// --------------------------------------------------------------------------- +// make_latency_bench +// +// Returns a Bench pre-configured for single-operation latency measurement: +// epochIterations(1) — one sample per epoch → full percentile resolution +// epochs(n_samples) — total independent latency samples to collect +// warmup(n_warmup) — thrown-away warm-up iterations before measurement +// output(stream) — tee to stdout + results/nanobench_report.md +// --------------------------------------------------------------------------- +inline ankerl::nanobench::Bench make_latency_bench( + size_t n_samples = 1000, + size_t n_warmup = 100) +{ + ankerl::nanobench::Bench b; + b.warmup(n_warmup) + .epochs(n_samples) + .minEpochIterations(1) + .minEpochTime(std::chrono::milliseconds(10)) + .performanceCounters(false) + .output(&nb_report_stream()); + return b; +} + +// Returns a Bench that keeps the measured workload fixed at one operation per +// epoch. This is for destructive or stateful benchmarks where adaptive +// iteration counts would otherwise change the graph shape during the run. 
+inline ankerl::nanobench::Bench make_single_op_latency_bench( + size_t n_samples = 1000, + size_t n_warmup = 100) +{ + ankerl::nanobench::Bench b; + b.warmup(n_warmup) + .epochs(n_samples) + .epochIterations(1) + .performanceCounters(false) + .output(&nb_report_stream()); + return b; +} + +} // namespace DSR::Benchmark + +#endif // DSR_NANOBENCH_ADAPTER_H diff --git a/benchmarks/core/report_generator.h b/benchmarks/core/report_generator.h new file mode 100644 index 0000000..6831f2a --- /dev/null +++ b/benchmarks/core/report_generator.h @@ -0,0 +1,255 @@ +#ifndef DSR_REPORT_GENERATOR_H +#define DSR_REPORT_GENERATOR_H + +#include +#include +#include +#include +#include +#include "metrics_collector.h" + +namespace DSR::Benchmark { + +class ReportGenerator { +public: + explicit ReportGenerator(std::string output_directory = "results") + : output_directory_(std::move(output_directory)) + {} + + // Export benchmark result to JSON + bool export_json(const BenchmarkResult& result, const std::string& filename = "") { + std::string filepath = generate_filepath(result, filename, ".json"); + std::ofstream out(filepath); + if (!out.is_open()) { + return false; + } + + out << "{\n"; + out << " \"benchmark_name\": " << quote(result.benchmark_name) << ",\n"; + out << " \"timestamp\": " << quote(result.timestamp) << ",\n"; + out << " \"total_duration_ms\": " << result.total_duration.count() << ",\n"; + + // Metadata + out << " \"metadata\": {\n"; + bool first = true; + for (const auto& [key, value] : result.metadata) { + if (!first) out << ",\n"; + out << " " << quote(key) << ": " << quote(value); + first = false; + } + out << "\n },\n"; + + // Metrics + out << " \"metrics\": [\n"; + for (size_t i = 0; i < result.metrics.size(); ++i) { + const auto& m = result.metrics[i]; + out << " {\n"; + out << " \"name\": " << quote(m.name) << ",\n"; + out << " \"category\": " << quote(to_string(m.category)) << ",\n"; + out << " \"value\": " << format_double(m.value) << ",\n"; + out << " 
\"unit\": " << quote(m.unit); + + if (!m.additional_values.empty()) { + out << ",\n \"additional\": {\n"; + bool first_add = true; + for (const auto& [key, value] : m.additional_values) { + if (!first_add) out << ",\n"; + out << " " << quote(key) << ": " << format_double(value); + first_add = false; + } + out << "\n }"; + } + + if (!m.tags.empty()) { + out << ",\n \"tags\": {\n"; + bool first_tag = true; + for (const auto& [key, value] : m.tags) { + if (!first_tag) out << ",\n"; + out << " " << quote(key) << ": " << quote(value); + first_tag = false; + } + out << "\n }"; + } + + out << "\n }"; + if (i < result.metrics.size() - 1) out << ","; + out << "\n"; + } + out << " ]\n"; + out << "}\n"; + + out.close(); + last_json_path_ = filepath; + return true; + } + + // Export benchmark result to CSV + bool export_csv(const BenchmarkResult& result, const std::string& filename = "") { + std::string filepath = generate_filepath(result, filename, ".csv"); + std::ofstream out(filepath); + if (!out.is_open()) { + return false; + } + + // Header + out << "benchmark_name,timestamp,metric_name,category,value,unit," + << "mean_ns,stddev_ns,min_ns,max_ns,p50_ns,p90_ns,p95_ns,p99_ns,count\n"; + + // Data rows + for (const auto& m : result.metrics) { + out << quote_csv(result.benchmark_name) << "," + << quote_csv(result.timestamp) << "," + << quote_csv(m.name) << "," + << quote_csv(to_string(m.category)) << "," + << format_double(m.value) << "," + << quote_csv(m.unit) << ","; + + // Additional values (latency-specific) + auto get_add = [&m](const std::string& key) -> std::string { + auto it = m.additional_values.find(key); + if (it != m.additional_values.end()) { + return format_double(it->second); + } + return ""; + }; + + out << get_add("mean_ns") << "," + << get_add("stddev_ns") << "," + << get_add("min_ns") << "," + << get_add("max_ns") << "," + << get_add("p50_ns") << "," + << get_add("p90_ns") << "," + << get_add("p95_ns") << "," + << get_add("p99_ns") << "," + << 
get_add("count") << "\n"; + } + + out.close(); + last_csv_path_ = filepath; + return true; + } + + // Export both JSON and CSV + bool export_all(const BenchmarkResult& result, const std::string& base_filename = "") { + bool json_ok = export_json(result, base_filename); + bool csv_ok = export_csv(result, base_filename); + return json_ok && csv_ok; + } + + // Compare with baseline and generate comparison report + bool compare_with_baseline(const BenchmarkResult& current, + const std::string& baseline_json_path, + double regression_threshold_percent = 10.0) { + // Read baseline JSON (simplified parsing) + std::ifstream baseline_file(baseline_json_path); + if (!baseline_file.is_open()) { + return false; + } + + // For now, just note that comparison is requested + // Full JSON parsing would require nlohmann/json + comparison_requested_ = true; + baseline_path_ = baseline_json_path; + regression_threshold_ = regression_threshold_percent; + + return true; + } + + // Get last generated file paths + [[nodiscard]] const std::string& last_json_path() const { return last_json_path_; } + [[nodiscard]] const std::string& last_csv_path() const { return last_csv_path_; } + + // Set output directory + void set_output_directory(const std::string& dir) { + output_directory_ = dir; + } + +private: + std::string generate_filepath(const BenchmarkResult& result, + const std::string& filename, + const std::string& extension) { + // Ensure directory exists + std::filesystem::create_directories(output_directory_); + + std::string name = filename; + if (name.empty()) { + // Generate filename from benchmark name and timestamp + name = "benchmark_" + sanitize_filename(result.benchmark_name) + + "_" + sanitize_filename(result.timestamp); + } + + // Remove extension if present + if (name.size() > extension.size() && + name.substr(name.size() - extension.size()) == extension) { + name = name.substr(0, name.size() - extension.size()); + } + + return output_directory_ + "/" + name + extension; + } 
+ + static std::string sanitize_filename(const std::string& name) { + std::string result; + for (char c : name) { + if (std::isalnum(c) || c == '_' || c == '-') { + result += c; + } else if (c == ' ' || c == ':' || c == '/') { + result += '_'; + } + } + return result; + } + + static std::string quote(const std::string& s) { + std::string result = "\""; + for (char c : s) { + if (c == '"') result += "\\\""; + else if (c == '\\') result += "\\\\"; + else if (c == '\n') result += "\\n"; + else result += c; + } + result += "\""; + return result; + } + + static std::string quote_csv(const std::string& s) { + if (s.find(',') != std::string::npos || + s.find('"') != std::string::npos || + s.find('\n') != std::string::npos) { + std::string escaped; + for (char c : s) { + if (c == '"') escaped += "\"\""; + else escaped += c; + } + return "\"" + escaped + "\""; + } + return s; + } + + static std::string format_double(double value) { + std::ostringstream oss; + oss << std::setprecision(6) << std::fixed << value; + std::string str = oss.str(); + // Remove trailing zeros + size_t dot_pos = str.find('.'); + if (dot_pos != std::string::npos) { + size_t last_non_zero = str.find_last_not_of('0'); + if (last_non_zero > dot_pos) { + str = str.substr(0, last_non_zero + 1); + } else { + str = str.substr(0, dot_pos); + } + } + return str; + } + + std::string output_directory_; + std::string last_json_path_; + std::string last_csv_path_; + bool comparison_requested_ = false; + std::string baseline_path_; + double regression_threshold_ = 10.0; +}; + +} // namespace DSR::Benchmark + +#endif // DSR_REPORT_GENERATOR_H diff --git a/benchmarks/core/timing_utils.h b/benchmarks/core/timing_utils.h new file mode 100644 index 0000000..51d904e --- /dev/null +++ b/benchmarks/core/timing_utils.h @@ -0,0 +1,301 @@ +#ifndef DSR_TIMING_UTILS_H +#define DSR_TIMING_UTILS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DSR::Benchmark { + +// 
Monotonic nanosecond counter for benchmark measurements.
+// Uses steady_clock (CLOCK_MONOTONIC on Linux) instead of system_clock so
+// that NTP adjustments and settimeofday() cannot produce negative intervals
+// or artificially inflate latency samples.
+// NOTE(review): treats steady_clock ticks as nanoseconds — true for
+// libstdc++/libc++ on Linux, but steady_clock::period is implementation
+// defined; confirm if this ever runs on another platform.
+inline uint64_t bench_now() noexcept {
+    return static_cast<uint64_t>(
+        std::chrono::steady_clock::now().time_since_epoch().count());
+}
+
+// RAII timer that calls a callback with elapsed nanoseconds on destruction
+class ScopedTimer {
+public:
+    using Callback = std::function<void(uint64_t)>;
+
+    explicit ScopedTimer(Callback on_complete)
+        : callback_(std::move(on_complete))
+        , start_time_(bench_now())
+    {}
+
+    ~ScopedTimer() {
+        if (callback_) {
+            uint64_t elapsed = bench_now() - start_time_;
+            callback_(elapsed);
+        }
+    }
+
+    // Disable copy
+    ScopedTimer(const ScopedTimer&) = delete;
+    ScopedTimer& operator=(const ScopedTimer&) = delete;
+
+    // Allow move; the moved-from timer is disarmed so the callback fires once.
+    ScopedTimer(ScopedTimer&& other) noexcept
+        : callback_(std::move(other.callback_))
+        , start_time_(other.start_time_)
+    {
+        other.callback_ = nullptr;
+    }
+
+    // NOTE(review): move-assignment discards any pending callback on the
+    // target without invoking it — confirm that is intended at call sites.
+    ScopedTimer& operator=(ScopedTimer&& other) noexcept {
+        if (this != &other) {
+            callback_ = std::move(other.callback_);
+            start_time_ = other.start_time_;
+            other.callback_ = nullptr;
+        }
+        return *this;
+    }
+
+    // Get elapsed time without stopping
+    [[nodiscard]] uint64_t elapsed_ns() const {
+        return bench_now() - start_time_;
+    }
+
+    // Cancel the callback
+    void cancel() {
+        callback_ = nullptr;
+    }
+
+private:
+    Callback callback_;
+    uint64_t start_time_;
+};
+
+
+// Statistics from latency measurements (all base values in nanoseconds)
+struct LatencyStats {
+    uint64_t count = 0;
+    double mean_ns = 0.0;
+    double stddev_ns = 0.0;
+    uint64_t min_ns = 0;
+    uint64_t max_ns = 0;
+    uint64_t p50_ns = 0;
+    uint64_t p90_ns = 0;
+    uint64_t p95_ns = 0;
+    uint64_t p99_ns = 0;
+
+    // Convenience accessors in microseconds / milliseconds
+    [[nodiscard]] double mean_us() const { return mean_ns / 1000.0; }
+    [[nodiscard]] double mean_ms() const { return mean_ns / 1'000'000.0; }
+    [[nodiscard]] double stddev_us() const { return stddev_ns / 1000.0; }
+    [[nodiscard]] double stddev_ms() const { return stddev_ns / 1'000'000.0; }
+    [[nodiscard]] double min_us() const { return min_ns / 1000.0; }
+    [[nodiscard]] double max_us() const { return max_ns / 1000.0; }
+    [[nodiscard]] double p50_us() const { return p50_ns / 1000.0; }
+    [[nodiscard]] double p90_us() const { return p90_ns / 1000.0; }
+    [[nodiscard]] double p95_us() const { return p95_ns / 1000.0; }
+    [[nodiscard]] double p99_us() const { return p99_ns / 1000.0; }
+    [[nodiscard]] double min_ms() const { return min_ns / 1'000'000.0; }
+    [[nodiscard]] double max_ms() const { return max_ns / 1'000'000.0; }
+    [[nodiscard]] double p50_ms() const { return p50_ns / 1'000'000.0; }
+    [[nodiscard]] double p90_ms() const { return p90_ns / 1'000'000.0; }
+    [[nodiscard]] double p95_ms() const { return p95_ns / 1'000'000.0; }
+    [[nodiscard]] double p99_ms() const { return p99_ns / 1'000'000.0; }
+};
+
+
+// Collects latency samples and computes statistics on demand.
+// Stats are cached until the next record()/clear(). Not thread-safe.
+class LatencyTracker {
+public:
+    LatencyTracker() = default;
+
+    // Reserve space for expected samples
+    explicit LatencyTracker(size_t expected_samples) {
+        samples_.reserve(expected_samples);
+    }
+
+    // Record a latency sample in nanoseconds
+    void record(uint64_t latency_ns) {
+        samples_.push_back(latency_ns);
+        stats_valid_ = false;
+    }
+
+    // Record using ScopedTimer callback pattern
+    [[nodiscard]] auto recorder() {
+        return [this](uint64_t latency_ns) {
+            this->record(latency_ns);
+        };
+    }
+
+    // Create a ScopedTimer that records to this tracker
+    [[nodiscard]] ScopedTimer scoped_record() {
+        return ScopedTimer(recorder());
+    }
+
+    // Get number of recorded samples
+    [[nodiscard]] size_t count() const {
+        return samples_.size();
+    }
+
+    // Check if tracker has samples
+    [[nodiscard]] bool empty() const {
+        return samples_.empty();
+    }
+
+    // Clear all samples
+    void clear() {
+        samples_.clear();
+        stats_valid_ = false;
+    }
+
+    // Get raw samples (for export)
+    [[nodiscard]] const std::vector<uint64_t>& samples() const {
+        return samples_;
+    }
+
+    // Compute and return statistics
+    [[nodiscard]] LatencyStats stats() {
+        if (stats_valid_) {
+            return cached_stats_;
+        }
+
+        if (samples_.empty()) {
+            return LatencyStats{};
+        }
+
+        // Sort a copy for percentiles; samples_ keeps insertion order.
+        std::vector<uint64_t> sorted = samples_;
+        std::sort(sorted.begin(), sorted.end());
+
+        LatencyStats result;
+        result.count = sorted.size();
+        result.min_ns = sorted.front();
+        result.max_ns = sorted.back();
+
+        // Calculate mean
+        double sum = std::accumulate(sorted.begin(), sorted.end(), 0.0);
+        result.mean_ns = sum / static_cast<double>(result.count);
+
+        // Calculate standard deviation — sample (n-1) form, consistent with
+        // nb_to_stats() in the nanobench adapter.
+        double sq_sum = std::accumulate(sorted.begin(), sorted.end(), 0.0,
+            [mean = result.mean_ns](double acc, uint64_t val) {
+                double diff = static_cast<double>(val) - mean;
+                return acc + diff * diff;
+            });
+        result.stddev_ns = (result.count > 1)
+            ? std::sqrt(sq_sum / static_cast<double>(result.count - 1))
+            : 0.0;
+
+        // Calculate percentiles
+        result.p50_ns = percentile(sorted, 0.50);
+        result.p90_ns = percentile(sorted, 0.90);
+        result.p95_ns = percentile(sorted, 0.95);
+        result.p99_ns = percentile(sorted, 0.99);
+
+        cached_stats_ = result;
+        stats_valid_ = true;
+        return result;
+    }
+
+private:
+    // Linear-interpolated percentile over a sorted vector, p in [0, 1].
+    static uint64_t percentile(const std::vector<uint64_t>& sorted, double p) {
+        if (sorted.empty()) return 0;
+        if (sorted.size() == 1) return sorted[0];
+
+        double index = p * static_cast<double>(sorted.size() - 1);
+        size_t lower = static_cast<size_t>(std::floor(index));
+        size_t upper = static_cast<size_t>(std::ceil(index));
+
+        if (lower == upper) {
+            return sorted[lower];
+        }
+
+        double fraction = index - static_cast<double>(lower);
+        return static_cast<uint64_t>(
+            static_cast<double>(sorted[lower]) * (1.0 - fraction) +
+            static_cast<double>(sorted[upper]) * fraction
+        );
+    }
+
+    std::vector<uint64_t> samples_;
+    LatencyStats cached_stats_;
+    bool stats_valid_ = false;
+};
+
+
+// Utility function to measure a single operation
+template <typename Func>
+uint64_t measure_ns(Func&& func) {
+    uint64_t start = 
bench_now(); + std::forward(func)(); + return bench_now() - start; +} + +// Utility function to run warmup iterations +template +void warmup(Func&& func, uint32_t iterations) { + for (uint32_t i = 0; i < iterations; ++i) { + std::forward(func)(); + } +} + +struct SampledBenchmarkResult { + LatencyStats latency; + std::chrono::milliseconds wall_time{0}; +}; + +template +SampledBenchmarkResult run_sampled_benchmark( + size_t warmup_iterations, + size_t measurement_iterations, + MeasureFunc&& measure_func, + MaintenanceFunc&& maintenance_func, + size_t maintenance_period = 1) +{ + auto maybe_maintain = [&](size_t iteration) { + if constexpr (std::is_invocable_v) { + if (maintenance_period != 0 && ((iteration + 1) % maintenance_period) == 0) { + maintenance_func(); + } + } + }; + + for (size_t i = 0; i < warmup_iterations; ++i) { + measure_func(); + maybe_maintain(i); + } + + LatencyTracker tracker(measurement_iterations); + auto wall_start = std::chrono::steady_clock::now(); + + for (size_t i = 0; i < measurement_iterations; ++i) { + tracker.record(measure_ns(measure_func)); + maybe_maintain(i); + } + + auto wall_end = std::chrono::steady_clock::now(); + return { + .latency = tracker.stats(), + .wall_time = std::chrono::duration_cast(wall_end - wall_start), + }; +} + +template +SampledBenchmarkResult run_sampled_benchmark( + size_t warmup_iterations, + size_t measurement_iterations, + MeasureFunc&& measure_func) +{ + return run_sampled_benchmark( + warmup_iterations, + measurement_iterations, + std::forward(measure_func), + [] {}, + 0); +} + +} // namespace DSR::Benchmark + +#endif // DSR_TIMING_UTILS_H diff --git a/benchmarks/fixtures/graph_generator.h b/benchmarks/fixtures/graph_generator.h new file mode 100644 index 0000000..f0e6ec5 --- /dev/null +++ b/benchmarks/fixtures/graph_generator.h @@ -0,0 +1,357 @@ +#ifndef DSR_GRAPH_GENERATOR_H +#define DSR_GRAPH_GENERATOR_H + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace 
DSR::Benchmark { + +// Graph topology types +enum class GraphTopology { + Linear, // Chain of nodes + Star, // Hub with spokes + Tree, // Hierarchical tree + FullMesh, // Every node connected to every other + Random // Random connections +}; + + +// Configuration for synthetic graph generation +struct GraphGeneratorConfig { + uint32_t num_nodes = 100; + uint32_t edges_per_node = 2; + GraphTopology topology = GraphTopology::Tree; + std::string node_type = "test_node"; + std::string edge_type = "test_edge"; + bool include_rt_edges = false; + bool include_attributes = true; + uint32_t attributes_per_node = 3; +}; + + +class GraphGenerator { +public: + static constexpr unsigned int DEFAULT_SEED = 0x5A17B3C1u; + + explicit GraphGenerator(unsigned int seed = DEFAULT_SEED) + : rng_(seed) + { + // Ensure test types are registered (safe to call multiple times) + register_test_types(); + } + + // Register test node/edge types - call this before using any DSR operations + static void register_test_types() { + static bool registered = false; + if (!registered) { + node_types::register_type("test_node"); + edge_types::register_type("test_edge"); + registered = true; + } + } + + // Generate a config file with synthetic graph + std::string generate_config_file(const GraphGeneratorConfig& config) { + std::string filename = temp_filename(); + std::ofstream out(filename); + if (!out.is_open()) { + return ""; + } + + out << "{\n"; + out << " \"DSRModel\": {\n"; + out << " \"symbols\": {\n"; + + // Generate root node + out << generate_root_node(); + + // Generate additional nodes based on topology + auto node_ids = generate_node_ids(config.num_nodes); + + for (size_t i = 0; i < node_ids.size(); ++i) { + out << ",\n"; + out << generate_node(node_ids[i], config, i); + } + + out << "\n }\n"; + out << " }\n"; + out << "}\n"; + + out.close(); + return filename; + } + + // Generate small graph (100 nodes) + std::string generate_small_graph() { + GraphGeneratorConfig config; + 
config.num_nodes = 100; + config.topology = GraphTopology::Tree; + return generate_config_file(config); + } + + // Generate medium graph (1000 nodes) + std::string generate_medium_graph() { + GraphGeneratorConfig config; + config.num_nodes = 1000; + config.topology = GraphTopology::Tree; + return generate_config_file(config); + } + + // Generate large graph (10000 nodes) + std::string generate_large_graph() { + GraphGeneratorConfig config; + config.num_nodes = 10000; + config.topology = GraphTopology::Tree; + config.include_attributes = false; // Reduce size + return generate_config_file(config); + } + + // Generate empty config (just root) + std::string generate_empty_graph() { + std::string filename = temp_filename(); + std::ofstream out(filename); + if (!out.is_open()) { + return ""; + } + + out << "{\n"; + out << " \"DSRModel\": {\n"; + out << " \"symbols\": {\n"; + out << generate_root_node(); + out << "\n }\n"; + out << " }\n"; + out << "}\n"; + + out.close(); + return filename; + } + + // Add nodes directly to an existing graph + void populate_graph(DSRGraph& graph, uint32_t num_nodes, + const std::string& node_type = "test_node") { + uint64_t base_id = 1000; + auto root = graph.get_node_root(); + uint64_t parent_id = root ? 
root->id() : 100; + + for (uint32_t i = 0; i < num_nodes; ++i) { + DSR::Node node; + node.id(base_id + i); + node.name("bench_node_" + std::to_string(i)); + node.type(node_type); + node.agent_id(graph.get_agent_id()); + + // Add some attributes + graph.add_attrib_local(node, static_cast(i % 10)); + + graph.insert_node(node); + + // Add edge from parent + if (i > 0 && (i % 10) == 0) { + parent_id = base_id + i - 1; + } + + DSR::Edge edge; + edge.from(parent_id); + edge.to(node.id()); + edge.type("test_edge"); + edge.agent_id(graph.get_agent_id()); + graph.insert_or_assign_edge(edge); + } + } + + // Create a node for insertion benchmarks + static DSR::Node create_test_node(uint64_t id, uint32_t agent_id, + const std::string& name = "") { + DSR::Node node; + node.id(id); + node.name(name.empty() ? "test_node_" + std::to_string(id) : name); + node.type("test_node"); + node.agent_id(agent_id); + return node; + } + + // Create an edge for insertion benchmarks + static DSR::Edge create_test_edge(uint64_t from, uint64_t to, + uint32_t agent_id, + const std::string& type = "test_edge") { + DSR::Edge edge; + edge.from(from); + edge.to(to); + edge.type(type); + edge.agent_id(agent_id); + return edge; + } + +private: + std::string temp_filename() { + static std::atomic next_id{0}; + return "/tmp/dsr_bench_" + std::to_string(getpid()) + "_" + + std::to_string(next_id.fetch_add(1, std::memory_order_relaxed)) + ".json"; + } + + std::vector generate_node_ids(uint32_t count) { + std::vector ids; + ids.reserve(count); + for (uint32_t i = 0; i < count; ++i) { + ids.push_back(1000 + i); // Start from 1000 to avoid conflicts + } + return ids; + } + + std::string generate_root_node() { + return R"( "100": { + "attribute": { + "level": { + "type": 1, + "value": 0 + } + }, + "id": "100", + "links": [], + "name": "root", + "type": "root" + })"; + } + + std::string generate_node(uint64_t id, const GraphGeneratorConfig& config, + size_t index) { + std::ostringstream oss; + oss << " \"" << id 
<< "\": {\n"; + + // Attributes + oss << " \"attribute\": {\n"; + oss << " \"level\": {\n"; + oss << " \"type\": 1,\n"; + oss << " \"value\": " << (index % 10 + 1) << "\n"; + oss << " }"; + + if (config.include_attributes) { + for (uint32_t a = 0; a < config.attributes_per_node; ++a) { + oss << ",\n \"attr_" << a << "\": {\n"; + oss << " \"type\": 1,\n"; + oss << " \"value\": " << (rng_() % 1000) << "\n"; + oss << " }"; + } + } + + oss << "\n },\n"; + + // ID and name + oss << " \"id\": \"" << id << "\",\n"; + + // Links (edges) + oss << " \"links\": ["; + auto links = generate_links(id, config, index); + for (size_t i = 0; i < links.size(); ++i) { + if (i > 0) oss << ", "; + oss << "\n" << links[i]; + } + if (!links.empty()) oss << "\n "; + oss << "],\n"; + + // Name and type + oss << " \"name\": \"node_" << id << "\",\n"; + oss << " \"type\": \"" << config.node_type << "\"\n"; + oss << " }"; + + return oss.str(); + } + + std::vector generate_links(uint64_t from_id, + const GraphGeneratorConfig& config, + size_t index) { + std::vector links; + + // Always link back to root for tree topology + if (config.topology == GraphTopology::Tree && index == 0) { + links.push_back(generate_link(from_id, 100, config.edge_type, config.include_rt_edges)); + } + + // Generate additional links based on topology + switch (config.topology) { + case GraphTopology::Linear: + if (index > 0) { + links.push_back(generate_link(from_id, 1000 + index - 1, + config.edge_type, config.include_rt_edges)); + } else { + links.push_back(generate_link(from_id, 100, + config.edge_type, config.include_rt_edges)); + } + break; + + case GraphTopology::Star: + links.push_back(generate_link(from_id, 100, + config.edge_type, config.include_rt_edges)); + break; + + case GraphTopology::Tree: { + // Each node links to its parent in tree + uint64_t parent_id = (index == 0) ? 
100 : (1000 + (index - 1) / 2); + links.push_back(generate_link(from_id, parent_id, + config.edge_type, config.include_rt_edges)); + break; + } + + case GraphTopology::FullMesh: + // Limited to avoid explosion + for (uint64_t target = 1000; target < from_id && links.size() < 5; ++target) { + links.push_back(generate_link(from_id, target, + config.edge_type, config.include_rt_edges)); + } + break; + + case GraphTopology::Random: { + std::uniform_int_distribution count_dist(1, config.edges_per_node); + std::uniform_int_distribution id_dist(100, 1000 + index - 1); + uint32_t num_links = (index == 0) ? 1 : count_dist(rng_); + for (uint32_t i = 0; i < num_links; ++i) { + uint64_t target = (index == 0) ? 100 : id_dist(rng_); + links.push_back(generate_link(from_id, target, + config.edge_type, config.include_rt_edges)); + } + break; + } + } + + return links; + } + + std::string generate_link(uint64_t from, uint64_t to, + const std::string& type, bool include_rt) { + std::ostringstream oss; + oss << " {\n"; + oss << " \"dst\": \"" << to << "\",\n"; + oss << " \"label\": \"" << type << "\",\n"; + oss << " \"linkAttribute\": {"; + + if (include_rt && type == "RT") { + oss << R"( + "rt_rotation_euler_xyz": { + "type": 3, + "value": [0, 0, 0] + }, + "rt_translation": { + "type": 3, + "value": [0, 0, 0] + })"; + } + + oss << "},\n"; + oss << " \"src\": \"" << from << "\"\n"; + oss << " }"; + + return oss.str(); + } + + std::mt19937 rng_; +}; + +} // namespace DSR::Benchmark + +#endif // DSR_GRAPH_GENERATOR_H diff --git a/benchmarks/fixtures/multi_agent_fixture.h b/benchmarks/fixtures/multi_agent_fixture.h new file mode 100644 index 0000000..46e9f32 --- /dev/null +++ b/benchmarks/fixtures/multi_agent_fixture.h @@ -0,0 +1,275 @@ +#ifndef DSR_MULTI_AGENT_FIXTURE_H +#define DSR_MULTI_AGENT_FIXTURE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../core/benchmark_config.h" +#include 
"../core/timing_utils.h" + +namespace DSR::Benchmark { + +// Agent info for tracking +struct AgentInfo { + uint32_t id; + std::string name; + std::unique_ptr graph; + std::atomic participants_matched{0}; +}; + + +// Forward declaration for type registration +class GraphGenerator; + +// Reusable multi-agent test fixture +class MultiAgentFixture { +public: + explicit MultiAgentFixture(const BenchmarkConfig& config = default_config()) + : config_(config) + { + // Ensure test types are registered before any DSR operations + register_benchmark_types(); + } + + // Register node/edge types needed by benchmarks + static void register_benchmark_types() { + static bool registered = false; + if (!registered) { + node_types::register_type("test_node"); + edge_types::register_type("test_edge"); + registered = true; + } + } + + ~MultiAgentFixture() { + cleanup(); + } + + // Disable copy + MultiAgentFixture(const MultiAgentFixture&) = delete; + MultiAgentFixture& operator=(const MultiAgentFixture&) = delete; + + // Create N agent instances with DSRGraph + // First agent loads from config_file, others sync via DDS + bool create_agents(uint32_t num_agents, const std::string& config_file) { + if (num_agents == 0 || num_agents > config_.max_agent_count) { + qWarning("Can't create agents"); + return false; + } + if (config_file.empty()) { + qWarning("create_agents: config_file is empty — graph generator likely failed to write to /tmp (check permissions)"); + return false; + } + + // Keep agent IDs deterministic while remaining disjoint across fixture + // instances in the same process. 
+ static std::atomic next_base_agent_id{1000}; + base_agent_id_ = next_base_agent_id.fetch_add(config_.max_agent_count + 1, + std::memory_order_relaxed); + + agents_.clear(); + agents_.reserve(num_agents); + + // Create first agent with config file (it defines the initial graph) + { + auto agent = std::make_unique(); + agent->id = base_agent_id_; + agent->name = "bench_agent_0"; + + try { + agent->graph = std::make_unique( + agent->name, + agent->id, + config_file, + true + ); + agents_.push_back(std::move(agent)); + } catch (const std::exception& e) { + qWarning("Failed to create primary agent: %s", e.what()); + return false; + } + } + + // Small delay for DDS to initialize primary agent + process_events(50); + + // Create additional agents WITHOUT config file - they sync via DDS + for (uint32_t i = 1; i < num_agents; ++i) { + auto agent = std::make_unique(); + agent->id = base_agent_id_ + i; + agent->name = "bench_agent_" + std::to_string(i); + + try { + // No config file - agent receives graph from DDS + agent->graph = std::make_unique( + agent->name, + agent->id, + std::string{}, + true + ); + agents_.push_back(std::move(agent)); + } catch (const std::exception& e) { + qWarning("Failed to create agent %u: %s", i, e.what()); + return false; + } + + // Process events after each agent creation + process_events(20); + } + + return true; + } + + // Wait for DDS synchronization between agents + // Actively processes events while waiting + void wait_for_sync(std::chrono::milliseconds wait_time = std::chrono::milliseconds{0}) { + if (wait_time.count() == 0) { + wait_time = config_.sync_wait_time; + } + + auto start = std::chrono::steady_clock::now(); + while (std::chrono::steady_clock::now() - start < wait_time) { + process_events(10); + } + } + + // Verify all agents have converged to same state + bool verify_convergence(std::chrono::seconds timeout = std::chrono::seconds{0}) { + if (timeout.count() == 0) { + timeout = config_.max_convergence_timeout; + } + + if 
(agents_.size() < 2) { + return true; // Single agent is always converged + } + + auto start = std::chrono::steady_clock::now(); + + while (std::chrono::steady_clock::now() - start < timeout) { + if (check_node_convergence()) { + return true; + } + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + process_events(); + } + + return false; + } + + // Measure time to convergence + std::chrono::milliseconds measure_convergence_time() { + auto start = std::chrono::steady_clock::now(); + + while (!check_node_convergence()) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + process_events(); + + auto elapsed = std::chrono::steady_clock::now() - start; + if (elapsed > config_.max_convergence_timeout) { + return std::chrono::milliseconds{-1}; // Timeout + } + } + + return std::chrono::duration_cast( + std::chrono::steady_clock::now() - start); + } + + // Get agent by index + DSRGraph* get_agent(size_t index) { + if (index < agents_.size()) { + return agents_[index]->graph.get(); + } + return nullptr; + } + + // Get agent info by index + AgentInfo* get_agent_info(size_t index) { + if (index < agents_.size()) { + return agents_[index].get(); + } + return nullptr; + } + + // Get number of agents + [[nodiscard]] size_t agent_count() const { + return agents_.size(); + } + + // Connect signal handler to all agents + template + void connect_all(Signal signal, Slot slot) { + for (auto& agent : agents_) { + QObject::connect(agent->graph.get(), signal, slot, Qt::QueuedConnection); + } + } + + // Process Qt events (for signal delivery) + void process_events(int timeout_ms = 10) { + auto* app = QCoreApplication::instance(); + if (app) { + app->processEvents(QEventLoop::AllEvents, timeout_ms); + } + } + + // Run event loop for specified duration + void run_event_loop(std::chrono::milliseconds duration) { + auto* app = QCoreApplication::instance(); + if (!app) return; + + QEventLoop loop; + QTimer::singleShot(duration.count(), &loop, &QEventLoop::quit); + 
loop.exec(); + } + + // Cleanup all agents + void cleanup() { + agents_.clear(); + } + + // Get number of agents + [[nodiscard]] size_t size() const { + return agents_.size(); + } + +private: + bool check_node_convergence() { + if (agents_.size() < 2) return true; + + auto& first_graph = agents_[0]->graph; + auto first_nodes = first_graph->get_nodes(); + + for (size_t i = 1; i < agents_.size(); ++i) { + auto nodes = agents_[i]->graph->get_nodes(); + if (nodes.size() != first_nodes.size()) { + return false; + } + + // Check each node exists in the other graph + for (const auto& node : first_nodes) { + auto other_node = agents_[i]->graph->get_node(node.id()); + if (!other_node.has_value()) { + return false; + } + } + } + + return true; + } + + BenchmarkConfig config_; + uint32_t base_agent_id_ = 0; + std::vector> agents_; +}; + +} // namespace DSR::Benchmark + +#endif // DSR_MULTI_AGENT_FIXTURE_H diff --git a/benchmarks/flamegraph.sh b/benchmarks/flamegraph.sh new file mode 100755 index 0000000..6067101 --- /dev/null +++ b/benchmarks/flamegraph.sh @@ -0,0 +1,223 @@ +#!/usr/bin/env bash +# flamegraph.sh - generate a per-benchmark flamegraph SVG using perf. 
+# +# Usage: +# ./flamegraph.sh [OPTIONS] [FILTER] +# +# Options: +# -b BINARY Path to dsr_benchmarks (default: ./build/dsr_benchmarks) +# -o OUTPUT Output root directory for run subdirectories +# (default: ./results/flamegraphs) +# -F FREQ perf sampling frequency in Hz (default: 999) +# -k Keep raw perf.data files (deleted by default) +# -l List matching profile targets and exit +# -p PRESET Built-in preset: load, multiagent, profile +# -r RUN_ID Run directory name under OUTPUT +# (default: flamegraph-YYYYMMDD-HHMMSS) +# -h Show this help +# +# FILTER is forwarded to Catch2 as a tag expression or exact test name, e.g.: +# ./flamegraph.sh "Signal emission under load" +# ./flamegraph.sh "[PROFILE][LOAD]" +# ./flamegraph.sh -p multiagent +# ./flamegraph.sh -l -p profile +# +# The script intentionally does not default to "all benchmarks". Pass an exact +# benchmark name, a Catch2 tag expression, or a preset for scoped profiling. +# +# Requirements: +# 1. perf +# sudo apt install linux-tools-common linux-tools-$(uname -r) +# +# 2. FlameGraph scripts (flamegraph.pl + stackcollapse-perf.pl) +# git clone https://github.com/brendangregg/FlameGraph /opt/FlameGraph +# export FG_DIR=/opt/FlameGraph +# Either add them to PATH or set FG_DIR before running this script. +# +# 3. perf_event_paranoia - perf needs read access to kernel symbols. +# If perf says "Permission denied" or produces empty stacks, lower the +# paranoia level (resets on reboot): +# echo 1 | sudo tee /proc/sys/kernel/perf_event_paranoia +# To make it permanent: +# echo 'kernel.perf_event_paranoia = 1' | sudo tee /etc/sysctl.d/99-perf.conf +# sudo sysctl --system +# +# 4. Debug symbols - for meaningful stack frames the binary should be built +# with frame pointers or DWARF info. The CMake target already passes -g. 
+ +set -euo pipefail + +BINARY="./build/dsr_benchmarks" +OUTROOT="./results/flamegraphs" +FREQ=999 +KEEP_DATA=0 +LIST_ONLY=0 +PRESET="" +FILTER="" +RUN_ID="flamegraph-$(date +%Y%m%d-%H%M%S)" + +while getopts "b:o:F:klp:r:h" opt; do + case "$opt" in + b) BINARY="$OPTARG" ;; + o) OUTROOT="$OPTARG" ;; + F) FREQ="$OPTARG" ;; + k) KEEP_DATA=1 ;; + l) LIST_ONLY=1 ;; + p) PRESET="$OPTARG" ;; + r) RUN_ID="$OPTARG" ;; + h) + sed -n '2,/^set -/p' "$0" | grep '^#' | sed 's/^# \{0,1\}//' + exit 0 + ;; + *) echo "Unknown option: -$OPTARG" >&2; exit 1 ;; + esac +done +shift $((OPTIND - 1)) +FILTER="${1:-}" + +preset_to_filter() { + case "$1" in + load) echo "[PROFILE][LOAD]" ;; + multiagent) echo "[PROFILE][MULTIAGENT]" ;; + profile) echo "[PROFILE]" ;; + *) + echo "ERROR: unknown preset '$1' (expected: load, multiagent, profile)" >&2 + exit 1 + ;; + esac +} + +find_tool() { + local name="$1" + + if [[ -n "${FG_DIR:-}" && -x "${FG_DIR}/${name}" ]]; then + echo "${FG_DIR}/${name}" + return + fi + + if command -v "$name" >/dev/null 2>&1; then + command -v "$name" + return + fi + + for p in /usr/share/FlameGraph /opt/FlameGraph "$HOME/FlameGraph"; do + if [[ -x "${p}/${name}" ]]; then + echo "${p}/${name}" + return + fi + done + + echo "" +} + +if [[ -n "$PRESET" && -n "$FILTER" ]]; then + echo "ERROR: use either -p PRESET or a FILTER argument, not both" >&2 + exit 1 +fi + +if [[ -n "$PRESET" ]]; then + FILTER="$(preset_to_filter "$PRESET")" +fi + +if [[ -z "$FILTER" ]]; then + cat >&2 <<'EOF' +ERROR: a benchmark filter is required. 
+Examples: + ./flamegraph.sh "Signal emission under load" + ./flamegraph.sh "[PROFILE][LOAD]" + ./flamegraph.sh -p multiagent + ./flamegraph.sh -l -p profile +EOF + exit 1 +fi + +[[ -x "$BINARY" ]] || { echo "ERROR: binary not found or not executable: $BINARY" >&2; exit 1; } +command -v perf >/dev/null 2>&1 || { echo "ERROR: perf not found" >&2; exit 1; } + +OUTDIR="${OUTROOT}/${RUN_ID}" +mkdir -p "$OUTDIR" + +mapfile -t TEST_NAMES < <( + "$BINARY" --list-tests --verbosity quiet "$FILTER" 2>/dev/null \ + | sed 's/\r$//' \ + | grep -v '^[[:space:]]' \ + | grep -v '^All available test cases:' \ + | grep -v '^[0-9][0-9]* test cases$' \ + | grep -v '^$' +) + +if [[ ${#TEST_NAMES[@]} -eq 0 ]]; then + echo "No tests matched filter: '${FILTER}'" >&2 + echo "Run '$BINARY --list-tests' to see available tests." >&2 + exit 1 +fi + +if [[ $LIST_ONLY -eq 1 ]]; then + printf '%s\n' "${TEST_NAMES[@]}" + exit 0 +fi + +COLLAPSE="$(find_tool stackcollapse-perf.pl)" +FLAMEGRAPH="$(find_tool flamegraph.pl)" + +if [[ -z "$COLLAPSE" || -z "$FLAMEGRAPH" ]]; then + cat >&2 <<'EOF' +ERROR: FlameGraph tools not found. +Install Brendan Gregg's FlameGraph scripts: + git clone https://github.com/brendangregg/FlameGraph /opt/FlameGraph + export FG_DIR=/opt/FlameGraph +or set FG_DIR to the directory containing flamegraph.pl and stackcollapse-perf.pl. +EOF + exit 1 +fi + +echo "Found ${#TEST_NAMES[@]} test(s) to profile." 
+echo "Output: $OUTDIR" +echo + +PASS=0 +FAIL=0 + +for name in "${TEST_NAMES[@]}"; do + safe="$(echo "$name" | tr -cs 'A-Za-z0-9_-' '_' | sed 's/_\+/_/g; s/^_//; s/_$//')" + + perf_data="${OUTDIR}/${safe}.perf.data" + svg_out="${OUTDIR}/${safe}.svg" + perf_tmp="${perf_data}.tmp.$$" + svg_tmp="${svg_out}.tmp.$$" + + echo "-- $name" + + if perf record \ + -F "$FREQ" \ + -g \ + --call-graph dwarf \ + -o "$perf_tmp" \ + -- "$BINARY" "$name" 2>/dev/null; then + + perf script -i "$perf_tmp" 2>/dev/null \ + | perl "$COLLAPSE" --inline \ + | perl "$FLAMEGRAPH" --title "$name" \ + > "$svg_tmp" + + mv -f "$svg_tmp" "$svg_out" + + echo " -> $svg_out" + ((PASS++)) || true + else + echo " x perf record failed" >&2 + rm -f "$perf_tmp" "$svg_tmp" + ((FAIL++)) || true + continue + fi + + if [[ $KEEP_DATA -eq 0 && -f "$perf_tmp" ]]; then + rm -f "$perf_tmp" + elif [[ -f "$perf_tmp" ]]; then + mv -f "$perf_tmp" "$perf_data" + fi +done + +echo +echo "Done: $PASS succeeded, $FAIL failed." +[[ $FAIL -eq 0 ]] diff --git a/benchmarks/latency/crdt_join_bench.cpp b/benchmarks/latency/crdt_join_bench.cpp new file mode 100644 index 0000000..8d7e6fc --- /dev/null +++ b/benchmarks/latency/crdt_join_bench.cpp @@ -0,0 +1,217 @@ +#include +#include + +#include +#include +#include "../core/nanobench_adapter.h" +#include "../core/metrics_collector.h" +#include "../core/report_generator.h" + +using namespace DSR::Benchmark; + +// Create a test attribute +static DSR::CRDTAttribute make_test_attribute(uint32_t agent_id, int32_t value) { + DSR::CRDTAttribute attr; + attr.value(value); + attr.timestamp(bench_now()); + attr.agent_id(agent_id); + return attr; +} + +// All four mvreg operations in a single TEST_CASE so they export together +// to one JSON file. 
+TEST_CASE("CRDT mvreg operations", "[CRDT][mvreg][BASELINE]") { + MetricsCollector collector("crdt_mvreg"); + collector.add_metadata("profile", "baseline"); + + // ── mvreg write ─────────────────────────────────────────────────────────── + { + mvreg reg; + reg.id = 100; + int i = 0; + + auto bench = make_latency_bench(); + bench.run("mvreg_write", [&] { + auto attr = make_test_attribute(100, i++); + auto delta = reg.write(attr); + ankerl::nanobench::doNotOptimizeAway(delta); + }); + collector.record_latency_stats("mvreg_write", nb_to_stats(bench)); + } + + // ── mvreg join (same agent) ─────────────────────────────────────────────── + { + mvreg reg; + reg.id = 100; + auto init_attr = make_test_attribute(100, 0); + reg.write(init_attr); + int i = 0; + + auto bench = make_latency_bench(); + bench.run("mvreg_join_same_agent", [&] { + mvreg delta_reg; + delta_reg.id = 100; + auto new_attr = make_test_attribute(100, i++); + auto delta = delta_reg.write(new_attr); + reg.join(std::move(delta)); + ankerl::nanobench::doNotOptimizeAway(reg); + }); + collector.record_latency_stats("mvreg_join_same_agent", nb_to_stats(bench)); + } + + // ── mvreg join (different agents) ──────────────────────────────────────── + { + int i = 0; + + auto bench = make_latency_bench(); + bench.run("mvreg_join_different_agent", [&] { + mvreg reg; + reg.id = 100; + auto attr = make_test_attribute(100, 0); + auto delta = reg.write(attr); + + uint32_t other_agent = 200 + (i % 10); + mvreg delta_reg; + delta_reg.id = other_agent; + delta_reg.join(std::move(delta)); + auto new_attr = make_test_attribute(other_agent, i * 2); + delta = delta_reg.write(new_attr); + + reg.join(std::move(delta)); + ankerl::nanobench::doNotOptimizeAway(reg); + ++i; + }); + collector.record_latency_stats("mvreg_join_different_agent", nb_to_stats(bench)); + } + + // ── mvreg read ──────────────────────────────────────────────────────────── + { + mvreg reg; + reg.id = 100; + auto attr = make_test_attribute(100, 42); + 
reg.write(attr); + + // Read is pure — no warmup needed (cache already warm after write) + auto bench = make_latency_bench(1000, 0); + bench.minEpochIterations(10); + bench.run("mvreg_read", [&] { + const auto& value = reg.read_reg(); + ankerl::nanobench::doNotOptimizeAway(value); + }); + collector.record_latency_stats("mvreg_read", nb_to_stats(bench)); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "crdt_mvreg"); +} + +TEST_CASE("CRDT dot_context operations", "[CRDT][dot_context][BASELINE]") { + MetricsCollector collector("crdt_dot_context"); + collector.add_metadata("profile", "baseline"); + + // ── makedot ─────────────────────────────────────────────────────────────── + { + dot_context ctx; + int i = 0; + + auto bench = make_latency_bench(); + bench.minEpochIterations(10); + bench.run("dot_context_makedot", [&] { + auto dot = ctx.makedot(100 + (i++ % 10)); + ankerl::nanobench::doNotOptimizeAway(dot); + }); + collector.record_latency_stats("dot_context_makedot", nb_to_stats(bench)); + } + + // ── dotin ───────────────────────────────────────────────────────────────── + { + dot_context ctx; + for (int i = 0; i < 100; ++i) ctx.makedot(100 + (i % 10)); + int i = 0; + + auto bench = make_latency_bench(1000, 0); + bench.minEpochIterations(10); + bench.run("dot_context_dotin", [&] { + std::pair dot{100 + (i++ % 10), i % 50}; + bool r = ctx.dotin(dot); + ankerl::nanobench::doNotOptimizeAway(r); + }); + collector.record_latency_stats("dot_context_dotin", nb_to_stats(bench)); + } + + // ── join ────────────────────────────────────────────────────────────────── + { + auto bench = make_latency_bench(); + bench.run("dot_context_join", [&] { + dot_context ctx1; + dot_context ctx2; + for (int j = 0; j < 10; ++j) { + ctx1.makedot(100); + ctx2.makedot(200); + } + ctx1.join(ctx2); + ankerl::nanobench::doNotOptimizeAway(ctx1); + }); + collector.record_latency_stats("dot_context_join", nb_to_stats(bench)); + } + + 
// ── compact ─────────────────────────────────────────────────────────────── + { + auto bench = make_latency_bench(); + bench.run("dot_context_compact", [&] { + dot_context ctx; + for (int j = 0; j < 50; ++j) ctx.insertdot({100, j * 2}, false); + ctx.compact(); + ankerl::nanobench::doNotOptimizeAway(ctx); + }); + collector.record_latency_stats("dot_context_compact", nb_to_stats(bench)); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "crdt_dot_context"); +} + +// Catch2 BENCHMARK macros — kept hidden; run with [!benchmark] to activate. +TEST_CASE("CRDT micro-benchmarks (Catch2 BENCHMARK)", "[.][crdt][!benchmark]") { + + BENCHMARK("mvreg write") { + mvreg reg; + reg.id = 100; + auto attr = make_test_attribute(100, 42); + return reg.write(attr); + }; + + BENCHMARK("mvreg join") { + mvreg reg; + reg.id = 100; + auto attr1 = make_test_attribute(100, 1); + auto delta = reg.write(attr1); + + mvreg delta_reg; + delta_reg.id = 200; + delta_reg.join(std::move(delta)); + auto attr2 = make_test_attribute(200, 2); + delta = delta_reg.write(attr2); + + reg.join(std::move(delta)); + return reg.read_reg(); + }; + + BENCHMARK("dot_context makedot") { + dot_context ctx; + return ctx.makedot(100); + }; + + BENCHMARK("dot_context join") { + dot_context ctx1; + dot_context ctx2; + for (int i = 0; i < 10; ++i) { + ctx1.makedot(100); + ctx2.makedot(200); + } + ctx1.join(ctx2); + return ctx1.cc.size(); + }; +} diff --git a/benchmarks/latency/delta_propagation_bench.cpp b/benchmarks/latency/delta_propagation_bench.cpp new file mode 100644 index 0000000..b301352 --- /dev/null +++ b/benchmarks/latency/delta_propagation_bench.cpp @@ -0,0 +1,338 @@ +#include +#include +#include +#include + +#include "../core/timing_utils.h" +#include "../core/metrics_collector.h" +#include "../core/report_generator.h" +#include "../fixtures/multi_agent_fixture.h" +#include "../fixtures/graph_generator.h" + +using namespace DSR; +using namespace 
DSR::Benchmark; + +// Multi-agent tests require working DDS synchronization +// Skip these by default - run with "[delta]" tag explicitly to test +TEST_CASE("Delta propagation latency between agents", "[LATENCY][delta][.multi][PROFILE][MULTIAGENT]") { + // Setup + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("delta_propagation"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(2, config_file)); + + // Wait for DDS discovery and initial sync + fixture.wait_for_sync(std::chrono::milliseconds(500)); + REQUIRE(fixture.verify_convergence(std::chrono::seconds(10))); + + auto* agent_a = fixture.get_agent(0); + auto* agent_b = fixture.get_agent(1); + REQUIRE(agent_a != nullptr); + REQUIRE(agent_b != nullptr); + + SECTION("Node insertion propagation latency") { + LatencyTracker tracker(100); + std::atomic receive_time{0}; + std::atomic received{false}; + std::atomic expected_node_id{0}; + + // Connect to agent B's signal + QObject::connect(agent_b, &DSR::DSRGraph::update_node_signal, agent_b, + [&](uint64_t id, const std::string& type, DSR::SignalInfo) { + if (id == expected_node_id.load(std::memory_order_acquire)) { + receive_time.store(get_unix_timestamp()); + received.store(true); + } + }, Qt::DirectConnection); + + // Warmup + for (int i = 0; i < 10; ++i) { + auto node = GraphGenerator::create_test_node( + 2000 + i, agent_a->get_agent_id(), "warmup_" + std::to_string(i)); + agent_a->insert_node(node); + fixture.wait_for_sync(std::chrono::milliseconds(50)); + } + + // Measurement iterations + for (int i = 0; i < 100; ++i) { + received.store(false); + + auto node = GraphGenerator::create_test_node( + expected_node_id, agent_a->get_agent_id(), + "bench_node_" + std::to_string(i)); + + uint64_t send_time = get_unix_timestamp(); + auto ins_result = agent_a->insert_node(node); + REQUIRE(ins_result.has_value()); + expected_node_id.store(ins_result.value(), std::memory_order_release); + + // Wait 
for signal with timeout + auto start = std::chrono::steady_clock::now(); + while (!received.load()) { + fixture.process_events(1); + if (std::chrono::steady_clock::now() - start > std::chrono::seconds(5)) { + FAIL("Timeout waiting for node propagation"); + } + } + + uint64_t latency = receive_time.load() - send_time; + tracker.record(latency); + } + + auto stats = tracker.stats(); + collector.record_latency_stats("node_propagation", stats); + + INFO("Node propagation latency - Mean: " << stats.mean_us() << " us, " + << "P99: " << stats.p99_us() << " us"); + + // Validation + CHECK(stats.p99_ns < MAX_EXPECTED_LATENCY_NS); + } + + SECTION("Edge insertion propagation latency") { + LatencyTracker tracker(100); + std::atomic receive_time{0}; + std::atomic received{false}; + + // First create nodes on agent A + auto root = agent_a->get_node_root(); + REQUIRE(root.has_value()); + + std::vector node_to_ids = {}; + + for (int i = 0; i < 110; ++i) { + auto node = GraphGenerator::create_test_node( + 4000 + i, agent_a->get_agent_id(), "edge_node_" + std::to_string(i)); + auto ins = agent_a->insert_node(node); + REQUIRE(ins.has_value()); + node_to_ids.push_back(ins.value()); + } + + // Wait for all nodes to sync to agent B before creating edges + fixture.wait_for_sync(std::chrono::milliseconds(500)); + REQUIRE(fixture.verify_convergence(std::chrono::seconds(10))); + + // Connect to agent B's edge signal + std::atomic expected_from{0}; + std::atomic expected_to{0}; + QObject::connect(agent_b, &DSR::DSRGraph::update_edge_signal, agent_b, + [&](uint64_t from, uint64_t to, const std::string& type, DSR::SignalInfo) { + if (from == expected_from.load(std::memory_order_acquire) && + to == expected_to.load(std::memory_order_acquire)) { + receive_time.store(get_unix_timestamp()); + received.store(true); + } + }, Qt::DirectConnection); + + // Warmup + for (int i = 0; i < 10; ++i) { + auto edge = GraphGenerator::create_test_edge( + root->id(), node_to_ids[i], agent_a->get_agent_id()); + 
agent_a->insert_or_assign_edge(edge); + fixture.wait_for_sync(std::chrono::milliseconds(50)); + } + + // Measurement iterations + for (int i = 10; i < 110; ++i) { + expected_from.store(root->id(), std::memory_order_release); + expected_to.store(node_to_ids[i], std::memory_order_release); + received.store(false); + + auto edge = GraphGenerator::create_test_edge( + expected_from, expected_to, agent_a->get_agent_id()); + + uint64_t send_time = get_unix_timestamp(); + agent_a->insert_or_assign_edge(edge); + + // Wait for signal with timeout + auto start = std::chrono::steady_clock::now(); + while (!received.load()) { + fixture.process_events(1); + if (std::chrono::steady_clock::now() - start > std::chrono::seconds(5)) { + FAIL("Timeout waiting for edge propagation"); + } + } + + uint64_t latency = receive_time.load() - send_time; + tracker.record(latency); + } + + auto stats = tracker.stats(); + collector.record_latency_stats("edge_propagation", stats); + + INFO("Edge propagation latency - Mean: " << stats.mean_us() << " us, " + << "P99: " << stats.p99_us() << " us"); + + CHECK(stats.p99_ns < MAX_EXPECTED_LATENCY_NS); + } + + SECTION("Attribute update propagation latency") { + LatencyTracker tracker(100); + std::atomic receive_time{0}; + std::atomic received{false}; + + // Create a node for attribute updates + auto test_node = GraphGenerator::create_test_node( + 5000, agent_a->get_agent_id(), "attr_test_node"); + auto insert_result = agent_a->insert_node(test_node); + REQUIRE(insert_result.has_value()); + + // Wait for sync to agent B + fixture.wait_for_sync(std::chrono::milliseconds(500)); + REQUIRE(fixture.verify_convergence(std::chrono::seconds(10))); + + // Verify node exists on agent A + auto check_node = agent_a->get_node(*insert_result); + REQUIRE(check_node.has_value()); + + // Connect to agent B's attribute signal + QObject::connect(agent_b, &DSR::DSRGraph::update_node_attr_signal, agent_b, + [&](uint64_t id, const std::vector& att_names, DSR::SignalInfo) { + 
if (id == *insert_result) { + receive_time.store(get_unix_timestamp()); + received.store(true); + } + }, Qt::DirectConnection); + + // Warmup + for (int i = 0; i < 10; ++i) { + auto node = agent_a->get_node(*insert_result); + if (node) { + agent_a->add_or_modify_attrib_local(*node, static_cast(i)); + agent_a->update_node(*node); + } + fixture.wait_for_sync(std::chrono::milliseconds(50)); + } + + // Measurement iterations + for (int i = 0; i < 100; ++i) { + received.store(false); + + auto node = agent_a->get_node(*insert_result); + REQUIRE(node.has_value()); + + agent_a->add_or_modify_attrib_local(*node, static_cast(1000 + i)); + + uint64_t send_time = get_unix_timestamp(); + agent_a->update_node(*node); + + // Wait for signal with timeout + auto start = std::chrono::steady_clock::now(); + while (!received.load()) { + fixture.process_events(1); + if (std::chrono::steady_clock::now() - start > std::chrono::seconds(5)) { + FAIL("Timeout waiting for attribute propagation"); + } + } + + uint64_t latency = receive_time.load() - send_time; + tracker.record(latency); + } + + auto stats = tracker.stats(); + collector.record_latency_stats("attribute_propagation", stats); + + INFO("Attribute propagation latency - Mean: " << stats.mean_us() << " us, " + << "P99: " << stats.p99_us() << " us"); + + CHECK(stats.p99_ns < MAX_EXPECTED_LATENCY_NS); + } + + // Export results + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "delta_propagation"); +} + +TEST_CASE("Delta propagation with varying agent counts", "[LATENCY][delta][scalability][.multi][PROFILE][MULTIAGENT]") { + MetricsCollector collector("delta_propagation_scaling"); + GraphGenerator generator; + + for (uint32_t num_agents : {2, 4, 8}) { + SECTION("With " + std::to_string(num_agents) + " agents") { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + + if (!fixture.create_agents(num_agents, config_file)) { + WARN("Could not create " 
<< num_agents << " agents, skipping"); + continue; + } + + // Wait for DDS discovery with all agents + fixture.wait_for_sync(std::chrono::milliseconds(500 * num_agents)); + if (!fixture.verify_convergence(std::chrono::seconds(15))) { + WARN("Agents failed to converge, skipping"); + continue; + } + + auto* sender = fixture.get_agent(0); + REQUIRE(sender != nullptr); + + LatencyTracker tracker(50); + + // Track reception across all other agents + std::atomic received_count{0}; + std::vector> receive_times(num_agents - 1); + std::atomic current_expected_id{0}; + + for (size_t i = 1; i < num_agents; ++i) { + auto* receiver = fixture.get_agent(i); + QObject::connect(receiver, &DSR::DSRGraph::update_node_signal, receiver, + [&, idx = i - 1](uint64_t id, const std::string& type, DSR::SignalInfo) { + if (id == current_expected_id.load()) { + receive_times[idx].store(get_unix_timestamp()); + received_count.fetch_add(1); + } + }, Qt::DirectConnection); + } + + // Measurement + for (int i = 0; i < 50; ++i) { + received_count.store(0); + for (auto& rt : receive_times) rt.store(0); + + auto node = GraphGenerator::create_test_node( + 0, sender->get_agent_id(), + "scale_node_" + std::to_string(i)); + + uint64_t send_time = get_unix_timestamp(); + auto result = sender->insert_node(node); + REQUIRE(result.has_value()); + current_expected_id.store(result.value()); + + // Wait for all receivers + auto start = std::chrono::steady_clock::now(); + while (received_count.load() < num_agents - 1) { + fixture.process_events(1); + if (std::chrono::steady_clock::now() - start > std::chrono::seconds(10)) { + break; + } + } + + // Record max latency (time for all to receive) + uint64_t max_receive = 0; + for (const auto& rt : receive_times) { + max_receive = std::max(max_receive, rt.load()); + } + if (max_receive > 0) { + tracker.record(max_receive - send_time); + } + } + + auto stats = tracker.stats(); + collector.record_latency_stats( + "propagation_" + std::to_string(num_agents) + "_agents", 
+ stats, + {{"num_agents", std::to_string(num_agents)}}); + + INFO(num_agents << " agents - Mean: " << stats.mean_us() << " us, " + << "P99: " << stats.p99_us() << " us"); + } + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "delta_propagation_scaling"); +} diff --git a/benchmarks/latency/signal_latency_bench.cpp b/benchmarks/latency/signal_latency_bench.cpp new file mode 100644 index 0000000..0f95b47 --- /dev/null +++ b/benchmarks/latency/signal_latency_bench.cpp @@ -0,0 +1,261 @@ +#include +#include + +#include "../core/nanobench_adapter.h" +#include "../core/timing_utils.h" +#include "../core/metrics_collector.h" +#include "../core/report_generator.h" +#include "../fixtures/multi_agent_fixture.h" +#include "../fixtures/graph_generator.h" + +using namespace DSR; +using namespace DSR::Benchmark; + +// For Qt::DirectConnection cases the signal fires synchronously within the +// graph operation, so nanobench's elapsed time equals the dispatch latency. +// For Qt::QueuedConnection the callback fires asynchronously via the Qt event +// loop — manual bench_now() timing with fixture.process_events() is required. 
+ +TEST_CASE("Node signal direct latency", "[LATENCY][signal][EXTENDED][.extended]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("signal_latency"); + collector.add_metadata("profile", "extended"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + std::atomic callback_called{false}; + + QObject::connect(graph, &DSR::DSRGraph::update_node_signal, graph, + [&](uint64_t, const std::string&, DSR::SignalInfo) { + callback_called.store(true); + }, Qt::DirectConnection); + + // ~40µs/op: 300 iters/epoch × 100 epochs ≈ 1.2 s + auto bench = make_latency_bench(100, 50); + bench.minEpochIterations(300); + bench.run("node_signal_direct", [&] { + callback_called.store(false); + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + graph->insert_node(node); + REQUIRE(callback_called.load()); + ankerl::nanobench::doNotOptimizeAway(node); + }); + + auto stats = nb_to_stats(bench); + collector.record_latency_stats("node_signal_direct", stats); + INFO("Node signal (direct) - Mean: " << stats.mean_us() << " us, p99: " << stats.p99_us() << " us"); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "signal_node_direct"); +} + +TEST_CASE("Edge signal direct latency", "[LATENCY][signal][EXTENDED][.extended]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("signal_latency"); + collector.add_metadata("profile", "extended"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto root = graph->get_node_root(); + REQUIRE(root.has_value()); + + // Pre-create enough nodes for warmup(50) + epochs(1000) = 1050 + std::vector node_ids; + node_ids.reserve(1060); + for (int i = 0; i < 1060; 
++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto id = graph->insert_node(node); + REQUIRE(id.has_value()); + node_ids.push_back(*id); + } + + std::atomic callback_called{false}; + std::atomic target_to{0}; + + QObject::connect(graph, &DSR::DSRGraph::update_edge_signal, graph, + [&](uint64_t, uint64_t to, const std::string&, DSR::SignalInfo) { + if (to == target_to.load()) { + callback_called.store(true); + } + }, Qt::DirectConnection); + + // ~14µs/op: 600 iters/epoch × 100 epochs ≈ 0.84 s + size_t idx = 0; + auto bench = make_latency_bench(100, 50); + bench.minEpochIterations(600); + bench.run("edge_signal_direct", [&] { + uint64_t target = node_ids[idx++ % node_ids.size()]; + target_to.store(target); + callback_called.store(false); + auto edge = GraphGenerator::create_test_edge( + root->id(), target, graph->get_agent_id()); + graph->insert_or_assign_edge(edge); + REQUIRE(callback_called.load()); + ankerl::nanobench::doNotOptimizeAway(edge); + }); + + auto stats = nb_to_stats(bench); + collector.record_latency_stats("edge_signal_direct", stats); + INFO("Edge signal (direct) - Mean: " << stats.mean_us() << " us, p99: " << stats.p99_us() << " us"); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "signal_edge_direct"); +} + +TEST_CASE("Attribute signal direct latency", "[LATENCY][signal][EXTENDED][.extended]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("signal_latency"); + collector.add_metadata("profile", "extended"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto test_node = GraphGenerator::create_test_node(0, graph->get_agent_id(), "attr_signal_test"); + auto node_id = graph->insert_node(test_node); + REQUIRE(node_id.has_value()); + + std::atomic callback_called{false}; + + 
QObject::connect(graph, &DSR::DSRGraph::update_node_attr_signal, graph, + [&](uint64_t id, const std::vector&, DSR::SignalInfo) { + if (id == *node_id) { + callback_called.store(true); + } + }, Qt::DirectConnection); + + uint64_t counter = 0; + auto bench = make_latency_bench(1000, 50); + bench.run("attr_signal_direct", [&] { + callback_called.store(false); + auto node = graph->get_node(*node_id); + REQUIRE(node.has_value()); + graph->add_or_modify_attrib_local( + *node, static_cast(100 + counter++)); + graph->update_node(*node); + REQUIRE(callback_called.load()); + ankerl::nanobench::doNotOptimizeAway(node); + }); + + auto stats = nb_to_stats(bench); + collector.record_latency_stats("attr_signal_direct", stats); + INFO("Attr signal (direct) - Mean: " << stats.mean_us() << " us, p99: " << stats.p99_us() << " us"); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "signal_attr_direct"); +} + +TEST_CASE("Node signal queued latency", "[LATENCY][signal]") { + // Qt::QueuedConnection dispatches via the event loop, so the callback + // fires asynchronously. nanobench cannot model the poll-wait pattern; + // manual bench_now() + fixture.process_events() is used instead. 
+ MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("signal_latency"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + LatencyTracker tracker(1000); + std::atomic callback_time{0}; + std::atomic callback_called{false}; + + QObject::connect(graph, &DSR::DSRGraph::update_node_signal, graph, + [&](uint64_t, const std::string&, DSR::SignalInfo) { + callback_time.store(bench_now()); + callback_called.store(true); + }, Qt::QueuedConnection); + + // Warmup + for (int i = 0; i < 50; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + fixture.process_events(); + } + + // Measurement + for (int i = 0; i < 1000; ++i) { + callback_called.store(false); + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + uint64_t pre_insert = bench_now(); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + + auto deadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(100); + while (!callback_called.load() && std::chrono::steady_clock::now() < deadline) { + fixture.process_events(1); + } + + if (callback_called.load()) { + tracker.record(callback_time.load() - pre_insert); + } + } + + auto stats = tracker.stats(); + collector.record_latency_stats("node_signal_queued", stats); + INFO("Node signal (queued) - Mean: " << stats.mean_us() << " us, p99: " << stats.p99_us() << " us"); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "signal_node_queued"); +} + +TEST_CASE("Signal emission under load", "[LATENCY][signal][stress][PROFILE][LOAD]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("signal_latency_stress"); + + auto config_file = generator.generate_empty_graph(); + 
REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + // Pre-populate graph with 1000 nodes + for (int i = 0; i < 1000; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + } + fixture.process_events(); + + std::atomic callback_called{false}; + + QObject::connect(graph, &DSR::DSRGraph::update_node_signal, graph, + [&](uint64_t, const std::string&, DSR::SignalInfo) { + callback_called.store(true); + }, Qt::DirectConnection); + + auto bench = make_latency_bench(1000, 50); + bench.minEpochIterations(10); + bench.run("signal_under_load", [&] { + callback_called.store(false); + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + graph->insert_node(node); + REQUIRE(callback_called.load()); + ankerl::nanobench::doNotOptimizeAway(node); + }); + + auto stats = nb_to_stats(bench); + collector.record_latency_stats("signal_with_1000_nodes", stats, {{"existing_nodes", "1000"}}); + INFO("Signal with 1000 nodes - Mean: " << stats.mean_us() << " us, p99: " << stats.p99_us() << " us"); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "signal_latency_stress"); +} diff --git a/benchmarks/python/bench_baseline_graph.py b/benchmarks/python/bench_baseline_graph.py new file mode 100644 index 0000000..7a8166c --- /dev/null +++ b/benchmarks/python/bench_baseline_graph.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +""" +Stable Python baseline benchmarks on a fixed graph. + +This intentionally avoids graph growth during measurement. The goal is to +provide a low-noise Python baseline for lookup/query/update paths and binding +costs, not to model end-to-end insertion throughput. 
+""" + +import sys +import os +import time + +sys.path.insert(0, os.path.dirname(__file__)) + +from bench_utils import LatencyTracker, MetricsCollector, make_temp_config_file + +try: + import pydsr +except ImportError: + print("Error: pydsr module not found.") + sys.exit(1) + + +def benchmark_fixed_graph(graph: pydsr.DSRGraph, collector: MetricsCollector): + agent_id = graph.get_agent_id() + root = graph.get_node("root") + assert root is not None, "root node missing" + + # Keep the Python baseline bounded so the top-level default run stays usable. + node_ids = [] + for i in range(300): + node = pydsr.Node(agent_id, "testtype", f"baseline_node_{i}") + inserted = graph.insert_node(node) + assert inserted is not None, f"insert_node failed for baseline_node_{i}" + node_ids.append(inserted) + edge = pydsr.Edge(inserted, root.id, "testtype_e", agent_id) + assert graph.insert_or_assign_edge(edge), f"insert edge failed for baseline_node_{i}" + + for node_id in node_ids: + assert graph.get_node(node_id) is not None + graph.get_nodes() + graph.get_nodes_by_type("testtype") + graph.get_edges(root.id) + graph.get_edges_by_type("testtype_e") + + tracker = LatencyTracker(1000) + for i in range(1000): + node_id = node_ids[i % len(node_ids)] + with tracker.measure(): + node = graph.get_node(node_id) + assert node is not None + collector.record_latency_stats("node_read_by_id", tracker.stats()) + + tracker = LatencyTracker(500) + for i in range(500): + name = f"baseline_node_{i % len(node_ids)}" + with tracker.measure(): + node = graph.get_node(name) + assert node is not None + collector.record_latency_stats("node_read_by_name", tracker.stats()) + + tracker = LatencyTracker(500) + target = graph.get_node("baseline_node_0") + assert target is not None + for i in range(500): + target.attrs["level"] = pydsr.Attribute(i) + with tracker.measure(): + ok = graph.update_node(target) + assert ok + collector.record_latency_stats("node_update", tracker.stats()) + + tracker = 
LatencyTracker(100) + for _ in range(100): + with tracker.measure(): + nodes = graph.get_nodes() + assert nodes + collector.record_latency_stats("get_nodes", tracker.stats()) + + tracker = LatencyTracker(100) + for _ in range(100): + with tracker.measure(): + nodes = graph.get_nodes_by_type("testtype") + assert nodes + collector.record_latency_stats("get_nodes_by_type", tracker.stats()) + + tracker = LatencyTracker(300) + for i in range(300): + node_id = node_ids[i % len(node_ids)] + with tracker.measure(): + edge = graph.get_edge(root.id, node_id, "testtype_e") + assert edge is not None + collector.record_latency_stats("edge_read", tracker.stats()) + + tracker = LatencyTracker(100) + for _ in range(100): + with tracker.measure(): + edges = graph.get_edges_by_type("testtype_e") + assert edges + collector.record_latency_stats("get_edges_by_type", tracker.stats()) + + +def main(): + print("=" * 60) + print("DSR Python Baseline Graph Benchmarks") + print("=" * 60) + print() + + collector = MetricsCollector("python_baseline_graph") + collector.metadata["profile"] = "baseline" + + config_file = make_temp_config_file() + graph = pydsr.DSRGraph(0, "python_baseline_graph", 84, config_file) + time.sleep(0.3) + + benchmark_fixed_graph(graph, collector) + + del graph + os.unlink(config_file) + + results_dir = os.environ.get( + "BENCH_RESULTS_DIR", + os.path.join(os.path.dirname(__file__), "..", "results"), + ) + os.makedirs(results_dir, exist_ok=True) + collector.export_json(os.path.join(results_dir, "python_baseline_graph.json")) + collector.export_csv(os.path.join(results_dir, "python_baseline_graph.csv")) + print(f"\nResults exported to {results_dir}") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/python/bench_binding_overhead.py b/benchmarks/python/bench_binding_overhead.py new file mode 100644 index 0000000..e8dfb38 --- /dev/null +++ b/benchmarks/python/bench_binding_overhead.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python3 +""" +Benchmark: Python binding 
overhead vs C++. + +Measures the overhead introduced by pybind11 bindings. Pure Python object +creation (Node, Edge, Attribute) uses pyperf.Runner.bench_func() for +calibrated, multi-process timing. Numpy array copy benchmarks use +bench_time_func() so the array setup happens outside the timed loop. + +Graph creation overhead is measured separately with a LatencyTracker because +it is too expensive (~500 ms each) to repeat inside pyperf worker processes. +""" + +import sys +import os +import time + +sys.path.insert(0, os.path.dirname(__file__)) + +from bench_utils import (LatencyTracker, MetricsCollector, make_temp_config_file, + pyperf_to_latency_stats) + +try: + import pydsr +except ImportError: + print("Error: pydsr module not found. Build with Python bindings enabled.") + sys.exit(1) + +try: + import pyperf +except ImportError: + print("Error: pyperf module not found. Install with: pip install pyperf") + sys.exit(1) + + +# ── Pure Python object creation (bench_func) ────────────────────────────────── + +def _create_node(): + return pydsr.Node(1, "testtype", "bench_node") + + +def _create_edge(): + return pydsr.Edge(100, 200, "testtype_e", 1) + + +def _create_attr_str(): + return pydsr.Attribute("test_string") + + +def _create_attr_int(): + return pydsr.Attribute(42) + + +def _create_attr_float(): + return pydsr.Attribute(3.14159) + + +def _create_attr_list(): + return pydsr.Attribute([1.0, 2.0, 3.0]) + + +# ── Numpy copy benchmarks (bench_time_func) ─────────────────────────────────── + +def _make_numpy_set_func(size: int): + """Return a bench_time_func that times setting a numpy array attribute.""" + def time_func(loops): + if not hasattr(time_func, "_data"): + import numpy as np + time_func._data = np.random.randint(0, 255, size, dtype=np.uint8) + time_func._attr = pydsr.Attribute([0]) + for _ in range(10): # warmup + time_func._attr.value = time_func._data + data = time_func._data + attr = time_func._attr + t1 = pyperf.perf_counter() + for _ in range(loops): 
+ attr.value = data + return pyperf.perf_counter() - t1 + time_func.__name__ = f"numpy_set_{size}" + return time_func + + +def _make_numpy_get_func(size: int): + """Return a bench_time_func that times getting a numpy array attribute.""" + def time_func(loops): + if not hasattr(time_func, "_attr"): + import numpy as np + data = np.random.randint(0, 255, size, dtype=np.uint8) + attr = pydsr.Attribute([0]) + attr.value = data + for _ in range(10): # warmup + _ = attr.value + time_func._attr = attr + attr = time_func._attr + t1 = pyperf.perf_counter() + for _ in range(loops): + _ = attr.value + return pyperf.perf_counter() - t1 + time_func.__name__ = f"numpy_get_{size}" + return time_func + + +# ── Graph creation (LatencyTracker — too expensive for pyperf workers) ───────── + +def benchmark_graph_creation(collector: MetricsCollector): + tracker = LatencyTracker(10) + config_file = make_temp_config_file() + + for i in range(10): + with tracker.measure(): + g = pydsr.DSRGraph(0, f"bench_graph_{i}", 100 + i, config_file) + del g + time.sleep(0.5) + + os.unlink(config_file) + + stats = tracker.stats() + collector.record_latency_stats("graph_creation", stats) + print(f"Graph creation: mean={stats.mean_ms:.2f} ms") + + +# ── Main ────────────────────────────────────────────────────────────────────── + +def main(): + # Inject default pyperf tuning before Runner parses sys.argv. + # Worker processes always receive --worker so they are skipped here. + if "--worker" not in sys.argv: + if "--values" not in sys.argv: + sys.argv.extend(["--values", "20"]) + if "--warmups" not in sys.argv: + sys.argv.extend(["--warmups", "5"]) + + runner = pyperf.Runner() + + # runner.args may be None before the first bench_func call in some pyperf + # versions; use sys.argv directly (worker processes always receive --worker). 
+ if "--worker" not in sys.argv: + print("=" * 60) + print("DSR Python Binding Overhead Benchmarks") + print("=" * 60) + + # Pure Python object creation + bm_node = runner.bench_func("node_creation", _create_node) + bm_edge = runner.bench_func("edge_creation", _create_edge) + bm_attr_str = runner.bench_func("attribute_string", _create_attr_str) + bm_attr_int = runner.bench_func("attribute_int", _create_attr_int) + bm_attr_float = runner.bench_func("attribute_float", _create_attr_float) + bm_attr_list = runner.bench_func("attribute_list", _create_attr_list) + + # Numpy attribute benchmarks + numpy_bms = {} + try: + import numpy # noqa: F401 — check availability before spawning workers + for size in [1000, 10000, 100000, 1000000]: + numpy_bms[f"numpy_set_{size}"] = runner.bench_time_func( + f"numpy_set_{size}", _make_numpy_set_func(size)) + numpy_bms[f"numpy_get_{size}"] = runner.bench_time_func( + f"numpy_get_{size}", _make_numpy_get_func(size)) + except ImportError: + print("Numpy not available, skipping numpy benchmarks") + + # Worker processes must not run the export code (stdout is not redirected, + # so workers printing zeros would overwrite/corrupt the master's output). 
+ if "--worker" in sys.argv: + return + + collector = MetricsCollector("binding_overhead") + + pyperf_items = [ + ("node_creation", bm_node), + ("edge_creation", bm_edge), + ("attribute_string", bm_attr_str), + ("attribute_int", bm_attr_int), + ("attribute_float", bm_attr_float), + ("attribute_list", bm_attr_list), + ] + for name, bm in pyperf_items: + stats = pyperf_to_latency_stats(bm) + collector.record_latency_stats(name, stats) + print(f"{name}: mean={stats.mean_us:.3f} µs") + + for name, bm in numpy_bms.items(): + stats = pyperf_to_latency_stats(bm) + collector.record_latency_stats(name, stats) + print(f"{name}: mean={stats.mean_us:.2f} µs") + + print("\n--- Graph Creation ---") + benchmark_graph_creation(collector) + + results_dir = os.environ.get( + "BENCH_RESULTS_DIR", + os.path.join(os.path.dirname(__file__), "..", "results"), + ) + os.makedirs(results_dir, exist_ok=True) + collector.export_json(os.path.join(results_dir, "python_binding_overhead.json")) + collector.export_csv(os.path.join(results_dir, "python_binding_overhead.csv")) + print(f"\nResults exported to {results_dir}") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/python/bench_graph_operations.py b/benchmarks/python/bench_graph_operations.py new file mode 100644 index 0000000..4e3412b --- /dev/null +++ b/benchmarks/python/bench_graph_operations.py @@ -0,0 +1,281 @@ +#!/usr/bin/env python3 +""" +Benchmark: Graph operations (CRUD) performance. + +Measures insert, read, update, delete performance for nodes and edges. + +pyperf is intentionally not used here: the benchmark functions share a single +DSRGraph instance and depend on each other's side-effects (e.g. edge +benchmarks rely on nodes inserted by node benchmarks). pyperf's per-worker +subprocess model would require re-running the full setup chain in each worker, +and the shared-state dependency makes clean isolation impractical. 
+""" + +import sys +import os +import time + +sys.path.insert(0, os.path.dirname(__file__)) + +from bench_utils import LatencyTracker, MetricsCollector, make_temp_config_file, warmup + +try: + import pydsr +except ImportError: + print("Error: pydsr module not found.") + sys.exit(1) + + +def benchmark_node_operations(graph: pydsr.DSRGraph, collector: MetricsCollector): + """Benchmark node CRUD operations.""" + agent_id = graph.get_agent_id() + + # --- Insert --- + tracker = LatencyTracker(2000) + base_id = 10000 + + # Warmup + for i in range(100): + node = pydsr.Node(agent_id, "testtype", f"warmup_{i}") + result = graph.insert_node(node) + assert result is not None, f"Warmup insert_node failed at i={i}" + + # Measure + for i in range(2000): + node = pydsr.Node(agent_id, "testtype", f"bench_node_{i}") + with tracker.measure(): + result = graph.insert_node(node) + assert result is not None, f"insert_node failed at i={i}" + + stats = tracker.stats() + collector.record_latency_stats("node_insert", stats) + print(f"Node insert: mean={stats.mean_us:.2f} us, p99={stats.p99_us:.2f} us") + + # --- Read by ID --- + tracker = LatencyTracker(3000) + nodes = graph.get_nodes() + node_ids = [n.id for n in nodes[:100]] + # Warmup: touch all IDs to bring them into cache + for node_id in node_ids: + node = graph.get_node(node_id) + assert node is not None, f"Warmup get_node({node_id}) returned None" + + for i in range(3000): + node_id = node_ids[i % len(node_ids)] + with tracker.measure(): + node = graph.get_node(node_id) + assert node is not None, f"get_node({node_id}) returned None" + + stats = tracker.stats() + collector.record_latency_stats("node_read_by_id", stats) + print(f"Node read (by id): mean={stats.mean_us:.2f} us") + + # --- Read by name --- + tracker = LatencyTracker(3000) + node_names = [f"bench_node_{i}" for i in range(100)] + for name in node_names: + node = graph.get_node(name) + assert node is not None, f"Warmup get_node('{name}') returned None" + + for i in 
range(3000): + name = node_names[i % len(node_names)] + with tracker.measure(): + node = graph.get_node(name) + assert node is not None, f"get_node('{name}') returned None" + + stats = tracker.stats() + collector.record_latency_stats("node_read_by_name", stats) + print(f"Node read (by name): mean={stats.mean_us:.2f} us") + + # --- Update --- + tracker = LatencyTracker(2000) + test_node = graph.get_node("bench_node_0") + assert test_node is not None, "bench_node_0 not found for update benchmark" + + for i in range(2000): + test_node.attrs["level"] = pydsr.Attribute(i) + with tracker.measure(): + result = graph.update_node(test_node) + assert result, f"update_node failed at i={i}" + + stats = tracker.stats() + collector.record_latency_stats("node_update", stats) + print(f"Node update: mean={stats.mean_us:.2f} us") + + # --- Delete --- + tracker = LatencyTracker(500) + delete_nodes = [f"bench_node_{i}" for i in range(1500, 2000)] + + for name in delete_nodes: + with tracker.measure(): + result = graph.delete_node(name) + assert result, f"delete_node('{name}') failed" + + stats = tracker.stats() + collector.record_latency_stats("node_delete", stats) + print(f"Node delete: mean={stats.mean_us:.2f} us") + + +def benchmark_edge_operations(graph: pydsr.DSRGraph, collector: MetricsCollector): + """Benchmark edge CRUD operations.""" + agent_id = graph.get_agent_id() + + # Get root node + root = graph.get_node("root") + if not root: + print("No root node found") + return + + # Create target nodes for edges + for i in range(200): + node = pydsr.Node(agent_id, "testtype", f"edge_target_{i}") + result = graph.insert_node(node) + assert result is not None, f"insert_node failed for edge_target_{i}" + + time.sleep(0.1) + + # --- Insert edge --- + tracker = LatencyTracker(200) + + for i in range(200): + target = graph.get_node(f"edge_target_{i}") + assert target is not None, f"edge_target_{i} not found for edge insert" + edge = pydsr.Edge(target.id, root.id, "testtype_e", agent_id) 
+ with tracker.measure(): + result = graph.insert_or_assign_edge(edge) + assert result, f"insert_or_assign_edge failed for edge_target_{i}" + + stats = tracker.stats() + collector.record_latency_stats("edge_insert", stats) + print(f"Edge insert: mean={stats.mean_us:.2f} us, p99={stats.p99_us:.2f} us") + + # --- Read edge --- + tracker = LatencyTracker(500) + + for i in range(500): + target = graph.get_node(f"edge_target_{i % 200}") + assert target is not None, f"edge_target_{i % 200} not found for edge read" + with tracker.measure(): + edge = graph.get_edge(root.id, target.id, "testtype_e") + assert edge is not None, f"get_edge returned None for edge_target_{i % 200}" + + stats = tracker.stats() + collector.record_latency_stats("edge_read", stats) + print(f"Edge read: mean={stats.mean_us:.2f} us") + + # --- Delete edge --- + tracker = LatencyTracker(100) + + for i in range(100, 200): + target = graph.get_node(f"edge_target_{i}") + assert target is not None, f"edge_target_{i} not found for edge delete" + with tracker.measure(): + result = graph.delete_edge(root.id, target.id, "testtype_e") + assert result, f"delete_edge failed for edge_target_{i}" + + stats = tracker.stats() + collector.record_latency_stats("edge_delete", stats) + print(f"Edge delete: mean={stats.mean_us:.2f} us") + + +def benchmark_query_operations(graph: pydsr.DSRGraph, collector: MetricsCollector): + """Benchmark query operations.""" + + # --- get_nodes --- + tracker = LatencyTracker(500) + + for _ in range(500): + with tracker.measure(): + graph.get_nodes() + + stats = tracker.stats() + collector.record_latency_stats("get_all_nodes", stats) + print(f"get_nodes(): mean={stats.mean_us:.2f} us") + + # --- get_nodes_by_type --- + tracker = LatencyTracker(500) + + for _ in range(500): + with tracker.measure(): + graph.get_nodes_by_type("testtype") + + stats = tracker.stats() + collector.record_latency_stats("get_nodes_by_type", stats) + print(f"get_nodes_by_type(): mean={stats.mean_us:.2f} us") + + # 
--- get_edges (from node) --- + root = graph.get_node("root") + if root: + tracker = LatencyTracker(500) + + for _ in range(500): + with tracker.measure(): + graph.get_edges(root.id) + + stats = tracker.stats() + collector.record_latency_stats("get_edges_from_node", stats) + print(f"get_edges(id): mean={stats.mean_us:.2f} us") + + # --- get_edges_to_id --- + if root: + tracker = LatencyTracker(500) + + for _ in range(500): + with tracker.measure(): + graph.get_edges_to_id(root.id) + + stats = tracker.stats() + collector.record_latency_stats("get_edges_to_id", stats) + print(f"get_edges_to_id(id): mean={stats.mean_us:.2f} us") + + # --- get_edges_by_type --- + tracker = LatencyTracker(500) + + for _ in range(500): + with tracker.measure(): + graph.get_edges_by_type("testtype_e") + + stats = tracker.stats() + collector.record_latency_stats("get_edges_by_type", stats) + print(f"get_edges_by_type(): mean={stats.mean_us:.2f} us") + + +def main(): + print("=" * 60) + print("DSR Python Graph Operations Benchmarks") + print("=" * 60) + print() + + collector = MetricsCollector("graph_operations") + + # Create graph + config_file = make_temp_config_file() + graph = pydsr.DSRGraph(0, "bench_graph_ops", 42, config_file) + time.sleep(0.5) + + print("--- Node Operations ---") + benchmark_node_operations(graph, collector) + + print("\n--- Edge Operations ---") + benchmark_edge_operations(graph, collector) + + print("\n--- Query Operations ---") + benchmark_query_operations(graph, collector) + + # Cleanup + del graph + os.unlink(config_file) + + # Export + results_dir = os.environ.get( + "BENCH_RESULTS_DIR", + os.path.join(os.path.dirname(__file__), "..", "results"), + ) + os.makedirs(results_dir, exist_ok=True) + collector.export_json(os.path.join(results_dir, "python_graph_operations.json")) + collector.export_csv(os.path.join(results_dir, "python_graph_operations.csv")) + print(f"\nResults exported to {results_dir}") + + +if __name__ == "__main__": + main() diff --git 
a/benchmarks/python/bench_signals.py b/benchmarks/python/bench_signals.py new file mode 100644 index 0000000..0249787 --- /dev/null +++ b/benchmarks/python/bench_signals.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +""" +Benchmark: Signal/callback performance. + +Measures signal connection, emission, and callback invocation overhead. + +pyperf is intentionally not used here: the signal benchmarks use async +patterns (threading.Event, callback_received.wait()) that are incompatible +with pyperf's tight synchronous loop model. The queued-signal benchmark +also relies on Qt's event loop processing between operations. +""" + +import sys +import os +import time +import threading + +sys.path.insert(0, os.path.dirname(__file__)) + +from bench_utils import LatencyTracker, MetricsCollector, make_temp_config_file + +try: + import pydsr +except ImportError: + print("Error: pydsr module not found.") + sys.exit(1) + + +def benchmark_signal_callback_latency(graph: pydsr.DSRGraph, collector: MetricsCollector): + """Measure signal callback invocation latency.""" + agent_id = graph.get_agent_id() + tracker = LatencyTracker(100) + + callback_time = [0] + callback_received = threading.Event() + expected_id = [0] + + def on_node_update(node_id: int, node_type: str): + if node_id == expected_id[0]: + callback_time[0] = time.perf_counter_ns() + callback_received.set() + + # Connect signal + pydsr.signals.connect(graph, pydsr.signals.UPDATE_NODE, on_node_update) + + # Warmup + for i in range(20): + node = pydsr.Node(agent_id, "testtype", f"warmup_sig_{i}") + result = graph.insert_node(node) + assert result is not None, f"Warmup insert_node failed at i={i}" + time.sleep(0.05) + + # Measure + for i in range(100): + callback_received.clear() + node = pydsr.Node(agent_id, "testtype", f"signal_node_{i}") + + send_time = time.perf_counter_ns() + expected_id[0] = graph.insert_node(node) + assert expected_id[0] is not None, f"insert_node failed at signal measurement i={i}" + + # Wait for 
callback + if callback_received.wait(timeout=2.0): + latency = callback_time[0] - send_time + tracker.record(latency) + + stats = tracker.stats() + collector.record_latency_stats("signal_callback_latency", stats) + print(f"Signal callback latency: mean={stats.mean_us:.2f} us, p99={stats.p99_us:.2f} us") + print(f" (received {tracker.count}/100 callbacks)") + + +def benchmark_signal_throughput(graph: pydsr.DSRGraph, collector: MetricsCollector): + """Measure how many signals can be processed per second. + + Uses a fixed insert count instead of a time-based loop to keep the + callback backlog bounded. An unbounded loop (e.g. 3 s × 40K inserts/sec) + creates a queue that outlasts the benchmark and blocks graph teardown. + """ + agent_id = graph.get_agent_id() + + callback_count = [0] + + def on_node_update(node_id: int, node_type: str): + callback_count[0] += 1 + + pydsr.signals.connect(graph, pydsr.signals.UPDATE_NODE, on_node_update) + + INSERT_COUNT = 3000 + print("Generating signals...") + start = time.perf_counter() + + for i in range(INSERT_COUNT): + node = pydsr.Node(agent_id, "testtype", f"sig_tp_{i}") + result = graph.insert_node(node) + assert result is not None, f"insert_node failed at signal throughput i={i}" + + # Wait for callbacks to drain, but give up after a timeout so teardown + # isn't blocked indefinitely if the callback rate is very slow. 
+ drain_deadline = time.perf_counter() + 5.0 + prev = -1 + while time.perf_counter() < drain_deadline: + time.sleep(0.1) + cur = callback_count[0] + if cur == prev: # no new callbacks — queue is drained + break + prev = cur + + duration = time.perf_counter() - start + callbacks_per_sec = callback_count[0] / duration + + collector.record_throughput("signal_callbacks", callback_count[0], duration) + print(f"Signal throughput: {callbacks_per_sec:.0f} callbacks/sec") + print(f" ({callback_count[0]} callbacks for {INSERT_COUNT} inserts)") + + +def benchmark_multiple_handlers(graph: pydsr.DSRGraph, collector: MetricsCollector): + """Measure impact of multiple signal handlers.""" + agent_id = graph.get_agent_id() + + for num_handlers in [1, 5, 10]: + callback_counts = [0] * num_handlers + + def make_handler(idx): + def handler(node_id: int, node_type: str): + callback_counts[idx] += 1 + return handler + + # Connect multiple handlers + handlers = [make_handler(i) for i in range(num_handlers)] + for h in handlers: + pydsr.signals.connect(graph, pydsr.signals.UPDATE_NODE, h) + + # Generate updates + insert_count = 100 + start = time.perf_counter() + + for i in range(insert_count): + node = pydsr.Node(agent_id, "testtype", f"mh_{num_handlers}_{i}") + result = graph.insert_node(node) + assert result is not None, f"insert_node failed for mh_{num_handlers}_{i}" + + time.sleep(0.3) # Let callbacks process + duration = time.perf_counter() - start + + total_callbacks = sum(callback_counts) + collector.record("callbacks_with_handlers", "throughput", + total_callbacks / duration, + "callbacks/sec", + tags={"num_handlers": str(num_handlers)}) + + print(f"{num_handlers} handlers: {total_callbacks} callbacks in {duration:.2f}s") + + +def main(): + print("=" * 60) + print("DSR Python Signal Benchmarks") + print("=" * 60) + print() + + collector = MetricsCollector("signals") + + config_file = make_temp_config_file() + graph = pydsr.DSRGraph(0, "bench_signals", 42, config_file) + 
time.sleep(0.5) + + print("--- Signal Callback Latency ---") + benchmark_signal_callback_latency(graph, collector) + + print("\n--- Signal Throughput ---") + benchmark_signal_throughput(graph, collector) + + print("\n--- Multiple Handlers Impact ---") + benchmark_multiple_handlers(graph, collector) + + del graph + os.unlink(config_file) + + # Export + results_dir = os.environ.get( + "BENCH_RESULTS_DIR", + os.path.join(os.path.dirname(__file__), "..", "results"), + ) + os.makedirs(results_dir, exist_ok=True) + collector.export_json(os.path.join(results_dir, "python_signals.json")) + collector.export_csv(os.path.join(results_dir, "python_signals.csv")) + print(f"\nResults exported to {results_dir}") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/python/bench_throughput.py b/benchmarks/python/bench_throughput.py new file mode 100644 index 0000000..3157842 --- /dev/null +++ b/benchmarks/python/bench_throughput.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +""" +Benchmark: Single-agent throughput + latency for node/edge operations. + +Uses pyperf.Runner with bench_time_func so that pyperf calibrates the +iteration count and runs multiple worker processes for noise reduction. +Each bench_time_func performs lazy setup (graph creation) outside the timed +loop on the first call; subsequent calls in the same worker reuse the graph. + +The master process collects all Benchmark objects, converts them to +LatencyStats, and exports to python_throughput.json. +""" + +import sys +import os +import time + +sys.path.insert(0, os.path.dirname(__file__)) + +from bench_utils import MetricsCollector, make_temp_config_file, pyperf_to_latency_stats + +try: + import pydsr +except ImportError: + print("Error: pydsr module not found.") + sys.exit(1) + +try: + import pyperf +except ImportError: + print("Error: pyperf module not found. 
Install with: pip install pyperf") + sys.exit(1) + + +# ── Lazy graph initialisation (runs once per worker process) ────────────────── + +def _init_graph(tag: str, agent_id_hint: int = 43): + """Create a DSRGraph and return (graph, config_path, agent_id).""" + config = make_temp_config_file() + graph = pydsr.DSRGraph(0, f"bench_throughput_{tag}", agent_id_hint, config) + time.sleep(0.2) + return graph, config, graph.get_agent_id() + + +# ── bench_time_func implementations ────────────────────────────────────────── +# Each function signature is (loops,) -> float (elapsed seconds). +# pyperf calls time_func(loops) — use pyperf.perf_counter() directly. +# State is stored as function attributes so setup only happens once per worker. + +def _bench_node_insert(loops): + if not hasattr(_bench_node_insert, "_graph"): + graph, config, agent_id = _init_graph("insert") + _bench_node_insert._graph = graph + _bench_node_insert._config = config + _bench_node_insert._agent_id = agent_id + _bench_node_insert._counter = 0 + + graph = _bench_node_insert._graph + agent_id = _bench_node_insert._agent_id + + t1 = pyperf.perf_counter() + for _ in range(loops): + node = pydsr.Node(agent_id, "testtype", f"thr_ins_{_bench_node_insert._counter}") + _bench_node_insert._counter += 1 + graph.insert_node(node) + return pyperf.perf_counter() - t1 + + +def _bench_node_read(loops): + if not hasattr(_bench_node_read, "_graph"): + graph, config, agent_id = _init_graph("read") + node_ids = [] + for i in range(1000): + node = pydsr.Node(agent_id, "testtype", f"thr_rd_{i}") + nid = graph.insert_node(node) + assert nid is not None + node_ids.append(nid) + for nid in node_ids: + graph.get_node(nid) # cache warmup + _bench_node_read._graph = graph + _bench_node_read._config = config + _bench_node_read._node_ids = node_ids + _bench_node_read._idx = 0 + + graph = _bench_node_read._graph + node_ids = _bench_node_read._node_ids + idx = _bench_node_read._idx + + t1 = pyperf.perf_counter() + for _ in 
range(loops): + graph.get_node(node_ids[idx % len(node_ids)]) + idx += 1 + _bench_node_read._idx = idx + return pyperf.perf_counter() - t1 + + +def _bench_node_update(loops): + if not hasattr(_bench_node_update, "_graph"): + graph, config, agent_id = _init_graph("update") + node = pydsr.Node(agent_id, "testtype", "thr_upd_target") + nid = graph.insert_node(node) + assert nid is not None + target = graph.get_node("thr_upd_target") + assert target is not None + _bench_node_update._graph = graph + _bench_node_update._config = config + _bench_node_update._target = target + _bench_node_update._counter = 0 + + graph = _bench_node_update._graph + target = _bench_node_update._target + + t1 = pyperf.perf_counter() + for _ in range(loops): + target.attrs["level"] = pydsr.Attribute(_bench_node_update._counter % 1000) + _bench_node_update._counter += 1 + graph.update_node(target) + return pyperf.perf_counter() - t1 + + +def _bench_edge_insert(loops): + if not hasattr(_bench_edge_insert, "_graph"): + graph, config, agent_id = _init_graph("edge_insert", 44) + root = graph.get_node("root") + assert root is not None, "no root node" + targets = [] + for i in range(1000): + node = pydsr.Node(agent_id, "testtype", f"thr_etgt_{i}") + ins = graph.insert_node(node) + assert ins is not None + n = graph.get_node(f"thr_etgt_{i}") + assert n is not None + targets.append(n.id) + _bench_edge_insert._graph = graph + _bench_edge_insert._config = config + _bench_edge_insert._agent_id = agent_id + _bench_edge_insert._root_id = root.id + _bench_edge_insert._targets = targets + _bench_edge_insert._idx = 0 + + graph = _bench_edge_insert._graph + agent_id = _bench_edge_insert._agent_id + root_id = _bench_edge_insert._root_id + targets = _bench_edge_insert._targets + idx = _bench_edge_insert._idx + + t1 = pyperf.perf_counter() + for _ in range(loops): + tid = targets[idx % len(targets)] + edge = pydsr.Edge(tid, root_id, "testtype_e", agent_id) + graph.insert_or_assign_edge(edge) + idx += 1 + 
_bench_edge_insert._idx = idx + return pyperf.perf_counter() - t1 + + +def _bench_edge_read(loops): + if not hasattr(_bench_edge_read, "_graph"): + graph, config, agent_id = _init_graph("edge_read", 45) + root = graph.get_node("root") + assert root is not None, "no root node" + targets = [] + for i in range(1000): + node = pydsr.Node(agent_id, "testtype", f"thr_erd_{i}") + ins = graph.insert_node(node) + assert ins is not None + n = graph.get_node(f"thr_erd_{i}") + assert n is not None + targets.append(n.id) + edge = pydsr.Edge(n.id, root.id, "testtype_e", agent_id) + graph.insert_or_assign_edge(edge) + for tid in targets: + graph.get_edge(root.id, tid, "testtype_e") # cache warmup + _bench_edge_read._graph = graph + _bench_edge_read._config = config + _bench_edge_read._root_id = root.id + _bench_edge_read._targets = targets + _bench_edge_read._idx = 0 + + graph = _bench_edge_read._graph + root_id = _bench_edge_read._root_id + targets = _bench_edge_read._targets + idx = _bench_edge_read._idx + + t1 = pyperf.perf_counter() + for _ in range(loops): + graph.get_edge(root_id, targets[idx % len(targets)], "testtype_e") + idx += 1 + _bench_edge_read._idx = idx + return pyperf.perf_counter() - t1 + + +# ── Main ────────────────────────────────────────────────────────────────────── + +def main(): + # Inject default pyperf tuning before Runner parses sys.argv. + # Worker processes always receive --worker so they are skipped here. 
+ if "--worker" not in sys.argv: + if "--values" not in sys.argv: + sys.argv.extend(["--values", "20"]) + if "--warmups" not in sys.argv: + sys.argv.extend(["--warmups", "5"]) + + runner = pyperf.Runner() + + bm_node_insert = runner.bench_time_func("node_insert", _bench_node_insert) + bm_node_read = runner.bench_time_func("node_read", _bench_node_read) + bm_node_update = runner.bench_time_func("node_update", _bench_node_update) + bm_edge_insert = runner.bench_time_func("edge_insert", _bench_edge_insert) + bm_edge_read = runner.bench_time_func("edge_read", _bench_edge_read) + + # Worker processes must not run the export code (stdout is not redirected, + # so workers printing zeros would overwrite/corrupt the master's output). + if "--worker" in sys.argv: + return + collector = MetricsCollector("python_throughput") + + benchmarks = [ + ("node_insert", bm_node_insert), + ("node_read", bm_node_read), + ("node_update", bm_node_update), + ("edge_insert", bm_edge_insert), + ("edge_read", bm_edge_read), + ] + for name, bm in benchmarks: + stats = pyperf_to_latency_stats(bm) + collector.record_latency_stats(name, stats) + if stats.mean_ns > 0: + collector.record_throughput(name, 1, stats.mean_ns / 1e9) + print(f"{name}: mean={stats.mean_us:.2f} µs stddev={stats.stddev_ns/1000:.2f} µs") + + results_dir = os.environ.get( + "BENCH_RESULTS_DIR", + os.path.join(os.path.dirname(__file__), "..", "results"), + ) + os.makedirs(results_dir, exist_ok=True) + collector.export_json(os.path.join(results_dir, "python_throughput.json")) + collector.export_csv(os.path.join(results_dir, "python_throughput.csv")) + print(f"\nResults exported to {results_dir}") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/python/bench_utils.py b/benchmarks/python/bench_utils.py new file mode 100644 index 0000000..c2f60f4 --- /dev/null +++ b/benchmarks/python/bench_utils.py @@ -0,0 +1,295 @@ +""" +Utility functions for DSR Python benchmarks. 
+""" + +import time +import statistics +import json +import csv +import os +from dataclasses import dataclass, field +from typing import Callable, List, Dict, Any, Optional +from contextlib import contextmanager + + +@dataclass +class LatencyStats: + """Statistics from latency measurements.""" + count: int = 0 + mean_ns: float = 0.0 + stddev_ns: float = 0.0 + min_ns: float = 0.0 + max_ns: float = 0.0 + p50_ns: float = 0.0 + p90_ns: float = 0.0 + p95_ns: float = 0.0 + p99_ns: float = 0.0 + + @property + def mean_us(self) -> float: + return self.mean_ns / 1000.0 + + @property + def mean_ms(self) -> float: + return self.mean_ns / 1_000_000.0 + + @property + def p99_us(self) -> float: + return self.p99_ns / 1000.0 + + @property + def p99_ms(self) -> float: + return self.p99_ns / 1_000_000.0 + + +class LatencyTracker: + """Collects latency samples and computes statistics.""" + + def __init__(self, expected_samples: int = 100): + self.samples: List[float] = [] + + def record(self, latency_ns: float): + """Record a latency sample in nanoseconds.""" + self.samples.append(latency_ns) + + def record_seconds(self, latency_sec: float): + """Record a latency sample in seconds.""" + self.samples.append(latency_sec * 1_000_000_000) + + @contextmanager + def measure(self): + """Context manager for measuring latency.""" + start = time.perf_counter_ns() + yield + self.samples.append(time.perf_counter_ns() - start) + + def clear(self): + self.samples.clear() + + @property + def count(self) -> int: + return len(self.samples) + + def stats(self) -> LatencyStats: + """Compute and return statistics.""" + if not self.samples: + return LatencyStats() + + sorted_samples = sorted(self.samples) + n = len(sorted_samples) + + def percentile(p: float) -> float: + idx = p * (n - 1) + lower = int(idx) + upper = min(lower + 1, n - 1) + frac = idx - lower + return sorted_samples[lower] * (1 - frac) + sorted_samples[upper] * frac + + return LatencyStats( + count=n, + 
mean_ns=statistics.mean(sorted_samples), + stddev_ns=statistics.stdev(sorted_samples) if n > 1 else 0.0, + min_ns=sorted_samples[0], + max_ns=sorted_samples[-1], + p50_ns=percentile(0.50), + p90_ns=percentile(0.90), + p95_ns=percentile(0.95), + p99_ns=percentile(0.99), + ) + + +@dataclass +class Metric: + """Individual metric measurement.""" + name: str + category: str + value: float + unit: str = "" + additional: Dict[str, float] = field(default_factory=dict) + tags: Dict[str, str] = field(default_factory=dict) + + +class MetricsCollector: + """Collects benchmark metrics.""" + + def __init__(self, benchmark_name: str = ""): + self.benchmark_name = benchmark_name + self.metrics: List[Metric] = [] + self.metadata: Dict[str, str] = {} + self.start_time = time.time() + + def record(self, name: str, category: str, value: float, + unit: str = "", tags: Optional[Dict[str, str]] = None): + self.metrics.append(Metric( + name=name, + category=category, + value=value, + unit=unit, + tags=tags or {}, + )) + + def record_latency_stats(self, name: str, stats: LatencyStats, + tags: Optional[Dict[str, str]] = None): + m = Metric( + name=name, + category="latency", + value=stats.mean_ns, + unit="ns", + tags=tags or {}, + additional={ + "count": stats.count, + "mean_ns": stats.mean_ns, + "stddev_ns": stats.stddev_ns, + "min_ns": stats.min_ns, + "max_ns": stats.max_ns, + "p50_ns": stats.p50_ns, + "p90_ns": stats.p90_ns, + "p95_ns": stats.p95_ns, + "p99_ns": stats.p99_ns, + } + ) + self.metrics.append(m) + + def record_scalability(self, name: str, scale_factor: int, value: float, + unit: str = "", tags: Optional[Dict[str, str]] = None): + m = Metric(name=name, category="scalability", value=value, unit=unit, + tags=tags or {}, additional={"scale_factor": float(scale_factor)}) + self.metrics.append(m) + + def record_throughput(self, name: str, operations: int, + duration_sec: float, tags: Optional[Dict[str, str]] = None): + ops_per_sec = operations / duration_sec if duration_sec > 0 
else 0 + m = Metric( + name=name, + category="throughput", + value=ops_per_sec, + unit="ops/sec", + tags=tags or {}, + additional={ + "total_operations": operations, + "duration_sec": duration_sec, + } + ) + self.metrics.append(m) + + def export_json(self, filepath: str): + """Export metrics to JSON.""" + os.makedirs(os.path.dirname(filepath) or ".", exist_ok=True) + result = { + "benchmark_name": self.benchmark_name, + "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"), + "total_duration_sec": time.time() - self.start_time, + "metadata": self.metadata, + "metrics": [ + { + "name": m.name, + "category": m.category, + "value": m.value, + "unit": m.unit, + "additional": m.additional, + "tags": m.tags, + } + for m in self.metrics + ] + } + with open(filepath, "w") as f: + json.dump(result, f, indent=2) + + def export_csv(self, filepath: str): + """Export metrics to CSV.""" + os.makedirs(os.path.dirname(filepath) or ".", exist_ok=True) + with open(filepath, "w", newline="") as f: + writer = csv.writer(f) + writer.writerow([ + "benchmark_name", "metric_name", "category", "value", "unit", + "mean_ns", "p50_ns", "p95_ns", "p99_ns", "count" + ]) + for m in self.metrics: + writer.writerow([ + self.benchmark_name, m.name, m.category, m.value, m.unit, + m.additional.get("mean_ns", ""), + m.additional.get("p50_ns", ""), + m.additional.get("p95_ns", ""), + m.additional.get("p99_ns", ""), + m.additional.get("count", ""), + ]) + + +def make_temp_config_file() -> str: + """Create a minimal DSR config file.""" + import tempfile + config = { + "DSRModel": { + "symbols": { + "100": { + "attribute": { + "level": {"type": 1, "value": 0} + }, + "id": "100", + "links": [], + "name": "root", + "type": "root" + } + } + } + } + fd, path = tempfile.mkstemp(suffix=".json", prefix="dsr_bench_") + with os.fdopen(fd, "w") as f: + json.dump(config, f) + return path + + +def warmup(func: Callable, iterations: int = 10): + """Run warmup iterations.""" + for _ in range(iterations): + func() + + +# ── 
pyperf integration ──────────────────────────────────────────────────────── + +try: + import pyperf as _pyperf # type: ignore + HAS_PYPERF = True +except ImportError: + _pyperf = None # type: ignore + HAS_PYPERF = False + + +def pyperf_to_latency_stats(bm) -> LatencyStats: + """Convert a pyperf Benchmark to LatencyStats. + + pyperf 'values' are mean elapsed time per operation (in seconds) for each + run. With the default 3 processes × 5 values we get ~15 data points. + Note: these are per-run averages, not individual-op samples, so percentiles + reflect variability across runs rather than per-op tail latency. + """ + if bm is None: + return LatencyStats() + try: + values_ns = [v * 1e9 for v in bm.get_values()] + except Exception: + return LatencyStats() + if not values_ns: + return LatencyStats() + + sorted_v = sorted(values_ns) + n = len(sorted_v) + + def pct(p: float) -> float: + idx = p * (n - 1) + lo = int(idx) + hi = min(lo + 1, n - 1) + f = idx - lo + return sorted_v[lo] * (1 - f) + sorted_v[hi] * f + + return LatencyStats( + count=n, + mean_ns=statistics.mean(sorted_v), + stddev_ns=statistics.stdev(sorted_v) if n > 1 else 0.0, + min_ns=sorted_v[0], + max_ns=sorted_v[-1], + p50_ns=pct(0.50), + p90_ns=pct(0.90), + p95_ns=pct(0.95), + p99_ns=pct(0.99), + ) diff --git a/benchmarks/python/run_all.py b/benchmarks/python/run_all.py new file mode 100644 index 0000000..9e362fe --- /dev/null +++ b/benchmarks/python/run_all.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python3 +""" +Run all DSR Python benchmarks and record the results as a named run. 
+ +Usage: + python run_all.py # auto-timestamped run + python run_all.py --label "after-fix" # labelled run + python run_all.py --list # list previous runs + python run_all.py --delete # remove a run from the index +""" + +import sys +import os +import subprocess +import time +import json +import argparse +import platform +from datetime import datetime + +ALL_BENCHMARKS = [ + "bench_binding_overhead.py", + "bench_baseline_graph.py", + "bench_graph_operations.py", + "bench_throughput.py", + "bench_signals.py", +] + +BASELINE_BENCHMARKS = [ + "bench_baseline_graph.py", +] + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +DEFAULT_RESULTS_ROOT = os.path.abspath(os.path.join(SCRIPT_DIR, "..", "results")) +RUNS_INDEX = os.path.join(DEFAULT_RESULTS_ROOT, "runs.json") + + +# ── Index helpers ───────────────────────────────────────────────────────────── + +def load_runs() -> list: + if not os.path.isfile(RUNS_INDEX): + return [] + try: + with open(RUNS_INDEX) as f: + return json.load(f) + except PermissionError: + print(f"WARNING: cannot read benchmark index: {RUNS_INDEX} (permission denied)") + return [] + + +def save_runs(runs: list): + os.makedirs(DEFAULT_RESULTS_ROOT, exist_ok=True) + try: + with open(RUNS_INDEX, "w") as f: + json.dump(runs, f, indent=2) + except PermissionError: + print(f"WARNING: cannot update benchmark index: {RUNS_INDEX} (permission denied)") + + +def register_run(run_info: dict): + runs = load_runs() + runs = [r for r in runs if r["id"] != run_info["id"]] + runs.append(run_info) + runs.sort(key=lambda r: r["id"]) + save_runs(runs) + + +# ── Commands ────────────────────────────────────────────────────────────────── + +def cmd_list(): + runs = load_runs() + if not runs: + print("No runs recorded yet.") + return + print(f"{'ID':<22} {'Label':<20} {'Pass/Total':>10} {'Duration':>9}") + print("-" * 70) + for r in runs: + ratio = f"{r.get('benchmarks_passed', 0)}/{r.get('benchmarks_run', 0)}" + dur = f"{r.get('total_duration_sec', 0):.1f}s" + 
label = r.get("label") or "-" + print(f"{r['id']:<22} {label:<20} {ratio:>10} {dur:>9}") + + +def cmd_delete(run_id: str): + runs = load_runs() + before = len(runs) + runs = [r for r in runs if r["id"] != run_id] + if len(runs) == before: + print(f"Run '{run_id}' not found in index.") + return + save_runs(runs) + print(f"Removed run '{run_id}' from index (result files kept on disk).") + + +def cmd_run_direct(benchmarks) -> int: + """Run benchmarks using BENCH_RESULTS_DIR already set in the environment. + + Called by the top-level run_benchmarks.py wrapper so it can manage the + run directory and index registration itself. + """ + results_dir = os.environ.get("BENCH_RESULTS_DIR", ".") + print("=" * 70) + print(" DSR Python Benchmark Suite") + print(f" Output : {results_dir}") + print("=" * 70) + print() + + env = dict(os.environ) + results = [] + suite_start = time.time() + + for bench in benchmarks: + bench_path = os.path.join(SCRIPT_DIR, bench) + print(f"\n{'=' * 70}") + print(f"Running: {bench}") + print("=" * 70) + try: + proc = subprocess.run([sys.executable, bench_path], cwd=SCRIPT_DIR, env=env, timeout=300) + results.append((bench, proc.returncode == 0)) + except subprocess.TimeoutExpired: + print(f"TIMEOUT: {bench}") + results.append((bench, False)) + except Exception as e: + print(f"ERROR: {bench}: {e}") + results.append((bench, False)) + + total_duration = time.time() - suite_start + passed = sum(1 for _, ok in results if ok) + print(f"\n {passed}/{len(results)} benchmarks completed in {total_duration:.1f}s") + return 0 if all(ok for _, ok in results) else 1 + + +def cmd_run(label, results_root, benchmarks): + ts = datetime.now() + run_id = ts.strftime("%Y%m%dT%H%M%S") + dir_name = run_id if not label else f"{run_id}_{label.replace(' ', '-')}" + run_dir = os.path.join(results_root, dir_name) + os.makedirs(run_dir, exist_ok=True) + + print("=" * 70) + print(f" DSR Python Benchmark Suite") + print(f" Run ID : {run_id}") + if label: + print(f" Label : 
{label}") + print(f" Output : {run_dir}") + print("=" * 70) + print() + + env = {**os.environ, "BENCH_RESULTS_DIR": run_dir} + + results = [] + suite_start = time.time() + + for bench in benchmarks: + bench_path = os.path.join(SCRIPT_DIR, bench) + print(f"\n{'=' * 70}") + print(f"Running: {bench}") + print("=" * 70) + + try: + proc = subprocess.run( + [sys.executable, bench_path], + cwd=SCRIPT_DIR, + env=env, + timeout=300, + ) + results.append((bench, proc.returncode == 0)) + except subprocess.TimeoutExpired: + print(f"TIMEOUT: {bench}") + results.append((bench, False)) + except Exception as e: + print(f"ERROR: {bench}: {e}") + results.append((bench, False)) + + total_duration = time.time() - suite_start + + try: + git_hash = subprocess.check_output( + ["git", "rev-parse", "--short", "HEAD"], + cwd=SCRIPT_DIR, stderr=subprocess.DEVNULL, + ).decode().strip() + except Exception: + git_hash = "" + + run_info = { + "id": run_id, + "label": label or "", + "dir": dir_name, + "timestamp": ts.isoformat(), + "total_duration_sec": round(total_duration, 2), + "benchmarks_run": len(results), + "benchmarks_passed": sum(1 for _, ok in results if ok), + "git_hash": git_hash, + "platform": platform.platform(), + "python": sys.version.split()[0], + } + + with open(os.path.join(run_dir, "run_info.json"), "w") as f: + json.dump(run_info, f, indent=2) + + register_run(run_info) + + print("\n" + "=" * 70) + print(" Summary") + print("=" * 70) + for bench, ok in results: + print(f" [{'PASS' if ok else 'FAIL'}] {bench}") + + passed = sum(1 for _, ok in results if ok) + print(f"\n {passed}/{len(results)} benchmarks completed in {total_duration:.1f}s") + print(f" Run ID : {run_id}") + print(f" Results : {run_dir}") + print(f" Index : {RUNS_INDEX}") + + return 0 if all(ok for _, ok in results) else 1 + + +# ── Entry point ─────────────────────────────────────────────────────────────── + +def main(): + parser = argparse.ArgumentParser(description="Run DSR benchmarks and track results") + 
parser.add_argument("--label", "-l", help="Human-readable label for this run") + parser.add_argument("--results-root", default=DEFAULT_RESULTS_ROOT, + help="Root directory for all run results") + parser.add_argument("--list", action="store_true", help="List all recorded runs") + parser.add_argument("--delete", metavar="RUN_ID", help="Remove a run from the index") + parser.add_argument("--direct", action="store_true", + help="Run benchmarks using BENCH_RESULTS_DIR from env, skip index registration") + parser.add_argument("--baseline", action="store_true", + help="Run only the curated low-noise Python baseline set") + args = parser.parse_args() + + benchmarks = BASELINE_BENCHMARKS if args.baseline else ALL_BENCHMARKS + + if args.list: + cmd_list() + return 0 + + if args.delete: + cmd_delete(args.delete) + return 0 + + if args.direct: + return cmd_run_direct(benchmarks) + + return cmd_run(args.label, args.results_root, benchmarks) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/benchmarks/report.py b/benchmarks/report.py new file mode 100644 index 0000000..fda994f --- /dev/null +++ b/benchmarks/report.py @@ -0,0 +1,1514 @@ +#!/usr/bin/env python3 +""" +Generate a visual HTML report from benchmark results. 
+ +Single run: + python report.py # latest run + python report.py --run 20260314T153000 + +Compare two runs: + python report.py --run 20260314T153000 --baseline 20260313T090000 + +List available runs: + python report.py --list +""" + +import json +import os +import sys +import glob +import argparse +from typing import Optional +from datetime import datetime + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +DEFAULT_RESULTS_ROOT = os.path.abspath(os.path.join(SCRIPT_DIR, "results")) +RUNS_INDEX = os.path.join(DEFAULT_RESULTS_ROOT, "runs.json") + + +# ── Data loading ────────────────────────────────────────────────────────────── + +def load_runs_index() -> list: + if not os.path.isfile(RUNS_INDEX): + return [] + with open(RUNS_INDEX) as f: + return json.load(f) + + +def load_run_metrics(run_dir: str) -> list: + """Load all JSON metric files from a run directory. + + Scans two locations: + - /*.json Python benchmark output + - /cpp/results/*.json C++ benchmark output (written by dsr_benchmarks) + """ + SKIP = {"run_info.json", "stability_summary.json"} + search_paths = [ + (run_dir, "*.json"), + (os.path.join(run_dir, "cpp", "results"), "*.json"), + ] + + cpp_dir = os.path.join(run_dir, "cpp", "results") + metrics = [] + for directory, pattern in search_paths: + lang = "cpp" if os.path.abspath(directory) == os.path.abspath(cpp_dir) else "python" + for path in sorted(glob.glob(os.path.join(directory, pattern))): + if os.path.basename(path) in SKIP: + continue + try: + with open(path) as f: + data = json.load(f) + data["_source_file"] = os.path.basename(path) + data["_lang"] = lang + metrics.append(data) + except Exception as e: + print(f"Warning: could not load {path}: {e}", file=sys.stderr) + return metrics + + +def load_run_info(run_dir: str) -> dict: + path = os.path.join(run_dir, "run_info.json") + if os.path.isfile(path): + with open(path) as f: + return json.load(f) + return {} + + +def resolve_run_dir(run_id: str, results_root: str) -> str: + """Find 
the directory for a run_id (handles labelled dirs like 20260314T153000_label).""" + # Direct match + direct = os.path.join(results_root, run_id) + if os.path.isdir(direct): + return direct + # Prefix match (labelled) + for entry in os.listdir(results_root): + if entry.startswith(run_id): + candidate = os.path.join(results_root, entry) + if os.path.isdir(candidate): + return candidate + # Look up in index + for r in load_runs_index(): + if r["id"] == run_id: + candidate = os.path.join(results_root, r["dir"]) + if os.path.isdir(candidate): + return candidate + raise FileNotFoundError(f"Run directory not found for id '{run_id}'") + + +_UNIT_TO_NS = {"ns": 1, "us": 1_000, "µs": 1_000, "ms": 1_000_000, "s": 1_000_000_000} + + +def _to_ns(value: float, unit: str) -> float: + return value * _UNIT_TO_NS.get(unit.strip(), 1) + + +def infer_profile(bench: dict, metric: Optional[dict] = None) -> str: + metadata = bench.get("metadata", {}) or {} + meta_profile = str(metadata.get("profile", "")).strip().lower() + if meta_profile in {"baseline", "extended", "other"}: + return meta_profile + + tags = (metric or {}).get("tags", {}) or {} + tag_values = {str(v).upper() for v in tags.values()} + tag_keys = {str(k).upper() for k in tags.keys()} + + if "BASELINE" in tag_keys or "BASELINE" in tag_values: + return "baseline" + if "EXTENDED" in tag_keys or "EXTENDED" in tag_values: + return "extended" + + source = bench.get("_source_file", "").lower() + bench_name = bench.get("benchmark_name", "").lower() + if "baseline" in source or "baseline" in bench_name: + return "baseline" + if "extended" in source or "extended" in bench_name: + return "extended" + if bench_name.startswith("crdt_") or source.startswith("crdt_"): + return "baseline" + return "other" + + +def flatten_metrics(bench_files: list) -> tuple[list, list]: + """Return (latency_metrics, throughput_metrics) as flat lists.""" + latency, throughput = [], [] + latency_keys: set = set() # (bench_name, metric_name) pairs with real 
latency data + for bench in bench_files: + bench_name = bench.get("benchmark_name", bench["_source_file"]) + lang = bench.get("_lang", "python") + for m in bench.get("metrics", []): + add = m.get("additional", {}) + tags = m.get("tags", {}) + unit = m.get("unit", "") + category = m.get("category", "") + profile = infer_profile(bench, m) + + # For scalability metrics with repeated names, append the tag that + # differentiates them (e.g. graph_size) so each row is unique. + metric_name = m["name"] + if tags: + tag_suffix = "_".join(f"{k}={v}" for k, v in tags.items() + if k in ("graph_size", "num_threads", "threads", "scale_factor")) + if tag_suffix: + metric_name = f"{metric_name}@{tag_suffix}" + + entry = { + "benchmark": bench_name, + "metric": metric_name, + "lang": lang, + "profile": profile, + "value": m["value"], + "unit": unit, + "additional": add, + } + + if category == "latency": + entry.update({ + "mean_ns": add.get("mean_ns", m["value"]), + "p50_ns": add.get("p50_ns", 0), + "p95_ns": add.get("p95_ns", 0), + "p99_ns": add.get("p99_ns", 0), + "min_ns": add.get("min_ns", 0), + "max_ns": add.get("max_ns", 0), + "count": int(add.get("count", 0)), + "has_percentiles": True, + }) + latency.append(entry) + latency_keys.add((bench_name, metric_name)) + elif category == "throughput": + entry.update({ + "ops_per_sec": m["value"], + "total_ops": add.get("total_operations", 0), + "duration_sec": add.get("duration_sec", add.get("duration_ms", 0) / 1000), + }) + throughput.append(entry) + elif category == "scalability" and unit in _UNIT_TO_NS: + # Only promote scalability entries that have no proper latency + # counterpart — avoids duplicates and preserves percentile data. 
+ if (bench_name, metric_name) in latency_keys: + continue + mean_ns = _to_ns(m["value"], unit) + entry.update({ + "mean_ns": mean_ns, + "p50_ns": 0, + "p95_ns": 0, + "p99_ns": 0, + "min_ns": 0, + "max_ns": 0, + "count": int(add.get("count", 0)), + "has_percentiles": False, + }) + latency.append(entry) + return latency, throughput + + +# ── Scalability flattening ──────────────────────────────────────────────────── + +SCALE_DIMS = ("threads", "graph_size", "agents") + + +def flatten_scalability(bench_files: list) -> list: + """Return a flat list of scalability data points. + + Any metric tagged with a recognised scale dimension (threads, graph_size, + or agents) is included — regardless of category — so latency, throughput, + and scalability records all contribute. + """ + rows = [] + for bench in bench_files: + lang = bench.get("_lang", "python") + bench_name = bench.get("benchmark_name", bench["_source_file"]) + for m in bench.get("metrics", []): + tags = m.get("tags", {}) + add = m.get("additional", {}) + scale_dim = next((d for d in SCALE_DIMS if d in tags), None) + if scale_dim is None: + continue + try: + scale_val = int(tags[scale_dim]) + except (ValueError, KeyError): + continue + cat = m.get("category", "") + rows.append({ + "benchmark": bench_name, + "operation": m["name"], + "lang": lang, + "profile": infer_profile(bench, m), + "category": cat, + "scale_dim": scale_dim, + "scale_val": scale_val, + "value": m["value"], + "unit": m.get("unit", ""), + "mean_ns": add.get("mean_ns", 0.0), + "p99_ns": add.get("p99_ns", 0.0), + "ops_per_sec": m["value"] if cat == "throughput" else 0.0, + }) + return rows + + +def compute_efficiency(rows: list) -> list: + """Compute a normalised-performance series for each (benchmark, op, dim). 
+ + threads / agents → parallel efficiency = thr_N / (N × thr_1) × 100 + graph_size → relative throughput = thr_N / thr_min × 100 + (100 % at smallest graph, declining as graph grows) + + Returns a list of {benchmark, operation, scale_dim, scale_val, efficiency, + ops_per_sec} dicts. The JS chart uses the same field regardless of which + formula was applied; the label/title is updated per-dimension in JS. + """ + from collections import defaultdict + + groups: dict = defaultdict(list) + for r in rows: + if r["category"] != "throughput": + continue + key = (r["benchmark"], r["operation"], r["scale_dim"]) + groups[key].append(r) + + result = [] + for (bench, op, dim), pts in groups.items(): + pts_sorted = sorted(pts, key=lambda p: p["scale_val"]) + + if dim in ("threads", "agents"): + baseline = next((p for p in pts_sorted if p["scale_val"] == 1), None) + if baseline is None or baseline["ops_per_sec"] == 0: + continue + thr_1 = baseline["ops_per_sec"] + for p in pts_sorted: + N = p["scale_val"] + if N == 0: + continue + efficiency = (p["ops_per_sec"] / (N * thr_1)) * 100.0 + result.append({ + "benchmark": bench, "operation": op, "scale_dim": dim, + "scale_val": N, "efficiency": round(efficiency, 2), + "ops_per_sec": p["ops_per_sec"], + }) + + elif dim == "graph_size": + if not pts_sorted or pts_sorted[0]["ops_per_sec"] == 0: + continue + thr_min = pts_sorted[0]["ops_per_sec"] + for p in pts_sorted: + relative = (p["ops_per_sec"] / thr_min) * 100.0 + result.append({ + "benchmark": bench, "operation": op, "scale_dim": dim, + "scale_val": p["scale_val"], "efficiency": round(relative, 2), + "ops_per_sec": p["ops_per_sec"], + }) + + return result + + +# ── HTML generation ─────────────────────────────────────────────────────────── + +def generate_html( + run_info: dict, + bench_files: list, + output_path: str, + baseline_info: Optional[dict] = None, + baseline_files: Optional[list] = None, +): + latency, throughput = flatten_metrics(bench_files) + b_latency, b_throughput 
= (flatten_metrics(baseline_files) if baseline_files else ([], [])) + + scl_rows = flatten_scalability(bench_files) + eff_rows = compute_efficiency(scl_rows) + b_scl_rows = flatten_scalability(baseline_files) if baseline_files else [] + + run_id = run_info.get("id", "unknown") + run_label = run_info.get("label") or run_id + b_id = baseline_info.get("id", "") if baseline_info else "" + b_label = (baseline_info.get("label") or b_id) if baseline_info else "" + comparing = bool(baseline_files) + generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + latency_json = json.dumps(latency) + throughput_json = json.dumps(throughput) + b_latency_json = json.dumps(b_latency) + b_throughput_json = json.dumps(b_throughput) + run_info_json = json.dumps(run_info) + b_info_json = json.dumps(baseline_info or {}) + scl_json = json.dumps(scl_rows) + eff_json = json.dumps(eff_rows) + b_scl_json = json.dumps(b_scl_rows) + + # Summary rows + summary = [] + for b in bench_files: + summary.append({ + "benchmark": b.get("benchmark_name", b["_source_file"]), + "profile": infer_profile(b), + "timestamp": b.get("timestamp", ""), + "duration": f"{b.get('total_duration_sec', 0):.1f}s", + "metrics": len(b.get("metrics", [])), + "source": b["_source_file"], + }) + summary_json = json.dumps(summary) + + compare_tab = '' if comparing else "" + compare_panel = "" + if comparing: + compare_panel = '
' + + html = f""" + + + + +Cortex Benchmark Report — {run_label} + + + + + + + +
+
+

Cortex Benchmark Report

+ {run_label} + {f' vs baseline: {b_label}' if comparing else ""} +
+
+ Generated: {generated_at}
+ {run_info.get("git_hash") and f"git: {run_info['git_hash']}" or ""} +
+
+ + + +
+ + +
+
+
+
+

Latency — Mean (µs)

+
+
+
+

Throughput (ops/sec)

+
+
+
+
+
+

Run Info

+
+
+
+
+
+

Benchmark Files

+ + + +
BenchmarkTimestampDurationMetricsFile
+
+
+
+ + +
+
+
+ + +
+ + + +
+ +
+
Scroll to zoom · Click & drag to pan · Double-click to reset
+
+

Latency Distribution — Mean / p50 / p95 / p99

+
+
+
+

Latency Detail

+
+
+
+
+ + +
+
+
+ + +
+ + + +
+
+
+

Operations per Second

+
+
+
+

Throughput Detail

+
+
+
+
+ + +
+
+
+ + +
+
+
+

Throughput (ops/sec)

+
+
+
+

Mean Latency (µs)

+
+
+
+
+

Scaling Efficiency (% of ideal linear)

+
+
+
+

Scalability Detail

+ + + + + + +
BenchmarkOperationDimensionScaleThroughputMean LatencyEfficiency %
+
+
+
+ + +{compare_panel} + + +
+
+
+ +
+ + + + +""" + + with open(output_path, "w", encoding="utf-8") as f: + f.write(html) + print(f"Report written to: {os.path.abspath(output_path)}") + + +# ── Entry point ─────────────────────────────────────────────────────────────── + +def main(): + parser = argparse.ArgumentParser(description="Generate visual HTML benchmark report") + parser.add_argument("--run", "-r", help="Run ID to report on (default: latest)") + parser.add_argument("--baseline", "-b", help="Run ID to compare against") + parser.add_argument("--results-root", default=DEFAULT_RESULTS_ROOT) + parser.add_argument("--output", "-o", help="Output HTML file (default: /report.html)") + parser.add_argument("--list", action="store_true", help="List available runs") + args = parser.parse_args() + + runs = load_runs_index() + + if args.list: + if not runs: + print("No runs recorded. Run 'python run_all.py' first.") + return + print(f"{'ID':<22} {'Label':<20} Dir") + print("-" * 70) + for r in runs: + print(f"{r['id']:<22} {(r.get('label') or '-'):<20} {r['dir']}") + return + + # Resolve target run + if args.run: + run_dir = resolve_run_dir(args.run, args.results_root) + elif runs: + # Latest run + latest = runs[-1] + run_dir = os.path.join(args.results_root, latest["dir"]) + print(f"Using latest run: {latest['id']}") + else: + # Fallback: flat results directory (old layout) + run_dir = args.results_root + print(f"No runs index found, reading from: {run_dir}") + + run_info = load_run_info(run_dir) + bench_files = load_run_metrics(run_dir) + if not bench_files: + print(f"No metric JSON files found in: {run_dir}", file=sys.stderr) + sys.exit(1) + print(f"Loaded {len(bench_files)} metric file(s) from run '{run_info.get('id', run_dir)}'") + + # Resolve baseline + baseline_info, baseline_files = None, None + if args.baseline: + b_dir = resolve_run_dir(args.baseline, args.results_root) + baseline_info = load_run_info(b_dir) + baseline_files = load_run_metrics(b_dir) + print(f"Baseline: {len(baseline_files)} 
file(s) from run '{baseline_info.get('id', b_dir)}'") + + output_path = args.output or os.path.join(run_dir, "report.html") + generate_html(run_info, bench_files, output_path, baseline_info, baseline_files) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/run_benchmarks.py b/benchmarks/run_benchmarks.py new file mode 100644 index 0000000..34ffbf1 --- /dev/null +++ b/benchmarks/run_benchmarks.py @@ -0,0 +1,873 @@ +#!/usr/bin/env python3 +""" +Top-level DSR benchmark runner — executes C++ and Python suites in one shot. + +Usage: + python run_benchmarks.py # run both suites + python run_benchmarks.py --label "after-fix" # named run + python run_benchmarks.py --cpp-only # skip Python + python run_benchmarks.py --python-only # skip C++ + python run_benchmarks.py --build # cmake build before running + python run_benchmarks.py --all # include hidden tests ([.multi], [.extended]) + python run_benchmarks.py --cpp-filter "[LATENCY]"# pass filter to dsr_benchmarks + python run_benchmarks.py --report # open HTML report when done + python run_benchmarks.py --compare # compare against a previous run + python run_benchmarks.py --list # list recorded runs + python run_benchmarks.py --delete # remove a run from the index + python run_benchmarks.py --repeat 5 # run C++ 5× and report median + python run_benchmarks.py --priority -10 # run with higher OS priority (requires root) + python run_benchmarks.py --taskset 0,1 # pin C++ benchmarks to CPU cores 0 and 1 + python run_benchmarks.py --no-cpu-tune # skip governor/turbo tuning (Linux) +""" + +import sys +import os +import subprocess +import time +import json +import argparse +import platform +import shlex +import tempfile +from typing import Optional +from datetime import datetime + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +PYTHON_DIR = os.path.join(SCRIPT_DIR, "python") +BUILD_DIR = os.path.join(SCRIPT_DIR, "build") +RESULTS_ROOT = os.path.join(SCRIPT_DIR, "results") +RUNS_INDEX = 
os.path.join(RESULTS_ROOT, "runs.json") +BASELINE_CPP_FILTER = "[BASELINE]~[.multi]" +# Catch2 v3 has no single spec that matches both visible and hidden tests. +# _run_cpp_once detects this sentinel and runs the binary twice: +# 1. no filter → all visible tests +# 2. "[.]" → all hidden tests (tags starting with '.') +ALL_CPP_FILTER = "__ALL_INCLUDING_HIDDEN__" +DEFAULT_STABILITY_WARN_PCT = 5.0 + + +# ── Index helpers (mirrors python/run_all.py) ────────────────────────────────── + +def load_runs() -> list: + if not os.path.isfile(RUNS_INDEX): + return [] + try: + with open(RUNS_INDEX) as f: + return json.load(f) + except PermissionError: + print(f"WARNING: cannot read benchmark index: {RUNS_INDEX} (permission denied)") + return [] + + +def save_runs(runs: list): + os.makedirs(RESULTS_ROOT, exist_ok=True) + try: + fd, tmp_path = tempfile.mkstemp(prefix="runs.", suffix=".json.tmp", dir=RESULTS_ROOT) + try: + with os.fdopen(fd, "w") as f: + json.dump(runs, f, indent=2) + os.replace(tmp_path, RUNS_INDEX) + except Exception: + try: + os.unlink(tmp_path) + except OSError: + pass + raise + except PermissionError: + print(f"WARNING: cannot update benchmark index: {RUNS_INDEX} (permission denied)") + + +def register_run(run_info: dict): + runs = load_runs() + runs = [r for r in runs if r["id"] != run_info["id"]] + runs.append(run_info) + runs.sort(key=lambda r: r["id"]) + save_runs(runs) + + +# ── Locate C++ binary ───────────────────────────────────────────────────────── + +def find_cpp_binary(override: Optional[str]) -> Optional[str]: + if override: + return override if os.path.isfile(override) else None + candidate = os.path.join(BUILD_DIR, "dsr_benchmarks") + return candidate if os.path.isfile(candidate) else None + + +def win_to_wsl(path: str) -> str: + """Convert a Windows absolute path to a WSL /mnt/... 
path.""" + path = path.replace("\\", "/") + if len(path) >= 2 and path[1] == ":": + drive = path[0].lower() + path = f"/mnt/{drive}{path[2:]}" + return path + + +def is_wsl_needed() -> bool: + """Return True if we're on Windows and wsl.exe is available (ELF binary).""" + if platform.system() != "Windows": + return False + try: + subprocess.run(["wsl", "--version"], capture_output=True, timeout=3) + return True + except Exception: + return False + + +# ── Build step ──────────────────────────────────────────────────────────────── + +def build_cpp() -> bool: + if not os.path.isdir(BUILD_DIR): + print(f"Build directory not found: {BUILD_DIR}") + return False + print("Building C++ benchmarks...") + if is_wsl_needed(): + wsl_build = win_to_wsl(BUILD_DIR) + result = subprocess.run( + ["wsl", "-e", "bash", "-c", f"cmake --build {wsl_build} --parallel"], + cwd=SCRIPT_DIR, + ) + else: + result = subprocess.run( + ["cmake", "--build", BUILD_DIR, "--parallel"], + cwd=SCRIPT_DIR, + ) + return result.returncode == 0 + + +# ── Median merge ────────────────────────────────────────────────────────────── + +def _median(values: list) -> float: + """Return the median of a list of numbers (handles even-length lists).""" + import statistics + return statistics.median(values) if values else 0.0 + + +def _summarize_repeat_stability(src_dirs: list[str], dest_dir: str, + warn_pct: Optional[float] = DEFAULT_STABILITY_WARN_PCT): + import statistics + + summaries = [] + all_files: set[str] = set() + for d in src_dirs: + results_d = os.path.join(d, "results") + if os.path.isdir(results_d): + for f in os.listdir(results_d): + if f.endswith(".json"): + all_files.add(f) + + def metric_key(m: dict) -> str: + tags = m.get("tags", {}) + tag_str = ",".join(f"{k}={v}" for k, v in sorted(tags.items())) + return f"{m.get('category', '')}|{m['name']}|{m.get('unit', '')}|{tag_str}" + + for basename in sorted(all_files): + loaded = [] + for d in src_dirs: + path = os.path.join(d, "results", basename) + if 
os.path.isfile(path): + with open(path) as fh: + loaded.append(json.load(fh)) + + metric_runs: dict[str, list[dict]] = {} + for run_data in loaded: + for m in run_data.get("metrics", []): + metric_runs.setdefault(metric_key(m), []).append(m) + + for key, peers in sorted(metric_runs.items()): + values = [p["value"] for p in peers if isinstance(p.get("value"), (int, float))] + if len(values) < 2: + continue + median = statistics.median(values) + min_v = min(values) + max_v = max(values) + spread_pct = ((max_v - min_v) / median * 100.0) if median else 0.0 + stdev_pct = ((statistics.stdev(values) / median) * 100.0) if len(values) > 1 and median else 0.0 + exemplar = peers[0] + summaries.append({ + "source_file": basename, + "name": exemplar["name"], + "category": exemplar.get("category", ""), + "unit": exemplar.get("unit", ""), + "tags": exemplar.get("tags", {}), + "repeat_values": values, + "median": median, + "min": min_v, + "max": max_v, + "spread_pct": round(spread_pct, 2), + "stdev_pct": round(stdev_pct, 2), + }) + + os.makedirs(dest_dir, exist_ok=True) + out_path = os.path.join(dest_dir, "stability_summary.json") + with open(out_path, "w") as fh: + json.dump({"metrics": summaries}, fh, indent=2) + + warnings = [] + if summaries: + print("\nRepeat stability summary:") + for s in summaries: + print(f" {s['category']}/{s['name']}: median={s['median']:.3f} {s['unit']} " + f"spread={s['spread_pct']:.2f}% stdev={s['stdev_pct']:.2f}%") + if warn_pct is not None and s["spread_pct"] > warn_pct: + warnings.append(s) + + if warnings: + print(f"\nStability warnings (spread > {warn_pct:.2f}%):") + for s in warnings: + print(f" {s['category']}/{s['name']} tags={s['tags']} spread={s['spread_pct']:.2f}%") + + return { + "warn_threshold_pct": warn_pct, + "warning_count": len(warnings), + "warnings": warnings, + "metrics": summaries, + } + + +def merge_cpp_results(src_dirs: list[str], dest_dir: str): + """ + Load the same JSON result files from N run directories and write a merged 
+ copy to dest_dir where each metric's numerical fields are replaced by the + median across all N runs. Non-numeric fields (name, unit, tags, category) + are taken from the first run. + + This cancels OS-scheduler noise: a single run that was preempted by a + Windows background process no longer inflates the reported mean. + """ + import statistics as _stats + + os.makedirs(dest_dir, exist_ok=True) + + # Collect all JSON basenames present in any source directory + all_files: set[str] = set() + for d in src_dirs: + results_d = os.path.join(d, "results") + if os.path.isdir(results_d): + for f in os.listdir(results_d): + if f.endswith(".json"): + all_files.add(f) + + merged_count = 0 + for basename in sorted(all_files): + # Load this file from every run that has it + loaded = [] + for d in src_dirs: + path = os.path.join(d, "results", basename) + if os.path.isfile(path): + try: + with open(path) as fh: + loaded.append(json.load(fh)) + except Exception as e: + print(f" Warning: could not load {path}: {e}", file=sys.stderr) + + if not loaded: + continue + + if len(loaded) == 1: + # Only one run has this file — copy as-is + import shutil + shutil.copy(os.path.join(src_dirs[0], "results", basename), + os.path.join(dest_dir, basename)) + continue + + # Build merged result: start from first run's structure + merged = json.loads(json.dumps(loaded[0])) # deep copy + + # Index metrics by category+name+unit+tags so latency/throughput records + # for the same operation do not get merged into each other. 
+ def metric_key(m: dict) -> str: + tags = m.get("tags", {}) + tag_str = ",".join(f"{k}={v}" for k, v in sorted(tags.items())) + return f"{m.get('category', '')}|{m['name']}|{m.get('unit', '')}|{tag_str}" + + per_run_metrics: dict[str, list[dict]] = {} + for run_data in loaded: + for m in run_data.get("metrics", []): + k = metric_key(m) + per_run_metrics.setdefault(k, []).append(m) + + merged_metrics = [] + for m in merged.get("metrics", []): + k = metric_key(m) + peers = per_run_metrics.get(k, [m]) + if len(peers) < 2: + merged_metrics.append(m) + continue + + merged_m = json.loads(json.dumps(m)) # deep copy + # Median the top-level value + values = [p["value"] for p in peers if isinstance(p.get("value"), (int, float))] + if values: + merged_m["value"] = _median(values) + + # Median all additional numeric fields + all_add_keys: set[str] = set() + for p in peers: + all_add_keys.update(p.get("additional", {}).keys()) + for key in all_add_keys: + vals = [p.get("additional", {}).get(key) + for p in peers if isinstance(p.get("additional", {}).get(key), (int, float))] + if vals: + merged_m.setdefault("additional", {})[key] = _median(vals) + + merged_metrics.append(merged_m) + + merged["metrics"] = merged_metrics + merged.setdefault("metadata", {})["repeat_runs"] = str(len(loaded)) + merged["metadata"]["aggregation"] = "median" + + out_path = os.path.join(dest_dir, basename) + with open(out_path, "w") as fh: + json.dump(merged, fh, indent=2) + merged_count += 1 + + print(f" Merged {merged_count} result file(s) from {len(src_dirs)} runs (median)") + + +# ── CPU tuning ──────────────────────────────────────────────────────────────── + +def _cpu_count() -> int: + try: + import multiprocessing + return multiprocessing.cpu_count() + except Exception: + return 1 + + +def _read_sysfs(path: str) -> Optional[str]: + try: + with open(path) as f: + return f.read().strip() + except OSError: + return None + + +def _write_sysfs(path: str, value: str) -> bool: + try: + with open(path, 
"w") as f: + f.write(value + "\n") + return True + except OSError: + return False + + +def setup_cpu_for_benchmarking() -> dict: + """ + Configure the CPU for stable benchmarking: + - Set scaling governor to 'performance' on all CPUs + - Disable turbo boost (Intel pstate or generic cpufreq boost) + + Returns a dict of original settings so restore_cpu_settings() can revert them. + Prints a warning and returns an empty dict if the process lacks write permission. + """ + if platform.system() != "Linux": + return {} + + saved = {"governors": {}, "intel_no_turbo": None, "amd_boost": None} + any_written = False + permission_error = False + + n_cpus = _cpu_count() + for i in range(n_cpus): + gov_path = f"/sys/devices/system/cpu/cpu{i}/cpufreq/scaling_governor" + current = _read_sysfs(gov_path) + if current is None: + continue + saved["governors"][gov_path] = current + if current != "performance": + if _write_sysfs(gov_path, "performance"): + any_written = True + else: + permission_error = True + + # Intel pstate: write "1" to disable turbo + intel_path = "/sys/devices/system/cpu/intel_pstate/no_turbo" + val = _read_sysfs(intel_path) + if val is not None: + saved["intel_no_turbo"] = val + if val != "1": + if _write_sysfs(intel_path, "1"): + any_written = True + else: + permission_error = True + + # AMD / generic: write "0" to disable boost + amd_path = "/sys/devices/system/cpu/cpufreq/boost" + val = _read_sysfs(amd_path) + if val is not None: + saved["amd_boost"] = val + if val != "0": + if _write_sysfs(amd_path, "0"): + any_written = True + else: + permission_error = True + + if permission_error: + print( + "\nWARNING: Could not set CPU governor/turbo (permission denied).\n" + " Run with sudo, or manually run: sudo pyperf system tune\n" + " Benchmarks may show instability due to frequency scaling.\n" + ) + return {} + + if any_written: + print(" CPU tuning: governor=performance, turbo disabled") + + return saved + + +def restore_cpu_settings(saved: dict): + """Revert CPU 
governor and turbo settings to the values captured by setup_cpu_for_benchmarking().""" + if not saved: + return + + for path, value in saved.get("governors", {}).items(): + _write_sysfs(path, value) + + if saved.get("intel_no_turbo") is not None: + _write_sysfs("/sys/devices/system/cpu/intel_pstate/no_turbo", saved["intel_no_turbo"]) + + if saved.get("amd_boost") is not None: + _write_sysfs("/sys/devices/system/cpu/cpufreq/boost", saved["amd_boost"]) + + print(" CPU settings restored") + + +# ── Run C++ suite ───────────────────────────────────────────────────────────── + +def _build_cpp_cmd(binary: str, catch2_filter: Optional[str], verbose: bool, + priority: Optional[int], taskset: Optional[str]) -> str: + """Build the shell command string for one C++ benchmark invocation.""" + parts = [] + if taskset: + parts += [f"taskset -c {shlex.quote(taskset)}"] + if priority is not None: + parts += [f"nice -n {priority}"] + wsl_binary = win_to_wsl(binary) if is_wsl_needed() else binary + parts.append(shlex.quote(wsl_binary)) + if catch2_filter: + parts.append(shlex.quote(catch2_filter)) + if verbose: + parts.append("--verbose") + return " ".join(parts) + + +def _run_cpp_once(binary: str, cpp_cwd: str, catch2_filter: Optional[str], + verbose: bool, priority: Optional[int], taskset: Optional[str]) -> tuple[bool, float]: + # Catch2 v3 has no single-spec "run everything including hidden". + # Handle the sentinel by running visible tests then hidden tests in the same cwd. 
def _run_cpp_once(binary: str, cpp_cwd: str, catch2_filter: Optional[str],
                  verbose: bool, priority: Optional[int], taskset: Optional[str]) -> tuple[bool, float]:
    """Execute the benchmark binary once in cpp_cwd; return (ok, seconds)."""
    # Catch2 v3 cannot express "visible + hidden" in a single spec, so the
    # sentinel triggers two invocations (no filter, then "[.]") in the same cwd.
    if catch2_filter == ALL_CPP_FILTER:
        ok_visible, t_visible = _run_cpp_once(binary, cpp_cwd, None, verbose, priority, taskset)
        ok_hidden, t_hidden = _run_cpp_once(binary, cpp_cwd, "[.]", verbose, priority, taskset)
        return ok_visible and ok_hidden, t_visible + t_hidden

    os.makedirs(cpp_cwd, exist_ok=True)
    begin = time.time()
    if is_wsl_needed():
        # On Windows the ELF binary runs inside WSL; cd into the (translated)
        # working directory so result files land in the right place.
        shell_cmd = _build_cpp_cmd(binary, catch2_filter, verbose, priority, taskset)
        proc = subprocess.run(
            ["wsl", "-e", "bash", "-c", f"cd {win_to_wsl(cpp_cwd)} && {shell_cmd}"])
    else:
        argv = []
        if taskset:
            argv += ["taskset", "-c", taskset]
        if priority is not None:
            argv += ["nice", "-n", str(priority)]
        argv.append(binary)
        if catch2_filter:
            argv.append(catch2_filter)
        if verbose:
            argv.append("--verbose")
        proc = subprocess.run(argv, cwd=cpp_cwd)
    return proc.returncode == 0, time.time() - begin


def run_cpp(binary: str, run_dir: str, catch2_filter: Optional[str], verbose: bool,
            repeat: int = 1, priority: Optional[int] = None, taskset: Optional[str] = None,
            stability_warn_pct: Optional[float] = DEFAULT_STABILITY_WARN_PCT):
    """
    Run dsr_benchmarks 'repeat' times. If repeat > 1, each invocation writes
    to a separate cpp_N/ subdirectory; results are then median-merged into
    cpp/results/ so the rest of the pipeline sees a single stable result set.

    Returns (ok, total_seconds, stability_summary_or_None).
    """
    print(f"\n{'=' * 70}")
    print(f"Running: C++ benchmarks ({os.path.basename(binary)})")
    if catch2_filter == ALL_CPP_FILTER:
        print("Filter : (all — visible + hidden)")
    elif catch2_filter:
        print(f"Filter : {catch2_filter}")
    if repeat > 1:
        print(f"Repeat : {repeat}× (median aggregation)")
    if priority is not None:
        print(f"Priority: nice {priority:+d}")
    if taskset:
        print(f"CPU affinity: {taskset}")
    print("=" * 70)

    suite_start = time.time()
    success = True
    stability = None

    if repeat <= 1:
        # Single run — write straight into the canonical cpp/ directory.
        cpp_cwd = os.path.join(run_dir, "cpp")
        print(f"Output : {cpp_cwd}/results/")
        ok, _ = _run_cpp_once(binary, cpp_cwd, catch2_filter, verbose, priority, taskset)
        success = ok
    else:
        # Multiple runs → one cpp_N/ directory each, then a median merge.
        run_cwds = []
        for r in range(1, repeat + 1):
            cpp_cwd = os.path.join(run_dir, f"cpp_{r}")
            print(f"\n--- Run {r}/{repeat} → {cpp_cwd}/results/ ---")
            ok, _ = _run_cpp_once(binary, cpp_cwd, catch2_filter, verbose, priority, taskset)
            if not ok:
                print(f"  Warning: run {r} exited non-zero")
                success = False
            run_cwds.append(cpp_cwd)

        dest = os.path.join(run_dir, "cpp", "results")
        print(f"\nMerging {repeat} runs → {dest}")
        merge_cpp_results(run_cwds, dest)
        stability = _summarize_repeat_stability(run_cwds, dest, warn_pct=stability_warn_pct)

    total_dur = time.time() - suite_start
    print(f"\nC++ suite {'PASSED' if success else 'FAILED'} in {total_dur:.1f}s")
    return success, total_dur, stability
+ """ + print(f"\n{'=' * 70}") + print("Running: Python benchmarks") + print(f"Output : {run_dir}/") + print("=" * 70) + + env = {**os.environ, "BENCH_RESULTS_DIR": run_dir} + cmd = [sys.executable, os.path.join(PYTHON_DIR, "run_all.py"), "--direct"] + if baseline: + cmd.append("--baseline") + # --direct: benchmarks write to BENCH_RESULTS_DIR, skip run_all.py's own + # index registration so run_benchmarks.py stays the single source of truth. + + start = time.time() + result = subprocess.run(cmd, cwd=PYTHON_DIR, env=env) + duration = time.time() - start + + ok = result.returncode == 0 + print(f"\nPython suite {'PASSED' if ok else 'FAILED'} in {duration:.1f}s") + return ok, duration + + +# ── Ownership / permission helpers ─────────────────────────────────────────── + +def _fix_run_permissions(run_dir: str): + """ + When the script is run via sudo, chown the run directory and the shared + results index back to the original user so they remain accessible without + root. Falls back to world-readable permissions when the original user + cannot be determined (e.g. direct root login). + """ + if os.getuid() != 0: + return # Not running as root — nothing to do. + + sudo_uid_str = os.environ.get("SUDO_UID") + sudo_gid_str = os.environ.get("SUDO_GID") + + if sudo_uid_str: + uid = int(sudo_uid_str) + gid = int(sudo_gid_str) if sudo_gid_str else uid + + def _chown_tree(path: str): + for dirpath, dirnames, filenames in os.walk(path, topdown=False): + for name in filenames: + try: + os.chown(os.path.join(dirpath, name), uid, gid) + except OSError: + pass + try: + os.chown(dirpath, uid, gid) + except OSError: + pass + + _chown_tree(run_dir) + + # Also fix the shared index file and RESULTS_ROOT itself so the user + # can write new runs later without sudo. 
+ for path in (RUNS_INDEX, RESULTS_ROOT): + try: + os.chown(path, uid, gid) + except OSError: + pass + + try: + import pwd as _pwd + username = _pwd.getpwuid(uid).pw_name + print(f" Ownership transferred to {username} (uid={uid}, gid={gid})") + except Exception: + print(f" Ownership transferred to uid={uid}, gid={gid}") + else: + # Direct root login — make results world-readable as a fallback. + import stat + _file_mode = (stat.S_IRUSR | stat.S_IWUSR | + stat.S_IRGRP | + stat.S_IROTH) + _dir_mode = _file_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH + + for dirpath, dirnames, filenames in os.walk(run_dir, topdown=False): + for name in filenames: + try: + os.chmod(os.path.join(dirpath, name), _file_mode) + except OSError: + pass + try: + os.chmod(dirpath, _dir_mode) + except OSError: + pass + + print(" Results made world-readable (root without sudo; SUDO_UID not set)") + + +# ── Commands ────────────────────────────────────────────────────────────────── + +def cmd_list(): + runs = load_runs() + if not runs: + print("No runs recorded yet.") + return + print(f"{'ID':<22} {'Label':<20} {'Suites':<12} {'Duration':>9}") + print("-" * 70) + for r in runs: + suites = ", ".join(r.get("suites_run", [])) or "-" + dur = f"{r.get('total_duration_sec', 0):.1f}s" + label = r.get("label") or "-" + print(f"{r['id']:<22} {label:<20} {suites:<12} {dur:>9}") + + +def cmd_delete(run_id: str): + runs = load_runs() + before = len(runs) + runs = [r for r in runs if r["id"] != run_id] + if len(runs) == before: + print(f"Run '{run_id}' not found in index.") + return + save_runs(runs) + print(f"Removed run '{run_id}' from index (files kept on disk).") + + +def cmd_run(args): + ts = datetime.now() + run_id = ts.strftime("%Y%m%dT%H%M%S%f") + dir_name = run_id if not args.label else f"{run_id}_{args.label.replace(' ', '-')}" + run_dir = os.path.join(RESULTS_ROOT, dir_name) + os.makedirs(run_dir, exist_ok=True) + + print("=" * 70) + print(" DSR Benchmark Suite (C++ + Python)") + print(f" Run 
ID : {run_id}") + if args.label: + print(f" Label : {args.label}") + print(f" Output : {run_dir}") + print("=" * 70) + + effective_cpp_filter = args.cpp_filter + if args.all and not effective_cpp_filter: + effective_cpp_filter = ALL_CPP_FILTER + elif args.baseline and not effective_cpp_filter: + effective_cpp_filter = BASELINE_CPP_FILTER + + # Optionally build C++ + if args.build: + if not build_cpp(): + print("Build failed — aborting.") + return 1 + + suites_run = [] + results = {} + total_start = time.time() + + # CPU tuning (Linux only, skipped with --no-cpu-tune or when Python-only) + cpu_saved = {} + if not getattr(args, "no_cpu_tune", False) and not args.python_only: + cpu_saved = setup_cpu_for_benchmarking() + + try: + # C++ suite + if not args.python_only: + binary = find_cpp_binary(args.cpp_binary) + if binary: + ok, dur, stability = run_cpp( + binary, run_dir, effective_cpp_filter, args.verbose, + repeat=args.repeat, priority=args.priority, taskset=args.taskset, + stability_warn_pct=args.stability_warn_pct, + ) + results["cpp"] = {"ok": ok, "duration_sec": dur, "stability": stability} + suites_run.append("cpp") + else: + print("\nWARNING: C++ binary not found. 
Use --cpp-binary or --build.") + print(f" Searched: {os.path.join(BUILD_DIR, 'dsr_benchmarks')}") + results["cpp"] = {"ok": False, "duration_sec": 0, "skipped": True} + + # Python suite + if not args.cpp_only: + ok, dur = run_python(run_dir, args.label, baseline=args.baseline) + results["python"] = {"ok": ok, "duration_sec": dur} + suites_run.append("python") + + finally: + restore_cpu_settings(cpu_saved) + + total_duration = time.time() - total_start + + # Gather git hash + try: + git_hash = subprocess.check_output( + ["git", "rev-parse", "--short", "HEAD"], + cwd=SCRIPT_DIR, stderr=subprocess.DEVNULL, + ).decode().strip() + except Exception: + git_hash = "" + + run_info = { + "id": run_id, + "label": args.label or "", + "dir": dir_name, + "timestamp": ts.isoformat(), + "total_duration_sec": round(total_duration, 2), + "suites_run": suites_run, + "suites_passed": [s for s in suites_run if results.get(s, {}).get("ok")], + "git_hash": git_hash, + "platform": platform.platform(), + "python": sys.version.split()[0], + } + + cpp_stability = results.get("cpp", {}).get("stability") + if cpp_stability: + run_info["cpp_stability"] = { + "warn_threshold_pct": cpp_stability.get("warn_threshold_pct"), + "warning_count": cpp_stability.get("warning_count", 0), + } + + with open(os.path.join(run_dir, "run_info.json"), "w") as f: + json.dump(run_info, f, indent=2) + + register_run(run_info) + + # Summary + print("\n" + "=" * 70) + print(" Summary") + print("=" * 70) + all_ok = True + for suite in ["cpp", "python"]: + if suite not in results: + continue + r = results[suite] + if r.get("skipped"): + print(f" [SKIP] {suite}") + else: + status = "PASS" if r["ok"] else "FAIL" + print(f" [{status}] {suite} ({r['duration_sec']:.1f}s)") + if not r["ok"]: + all_ok = False + + print(f"\n Run ID : {run_id}") + print(f" Results : {run_dir}") + print(f" Index : {RUNS_INDEX}") + + # Generate report + if args.report or args.compare: + report_args = ["--run", run_id, "--results-root", 
RESULTS_ROOT] + if args.compare: + report_args += ["--baseline", args.compare] + report_path = os.path.join(run_dir, "report.html") + report_args += ["--output", report_path] + + print(f"\nGenerating report...") + subprocess.run( + [sys.executable, os.path.join(SCRIPT_DIR, "report.py")] + report_args, + cwd=SCRIPT_DIR, + ) + + if args.open_report and os.path.isfile(report_path): + import webbrowser + webbrowser.open(f"file://{report_path}") + + _fix_run_permissions(run_dir) + return 0 if all_ok else 1 + + +# ── Entry point ─────────────────────────────────────────────────────────────── + +def main(): + parser = argparse.ArgumentParser( + description="Run DSR C++ and Python benchmarks together", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("--label", "-l", help="Human-readable label for this run") + parser.add_argument("--cpp-binary", metavar="PATH", + help=f"Path to dsr_benchmarks binary (default: {os.path.join(BUILD_DIR, 'dsr_benchmarks')})") + parser.add_argument("--cpp-filter", metavar="FILTER", + help='Catch2 test filter, e.g. 
"[LATENCY]" or "[THROUGHPUT]"') + parser.add_argument("--build", action="store_true", + help="Build C++ benchmarks before running") + parser.add_argument("--cpp-only", action="store_true", help="Skip Python suite") + parser.add_argument("--python-only", action="store_true", help="Skip C++ suite") + parser.add_argument("--all", action="store_true", + help="Run all C++ tests including hidden ones ([.multi], [.extended])") + parser.add_argument("--baseline", action="store_true", + help="Run only the curated low-noise baseline benchmark set") + parser.add_argument("--verbose", "-v", action="store_true", + help="Pass --verbose to C++ binary (shows Qt debug messages)") + parser.add_argument("--report", action="store_true", + help="Generate HTML report after the run") + parser.add_argument("--open", dest="open_report", action="store_true", + help="Open the HTML report in a browser after generation") + parser.add_argument("--compare", metavar="RUN_ID", + help="Generate a comparison report against this baseline run") + parser.add_argument("--list", action="store_true", help="List all recorded runs") + parser.add_argument("--delete", metavar="RUN_ID", + help="Remove a run from the index") + parser.add_argument("--repeat", "-r", type=int, default=1, metavar="N", + help="Run C++ benchmarks N times and report the median (reduces OS noise)") + parser.add_argument("--priority", type=int, default=None, metavar="NICE", + help="Set process nice level (e.g. -10); values < 0 require root/sudo") + parser.add_argument("--taskset", metavar="CPULIST", + help="Pin C++ benchmarks to CPU cores via taskset (e.g. 
'0,1')") + parser.add_argument("--no-cpu-tune", action="store_true", + help="Skip automatic CPU governor/turbo configuration (Linux only)") + parser.add_argument("--stability-warn-pct", type=float, default=DEFAULT_STABILITY_WARN_PCT, + metavar="PCT", + help="Warn when repeated C++ metrics exceed this spread percentage") + + args = parser.parse_args() + + if args.list: + cmd_list() + return 0 + + if args.delete: + cmd_delete(args.delete) + return 0 + + if args.all and args.baseline: + print("Error: --all and --baseline are mutually exclusive.") + return 1 + + if args.cpp_only and args.python_only: + print("Error: --cpp-only and --python-only are mutually exclusive.") + return 1 + + return cmd_run(args) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/benchmarks/scalability/agent_scaling_bench.cpp b/benchmarks/scalability/agent_scaling_bench.cpp new file mode 100644 index 0000000..3764f0d --- /dev/null +++ b/benchmarks/scalability/agent_scaling_bench.cpp @@ -0,0 +1,300 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../core/timing_utils.h" +#include "../core/metrics_collector.h" +#include "../core/report_generator.h" +#include "../fixtures/multi_agent_fixture.h" +#include "../fixtures/graph_generator.h" + +using namespace DSR; +using namespace DSR::Benchmark; +using namespace std::chrono; + +// Multi-agent scaling benchmarks. Tagged [.multi] so they are excluded from +// the default test run (DDS multi-agent tests are slow and require specific +// network setup). Opt in with: --cpp-filter "[SCALABILITY][agents]" +// +// Loop over {1, 2, 4} agents. One thread per agent operates on its own +// DSRGraph instance; a 3-second window measures total throughput and latency. 
+ +static constexpr auto AGENT_DUR = std::chrono::seconds(3); + +// ── Node insert ─────────────────────────────────────────────────────────────── + +TEST_CASE("Node insert agent scaling", "[SCALABILITY][agents][.multi][PROFILE][MULTIAGENT]") { + GraphGenerator generator; + MetricsCollector collector("node_insert_agent_scaling"); + + for (uint32_t N : {1u, 2u, 4u}) { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(N, config_file)); + fixture.wait_for_sync(); + + std::atomic total_ops{0}; + std::atomic failed_ops{0}; + std::atomic stop_flag{false}; + std::barrier sync_point(N); + + std::vector> per_thread_samples(N); + for (auto& s : per_thread_samples) s.reserve(500000 / N); + + std::vector threads; + threads.reserve(N); + + auto wall_start = steady_clock::now(); + + for (uint32_t i = 0; i < N; ++i) { + threads.emplace_back([&, agent_idx = i]() { + auto* graph = fixture.get_agent(agent_idx); + uint64_t base_id = 800000ULL + agent_idx * 200000ULL; + uint64_t local_ops = 0; + auto& samples = per_thread_samples[agent_idx]; + + sync_point.arrive_and_wait(); + + while (!stop_flag.load(std::memory_order_relaxed)) { + auto node = GraphGenerator::create_test_node( + base_id + local_ops, graph->get_agent_id()); + uint64_t ts = bench_now(); + auto res = graph->insert_node(node); + samples.push_back(bench_now() - ts); + if (!res.has_value()) + failed_ops.fetch_add(1, std::memory_order_relaxed); + local_ops++; + } + + total_ops.fetch_add(local_ops, std::memory_order_relaxed); + }); + } + + std::this_thread::sleep_for(AGENT_DUR); + stop_flag.store(true, std::memory_order_relaxed); + for (auto& th : threads) th.join(); + + if (failed_ops.load() > 0) + std::cerr << "[BENCH node_insert agents=" << N << "] " + << failed_ops.load() << " insert_node calls failed\n"; + + auto dur = duration_cast(steady_clock::now() - wall_start); + + LatencyTracker merged; + for (auto& s : per_thread_samples) + for (auto v : s) 
merged.record(v); + + const std::string n_str = std::to_string(N); + collector.record_throughput("node_insert", total_ops.load(), dur, + {{"agents", n_str}}); + if (!merged.empty()) + collector.record_latency_stats("node_insert", merged.stats(), + {{"agents", n_str}}); + + double ops_per_sec = static_cast(total_ops.load()) / + (static_cast(dur.count()) / 1000.0); + collector.record_scalability("node_insert", N, ops_per_sec, "ops/sec", + {{"agents", n_str}, {"scale_dim", "agents"}}); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_insert_agent_scaling"); +} + +// ── Node read ───────────────────────────────────────────────────────────────── + +TEST_CASE("Node read agent scaling", "[SCALABILITY][agents][.multi][PROFILE][MULTIAGENT]") { + GraphGenerator generator; + MetricsCollector collector("node_read_agent_scaling"); + + for (uint32_t N : {1u, 2u, 4u}) { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(N, config_file)); + fixture.wait_for_sync(); + + // Pre-populate 1000 nodes on agent 0; they sync to all agents. 
+ auto* graph0 = fixture.get_agent(0); + std::vector node_ids; + node_ids.reserve(1000); + for (uint64_t i = 0; i < 1000; ++i) { + auto node = GraphGenerator::create_test_node(0, graph0->get_agent_id()); + auto res = graph0->insert_node(node); + REQUIRE(res.has_value()); + node_ids.push_back(res.value()); + } + fixture.wait_for_sync(); + + const size_t pool_size = node_ids.size(); + + std::atomic total_ops{0}; + std::atomic failed_ops{0}; + std::atomic stop_flag{false}; + std::barrier sync_point(N); + + std::vector> per_thread_samples(N); + for (auto& s : per_thread_samples) s.reserve(500000 / N); + + std::vector threads; + threads.reserve(N); + + auto wall_start = steady_clock::now(); + + for (uint32_t i = 0; i < N; ++i) { + threads.emplace_back([&, agent_idx = i]() { + auto* graph = fixture.get_agent(agent_idx); + uint64_t local_ops = 0; + auto& samples = per_thread_samples[agent_idx]; + + sync_point.arrive_and_wait(); + + while (!stop_flag.load(std::memory_order_relaxed)) { + uint64_t id = node_ids[local_ops % pool_size]; + uint64_t ts = bench_now(); + auto node = graph->get_node(id); + samples.push_back(bench_now() - ts); + if (!node.has_value()) + failed_ops.fetch_add(1, std::memory_order_relaxed); + local_ops++; + } + + total_ops.fetch_add(local_ops, std::memory_order_relaxed); + }); + } + + std::this_thread::sleep_for(AGENT_DUR); + stop_flag.store(true, std::memory_order_relaxed); + for (auto& th : threads) th.join(); + + if (failed_ops.load() > 0) + std::cerr << "[BENCH node_read agents=" << N << "] " + << failed_ops.load() << " get_node calls returned empty\n"; + + auto dur = duration_cast(steady_clock::now() - wall_start); + + LatencyTracker merged; + for (auto& s : per_thread_samples) + for (auto v : s) merged.record(v); + + const std::string n_str = std::to_string(N); + collector.record_throughput("node_read", total_ops.load(), dur, + {{"agents", n_str}}); + if (!merged.empty()) + collector.record_latency_stats("node_read", merged.stats(), + {{"agents", 
n_str}}); + + double ops_per_sec = static_cast(total_ops.load()) / + (static_cast(dur.count()) / 1000.0); + collector.record_scalability("node_read", N, ops_per_sec, "ops/sec", + {{"agents", n_str}, {"scale_dim", "agents"}}); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_read_agent_scaling"); +} + +// ── Node update ─────────────────────────────────────────────────────────────── + +TEST_CASE("Node update agent scaling", "[SCALABILITY][agents][.multi][PROFILE][MULTIAGENT]") { + GraphGenerator generator; + MetricsCollector collector("node_update_agent_scaling"); + + for (uint32_t N : {1u, 2u, 4u}) { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(N, config_file)); + fixture.wait_for_sync(); + + // Each agent gets its own dedicated node to avoid update contention. + std::vector agent_node_ids(N); + for (uint32_t i = 0; i < N; ++i) { + auto* graph = fixture.get_agent(i); + auto node = GraphGenerator::create_test_node( + 700000 + i, graph->get_agent_id(), + "agent_update_node_" + std::to_string(i)); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + agent_node_ids[i] = res.value(); + } + fixture.wait_for_sync(); + + std::atomic total_ops{0}; + std::atomic failed_ops{0}; + std::atomic stop_flag{false}; + std::barrier sync_point(N); + + std::vector> per_thread_samples(N); + for (auto& s : per_thread_samples) s.reserve(500000 / N); + + std::vector threads; + threads.reserve(N); + + auto wall_start = steady_clock::now(); + + for (uint32_t i = 0; i < N; ++i) { + threads.emplace_back([&, agent_idx = i]() { + auto* graph = fixture.get_agent(agent_idx); + uint64_t nid = agent_node_ids[agent_idx]; + uint64_t local_ops = 0; + auto& samples = per_thread_samples[agent_idx]; + + sync_point.arrive_and_wait(); + + while (!stop_flag.load(std::memory_order_relaxed)) { + auto node = graph->get_node(nid); + if (node) { + 
graph->add_or_modify_attrib_local( + *node, static_cast(local_ops % 1000)); + uint64_t ts = bench_now(); + bool ok = graph->update_node(*node); + samples.push_back(bench_now() - ts); + if (!ok) + failed_ops.fetch_add(1, std::memory_order_relaxed); + local_ops++; + } else { + failed_ops.fetch_add(1, std::memory_order_relaxed); + } + } + + total_ops.fetch_add(local_ops, std::memory_order_relaxed); + }); + } + + std::this_thread::sleep_for(AGENT_DUR); + stop_flag.store(true, std::memory_order_relaxed); + for (auto& th : threads) th.join(); + + if (failed_ops.load() > 0) + std::cerr << "[BENCH node_update agents=" << N << "] " + << failed_ops.load() << " get_node/update_node calls failed\n"; + + auto dur = duration_cast(steady_clock::now() - wall_start); + + LatencyTracker merged; + for (auto& s : per_thread_samples) + for (auto v : s) merged.record(v); + + const std::string n_str = std::to_string(N); + collector.record_throughput("node_update", total_ops.load(), dur, + {{"agents", n_str}}); + if (!merged.empty()) + collector.record_latency_stats("node_update", merged.stats(), + {{"agents", n_str}}); + + double ops_per_sec = static_cast(total_ops.load()) / + (static_cast(dur.count()) / 1000.0); + collector.record_scalability("node_update", N, ops_per_sec, "ops/sec", + {{"agents", n_str}, {"scale_dim", "agents"}}); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_update_agent_scaling"); +} diff --git a/benchmarks/scalability/graph_size_impact_bench.cpp b/benchmarks/scalability/graph_size_impact_bench.cpp new file mode 100644 index 0000000..618ea84 --- /dev/null +++ b/benchmarks/scalability/graph_size_impact_bench.cpp @@ -0,0 +1,294 @@ +#include +#include + +#include "../core/nanobench_adapter.h" +#include "../core/metrics_collector.h" +#include "../core/report_generator.h" +#include "../fixtures/multi_agent_fixture.h" +#include "../fixtures/graph_generator.h" + +using namespace DSR; +using namespace 
DSR::Benchmark; + +TEST_CASE("Graph size impact on performance", "[SCALABILITY][graphsize]") { + MetricsCollector collector("graph_size_impact"); + GraphGenerator generator; + + SECTION("Node lookup performance vs graph size") { + for (uint32_t size : {100, 1000, 10000}) { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + // Populate graph and store actual IDs + std::vector node_ids; + node_ids.reserve(size); + for (uint32_t i = 0; i < static_cast(size); ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto result = graph->insert_node(node); + REQUIRE(result.has_value()); + node_ids.push_back(result.value()); + } + + // Cache warmup: touch every node once + for (const auto id : node_ids) { (void)graph->get_node(id); } + + size_t idx = 0; + bool last_ok = true; + auto bench = make_latency_bench(1000, 0); // manual warmup done above + bench.run("node_lookup", [&] { + auto node = graph->get_node(node_ids[idx++ % node_ids.size()]); + last_ok = node.has_value(); + ankerl::nanobench::doNotOptimizeAway(node); + }); + REQUIRE(last_ok); + + auto stats = nb_to_stats(bench); + collector.record_scalability( + "node_lookup", + size, + stats.mean_ns, + "ns", + {{"graph_size", std::to_string(size)}}); + + INFO(size << " nodes - Lookup: " << stats.mean_ns << " ns"); + } + } + + SECTION("Node insertion performance vs graph size") { + for (uint32_t size : {100, 1000, 10000}) { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + // Populate graph to target size + for (uint32_t i = 0; i < static_cast(size); ++i) { + auto node = GraphGenerator::create_test_node( + 2000000 + i, graph->get_agent_id()); + auto res = graph->insert_node(node); + 
REQUIRE(res.has_value()); + } + + // ~35µs/op: 300 iters/epoch × 50 epochs ≈ 0.53 s + uint64_t id_counter = 3000000; + auto bench = make_latency_bench(50); + bench.minEpochIterations(300); + bench.run("node_insert", [&] { + auto node = GraphGenerator::create_test_node( + id_counter++, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + ankerl::nanobench::doNotOptimizeAway(res); + }); + + auto stats = nb_to_stats(bench); + collector.record_scalability( + "node_insert_latency", + size, + stats.mean_us(), + "us", + {{"graph_size", std::to_string(size)}}); + + INFO(size << " existing nodes - Insert: " << stats.mean_us() << " us"); + } + } + + SECTION("Edge operations vs edge count") { + for (uint32_t edge_count : {100, 1000, 5000}) { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto root = graph->get_node_root(); + REQUIRE(root.has_value()); + + // Create nodes for edges and store actual IDs + std::vector node_ids; + node_ids.reserve(edge_count + 100); + for (uint32_t i = 0; i < edge_count + 100; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto result = graph->insert_node(node); + REQUIRE(result.has_value()); + node_ids.push_back(result.value()); + } + + // Create edges for the first edge_count nodes + for (uint32_t i = 0; i < edge_count; ++i) { + auto edge = GraphGenerator::create_test_edge( + root->id(), node_ids[i], graph->get_agent_id()); + REQUIRE(graph->insert_or_assign_edge(edge)); + } + + // Cache warmup: touch every existing edge once + for (uint32_t i = 0; i < edge_count; ++i) { + (void)graph->get_edge(root->id(), node_ids[i], "test_edge"); + } + + // Measure edge lookup performance + // ~32µs at edge_count=100 (unstable, needs 300 iters); larger counts are stable. + size_t lookup_min_iters = (edge_count <= 100) ? 
300 : 1; + size_t lookup_epochs = (edge_count <= 100) ? 50 : 200; + size_t lookup_idx = 0; + bool last_ok = true; + auto lookup_bench = make_latency_bench(lookup_epochs, 0); // manual warmup done above + lookup_bench.minEpochIterations(lookup_min_iters); + lookup_bench.run("edge_lookup", [&] { + uint64_t target = node_ids[lookup_idx++ % edge_count]; + auto edge = graph->get_edge(root->id(), target, "test_edge"); + last_ok = edge.has_value(); + ankerl::nanobench::doNotOptimizeAway(edge); + }); + REQUIRE(last_ok); + + auto lookup_stats = nb_to_stats(lookup_bench); + collector.record_scalability( + "edge_lookup", + edge_count, + lookup_stats.mean_ns, + "ns", + {{"edge_count", std::to_string(edge_count)}}); + + // Measure edge insertion performance (last 100 nodes have no edges yet) + // ~13µs/op (idempotent upsert): 800 iters/epoch × 50 epochs ≈ 0.52 s + size_t insert_idx = 0; + auto insert_bench = make_latency_bench(50); + insert_bench.minEpochIterations(800); + insert_bench.run("edge_insert", [&] { + uint64_t target = node_ids[edge_count + (insert_idx++ % 100)]; + auto edge = GraphGenerator::create_test_edge( + root->id(), target, graph->get_agent_id()); + bool ok = graph->insert_or_assign_edge(edge); + REQUIRE(ok); + ankerl::nanobench::doNotOptimizeAway(ok); + }); + + auto insert_stats = nb_to_stats(insert_bench); + collector.record_scalability( + "edge_insert_latency", + edge_count, + insert_stats.mean_us(), + "us", + {{"edge_count", std::to_string(edge_count)}}); + + INFO(edge_count << " edges - Lookup: " << lookup_stats.mean_ns + << " ns, Insert: " << insert_stats.mean_us() << " us"); + } + } + + SECTION("get_nodes performance vs graph size") { + for (uint32_t size : {100, 1000, 5000}) { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + // Populate + for (uint32_t i = 0; i < static_cast(size); ++i) { + auto 
node = GraphGenerator::create_test_node( + 5000000 + i, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + } + + auto bench = make_latency_bench(100); + bench.run("get_all_nodes", [&] { + auto nodes = graph->get_nodes(); + ankerl::nanobench::doNotOptimizeAway(nodes); + }); + + auto stats = nb_to_stats(bench); + collector.record_scalability( + "get_all_nodes", + size, + stats.mean_us(), + "us", + {{"graph_size", std::to_string(size)}}); + + INFO(size << " nodes - get_nodes: " << stats.mean_us() << " us"); + } + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "graph_size_impact"); +} + +TEST_CASE("Memory pressure impact", "[SCALABILITY][memory]") { + MetricsCollector collector("memory_pressure"); + GraphGenerator generator; + + SECTION("Operation latency under memory pressure") { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + // Create increasingly large graph and measure periodically + std::vector> size_vs_latency; + + for (uint32_t target_size : {1000, 5000, 10000, 20000}) { + // Add nodes to reach target size + uint64_t current_size = graph->get_nodes().size(); + for (uint64_t i = current_size; i < target_size; ++i) { + auto node = GraphGenerator::create_test_node( + 6000000 + i, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + } + + // ~28–41µs/op: 500 iters/epoch × 50 epochs ≈ 0.7–1.0 s + uint64_t id_counter = 7000000 + static_cast(target_size) * 100; + auto bench = make_latency_bench(50); + bench.minEpochIterations(500); + bench.run("insert_under_pressure", [&] { + auto node = GraphGenerator::create_test_node( + id_counter++, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + 
ankerl::nanobench::doNotOptimizeAway(res); + }); + + auto stats = nb_to_stats(bench); + collector.record_scalability( + "insert_under_pressure", + target_size, + stats.mean_us(), + "us", + {{"graph_size", std::to_string(target_size)}}); + + size_vs_latency.push_back({target_size, stats.mean_us()}); + INFO(target_size << " nodes - Insert latency: " << stats.mean_us() << " us"); + } + + // Check for non-linear degradation + if (size_vs_latency.size() >= 2) { + double first_latency = size_vs_latency.front().second; + double last_latency = size_vs_latency.back().second; + double size_ratio = static_cast(size_vs_latency.back().first) / + static_cast(size_vs_latency.front().first); + double latency_ratio = last_latency / first_latency; + + collector.record("latency_degradation_ratio", MetricCategory::Scalability, + latency_ratio / size_ratio, "x"); + + INFO("Latency degradation ratio: " << latency_ratio / size_ratio << "x"); + } + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "memory_pressure"); +} diff --git a/benchmarks/scalability/graph_size_scaling_bench.cpp b/benchmarks/scalability/graph_size_scaling_bench.cpp new file mode 100644 index 0000000..8195264 --- /dev/null +++ b/benchmarks/scalability/graph_size_scaling_bench.cpp @@ -0,0 +1,272 @@ +#include + +#include "../core/nanobench_adapter.h" +#include "../core/metrics_collector.h" +#include "../core/report_generator.h" +#include "../fixtures/multi_agent_fixture.h" +#include "../fixtures/graph_generator.h" + +using namespace DSR; +using namespace DSR::Benchmark; + +// For each operation, measures latency (1000 samples) and derives throughput +// from the nanobench mean, at three pre-existing graph sizes: {100, 1000, 10000}. +// "graph_size" = number of nodes already in the graph before measurement begins. 
+ +// ── Node insert ─────────────────────────────────────────────────────────────── + +TEST_CASE("Node insert graph size scaling", "[SCALABILITY][graphsize]") { + GraphGenerator generator; + MetricsCollector collector("node_insert_graphsize_scaling"); + + for (uint32_t N : {100u, 1000u, 10000u}) { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + // Pre-populate to target size + for (uint32_t i = 0; i < N; ++i) { + auto node = GraphGenerator::create_test_node(2000000 + i, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + } + + // ~38µs/op: 300 iters/epoch × 50 epochs ≈ 0.57 s + uint64_t id_counter = 3000000; + auto bench = make_latency_bench(50); + bench.minEpochIterations(300); + bench.run("node_insert", [&] { + auto node = GraphGenerator::create_test_node(id_counter++, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + ankerl::nanobench::doNotOptimizeAway(res); + }); + + auto stats = nb_to_stats(bench); + const std::string n_str = std::to_string(N); + collector.record_latency_stats("node_insert", stats, {{"graph_size", n_str}}); + collector.record("node_insert", MetricCategory::Throughput, + nb_throughput(bench), "ops/sec", {{"graph_size", n_str}}); + collector.record_scalability("node_insert", N, stats.mean_ns, "ns", + {{"graph_size", n_str}, {"scale_dim", "graph_size"}}); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_insert_graphsize_scaling"); +} + +// ── Node read ───────────────────────────────────────────────────────────────── + +TEST_CASE("Node read graph size scaling", "[SCALABILITY][graphsize]") { + GraphGenerator generator; + MetricsCollector collector("node_read_graphsize_scaling"); + + for (uint32_t N : {100u, 1000u, 10000u}) { + 
MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + std::vector node_ids; + node_ids.reserve(N); + for (uint32_t i = 0; i < N; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + node_ids.push_back(res.value()); + } + + // Cache warmup: touch every node once so all N entries are in L3 + for (const auto id : node_ids) { (void)graph->get_node(id); } + + // ~900 ns–1 µs/op: 10 000 iters/epoch × 200 epochs ≈ 1.8 s + size_t idx = 0; + bool last_ok = true; + auto bench = make_latency_bench(200, 0); // manual warmup done above + bench.minEpochIterations(10000); + bench.run("node_read", [&] { + auto node = graph->get_node(node_ids[idx++ % node_ids.size()]); + last_ok = node.has_value(); + ankerl::nanobench::doNotOptimizeAway(node); + }); + REQUIRE(last_ok); + + auto stats = nb_to_stats(bench); + const std::string n_str = std::to_string(N); + collector.record_latency_stats("node_read", stats, {{"graph_size", n_str}}); + collector.record("node_read", MetricCategory::Throughput, + nb_throughput(bench), "ops/sec", {{"graph_size", n_str}}); + collector.record_scalability("node_read", N, stats.mean_ns, "ns", + {{"graph_size", n_str}, {"scale_dim", "graph_size"}}); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_read_graphsize_scaling"); +} + +// ── Node update ─────────────────────────────────────────────────────────────── + +TEST_CASE("Node update graph size scaling", "[SCALABILITY][graphsize]") { + GraphGenerator generator; + MetricsCollector collector("node_update_graphsize_scaling"); + + for (uint32_t N : {100u, 1000u, 10000u}) { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); 
+ auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + std::vector node_ids; + node_ids.reserve(N); + for (uint32_t i = 0; i < N; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + node_ids.push_back(res.value()); + } + + // ~35µs/op: 350 iters/epoch × 50 epochs ≈ 0.61 s + uint64_t update_counter = 0; + size_t idx = 0; + auto bench = make_latency_bench(50); + bench.minEpochIterations(350); + bench.run("node_update", [&] { + auto node = graph->get_node(node_ids[idx++ % node_ids.size()]); + REQUIRE(node.has_value()); + graph->add_or_modify_attrib_local( + *node, static_cast(update_counter++ % 1000)); + bool ok = graph->update_node(*node); + REQUIRE(ok); + ankerl::nanobench::doNotOptimizeAway(ok); + }); + + auto stats = nb_to_stats(bench); + const std::string n_str = std::to_string(N); + collector.record_latency_stats("node_update", stats, {{"graph_size", n_str}}); + collector.record("node_update", MetricCategory::Throughput, + nb_throughput(bench), "ops/sec", {{"graph_size", n_str}}); + collector.record_scalability("node_update", N, stats.mean_ns, "ns", + {{"graph_size", n_str}, {"scale_dim", "graph_size"}}); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_update_graphsize_scaling"); +} + +// ── Edge insert ─────────────────────────────────────────────────────────────── + +TEST_CASE("Edge insert graph size scaling", "[SCALABILITY][graphsize]") { + GraphGenerator generator; + MetricsCollector collector("edge_insert_graphsize_scaling"); + + for (uint32_t N : {100u, 1000u, 10000u}) { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto root = graph->get_node_root(); + REQUIRE(root.has_value()); + + // Pre-populate N target nodes + 
std::vector node_ids; + node_ids.reserve(N); + for (uint32_t i = 0; i < N; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + node_ids.push_back(res.value()); + } + + // ~12µs (N≤1000): 800 iters/epoch × 50 epochs ≈ 0.48 s + // ~225µs (N=10000): 100 iters/epoch × 50 epochs ≈ 1.13 s + size_t idx = 0; + auto bench = make_latency_bench(50); + bench.minEpochIterations(N <= 1000 ? 800 : 100); + bench.run("edge_insert", [&] { + uint64_t target = node_ids[idx++ % node_ids.size()]; + auto edge = GraphGenerator::create_test_edge( + root->id(), target, graph->get_agent_id()); + bool ok = graph->insert_or_assign_edge(edge); + REQUIRE(ok); + ankerl::nanobench::doNotOptimizeAway(ok); + }); + + auto stats = nb_to_stats(bench); + const std::string n_str = std::to_string(N); + collector.record_latency_stats("edge_insert", stats, {{"graph_size", n_str}}); + collector.record("edge_insert", MetricCategory::Throughput, + nb_throughput(bench), "ops/sec", {{"graph_size", n_str}}); + collector.record_scalability("edge_insert", N, stats.mean_ns, "ns", + {{"graph_size", n_str}, {"scale_dim", "graph_size"}}); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "edge_insert_graphsize_scaling"); +} + +// ── Edge read ───────────────────────────────────────────────────────────────── + +TEST_CASE("Edge read graph size scaling", "[SCALABILITY][graphsize]") { + GraphGenerator generator; + MetricsCollector collector("edge_read_graphsize_scaling"); + + for (uint32_t N : {100u, 1000u, 10000u}) { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto root = graph->get_node_root(); + REQUIRE(root.has_value()); + + // Pre-populate N nodes + edges + std::vector target_ids; + 
target_ids.reserve(N); + for (uint32_t i = 0; i < N; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + target_ids.push_back(res.value()); + auto edge = GraphGenerator::create_test_edge( + root->id(), res.value(), graph->get_agent_id()); + REQUIRE(graph->insert_or_assign_edge(edge)); + } + + // Cache warmup: touch every edge once + for (const auto id : target_ids) { (void)graph->get_edge(root->id(), id, "test_edge"); } + + size_t idx = 0; + bool last_ok = true; + auto bench = make_latency_bench(1000, 0); // manual warmup done above + bench.run("edge_read", [&] { + uint64_t target = target_ids[idx++ % target_ids.size()]; + auto edge = graph->get_edge(root->id(), target, "test_edge"); + last_ok = edge.has_value(); + ankerl::nanobench::doNotOptimizeAway(edge); + }); + REQUIRE(last_ok); + + auto stats = nb_to_stats(bench); + const std::string n_str = std::to_string(N); + collector.record_latency_stats("edge_read", stats, {{"graph_size", n_str}}); + collector.record("edge_read", MetricCategory::Throughput, + nb_throughput(bench), "ops/sec", {{"graph_size", n_str}}); + collector.record_scalability("edge_read", N, stats.mean_ns, "ns", + {{"graph_size", n_str}, {"scale_dim", "graph_size"}}); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "edge_read_graphsize_scaling"); +} diff --git a/benchmarks/scalability/multi_agent_sync_bench.cpp b/benchmarks/scalability/multi_agent_sync_bench.cpp new file mode 100644 index 0000000..6707751 --- /dev/null +++ b/benchmarks/scalability/multi_agent_sync_bench.cpp @@ -0,0 +1,286 @@ +#include +#include +#include +#include + +#include "../core/timing_utils.h" +#include "../core/metrics_collector.h" +#include "../core/report_generator.h" +#include "../fixtures/multi_agent_fixture.h" +#include "../fixtures/graph_generator.h" + +using namespace DSR; +using namespace DSR::Benchmark; + 
+TEST_CASE("Multi-agent synchronization benchmarks", "[SCALABILITY][sync][.multi][PROFILE][MULTIAGENT]") { + GraphGenerator generator; + MetricsCollector collector("multi_agent_sync"); + + SECTION("Initial sync time vs agent count") { + for (uint32_t num_agents : {2, 4, 8, 16}) { + auto config_file = generator.generate_empty_graph(); + + LatencyTracker tracker(10); + + for (int trial = 0; trial < 10; ++trial) { + MultiAgentFixture fixture; + + uint64_t start = get_unix_timestamp(); + bool created = fixture.create_agents(num_agents, config_file); + if (!created) { + WARN("Could not create " << num_agents << " agents"); + break; + } + + fixture.wait_for_sync(); + bool converged = fixture.verify_convergence(); + uint64_t elapsed = get_unix_timestamp() - start; + + if (converged) { + tracker.record(elapsed); + } + + // Cleanup before next trial + } + + if (tracker.count() > 0) { + auto stats = tracker.stats(); + collector.record_scalability( + "initial_sync_time", + num_agents, + stats.mean_ms(), + "ms", + {{"num_agents", std::to_string(num_agents)}}); + + INFO(num_agents << " agents - Initial sync: " << stats.mean_ms() << " ms"); + } + } + } + + SECTION("Convergence time after operation") { + for (uint32_t num_agents : {2, 4, 8}) { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + + if (!fixture.create_agents(num_agents, config_file)) { + WARN("Could not create " << num_agents << " agents"); + continue; + } + fixture.wait_for_sync(); + + LatencyTracker tracker(50); + + // Measure convergence time after node insertion + for (int i = 0; i < 50; ++i) { + auto* sender = fixture.get_agent(0); + auto node = GraphGenerator::create_test_node( + 700000 + i, sender->get_agent_id(), + "sync_node_" + std::to_string(i)); + + uint64_t start = get_unix_timestamp(); + sender->insert_node(node); + + auto conv_time = fixture.measure_convergence_time(); + if (conv_time.count() >= 0) { + tracker.record(static_cast(conv_time.count()) * 1'000'000); // ms 
to ns + } + } + + if (tracker.count() > 0) { + auto stats = tracker.stats(); + collector.record_scalability( + "convergence_after_insert", + num_agents, + stats.mean_ms(), + "ms", + {{"num_agents", std::to_string(num_agents)}}); + + INFO(num_agents << " agents - Convergence time: " << stats.mean_ms() << " ms"); + } + } + } + + SECTION("Broadcast time to all agents") { + for (uint32_t num_agents : {2, 4, 8}) { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + + if (!fixture.create_agents(num_agents, config_file)) { + WARN("Could not create " << num_agents << " agents"); + continue; + } + fixture.wait_for_sync(); + + LatencyTracker tracker(50); + + // Track when each agent receives the update + std::vector> receive_times(num_agents - 1); + std::vector> received(num_agents - 1); + + for (size_t i = 1; i < num_agents; ++i) { + auto* receiver = fixture.get_agent(i); + QObject::connect(receiver, &DSR::DSRGraph::update_node_signal, receiver, + [&, idx = i - 1](uint64_t id, const std::string& type, DSR::SignalInfo) { + if (id >= 800000 && id < 900000 && !received[idx].load()) { + receive_times[idx].store(get_unix_timestamp()); + received[idx].store(true); + } + }, Qt::DirectConnection); + } + + auto* sender = fixture.get_agent(0); + + for (int i = 0; i < 50; ++i) { + // Reset tracking + for (size_t j = 0; j < num_agents - 1; ++j) { + receive_times[j].store(0); + received[j].store(false); + } + + auto node = GraphGenerator::create_test_node( + 800000 + i, sender->get_agent_id(), + "broadcast_node_" + std::to_string(i)); + + uint64_t send_time = get_unix_timestamp(); + sender->insert_node(node); + + // Wait for all receivers + auto start = std::chrono::steady_clock::now(); + while (true) { + bool all_received = true; + for (size_t j = 0; j < num_agents - 1; ++j) { + if (!received[j].load()) { + all_received = false; + break; + } + } + + if (all_received) break; + + fixture.process_events(1); + + if (std::chrono::steady_clock::now() - start 
> std::chrono::seconds(5)) { + break; + } + } + + // Find max receive time (last agent to receive) + uint64_t max_time = 0; + for (size_t j = 0; j < num_agents - 1; ++j) { + if (received[j].load()) { + max_time = std::max(max_time, receive_times[j].load()); + } + } + + if (max_time > send_time) { + tracker.record(max_time - send_time); + } + } + + if (tracker.count() > 0) { + auto stats = tracker.stats(); + collector.record_scalability( + "broadcast_to_all", + num_agents, + stats.mean_us(), + "us", + {{"num_agents", std::to_string(num_agents)}}); + + INFO(num_agents << " agents - Broadcast time: " << stats.mean_us() << " us"); + } + } + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "multi_agent_sync"); +} + +TEST_CASE("Scaling efficiency", "[SCALABILITY][efficiency][.multi][PROFILE][MULTIAGENT]") { + GraphGenerator generator; + MetricsCollector collector("scaling_efficiency"); + + std::map throughputs; + + SECTION("Throughput scaling with agents") { + for (uint32_t num_agents : {1, 2, 4, 8}) { + MultiAgentFixture fixture; + auto config_file = generator.generate_empty_graph(); + + if (!fixture.create_agents(num_agents, config_file)) { + WARN("Could not create " << num_agents << " agents"); + continue; + } + fixture.wait_for_sync(); + + constexpr auto TEST_DURATION = std::chrono::seconds(3); + std::atomic total_ops{0}; + std::atomic stop_flag{false}; + + std::vector threads; + threads.reserve(num_agents); + + auto start = std::chrono::steady_clock::now(); + + for (size_t i = 0; i < num_agents; ++i) { + threads.emplace_back([&, agent_idx = i]() { + auto* graph = fixture.get_agent(agent_idx); + uint64_t base_id = 900000 + agent_idx * 50000; + uint64_t local_ops = 0; + + while (!stop_flag.load(std::memory_order_relaxed)) { + auto node = GraphGenerator::create_test_node( + base_id + local_ops, graph->get_agent_id()); + graph->insert_node(node); + local_ops++; + } + + total_ops.fetch_add(local_ops, 
std::memory_order_relaxed); + }); + } + + std::this_thread::sleep_for(TEST_DURATION); + stop_flag.store(true); + + for (auto& t : threads) { + t.join(); + } + + auto actual_duration = std::chrono::duration_cast( + std::chrono::steady_clock::now() - start); + + double ops_per_sec = static_cast(total_ops.load()) / + (static_cast(actual_duration.count()) / 1000.0); + + throughputs[num_agents] = ops_per_sec; + + collector.record_scalability( + "throughput_scaling", + num_agents, + ops_per_sec, + "ops/sec", + {{"num_agents", std::to_string(num_agents)}}); + + INFO(num_agents << " agents - Throughput: " << ops_per_sec << " ops/sec"); + } + + // Calculate scaling efficiency + if (throughputs.count(1) > 0 && throughputs.count(2) > 0) { + double efficiency_2 = throughputs[2] / (2 * throughputs[1]) * 100; + collector.record("scaling_efficiency_2_agents", MetricCategory::Scalability, + efficiency_2, "%"); + INFO("Scaling efficiency (2 agents): " << efficiency_2 << "%"); + } + + if (throughputs.count(1) > 0 && throughputs.count(4) > 0) { + double efficiency_4 = throughputs[4] / (4 * throughputs[1]) * 100; + collector.record("scaling_efficiency_4_agents", MetricCategory::Scalability, + efficiency_4, "%"); + INFO("Scaling efficiency (4 agents): " << efficiency_4 << "%"); + } + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "scaling_efficiency"); +} diff --git a/benchmarks/scalability/thread_scaling_bench.cpp b/benchmarks/scalability/thread_scaling_bench.cpp new file mode 100644 index 0000000..86f543d --- /dev/null +++ b/benchmarks/scalability/thread_scaling_bench.cpp @@ -0,0 +1,538 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "../core/nanobench_adapter.h" +#include "../core/timing_utils.h" +#include "../core/metrics_collector.h" +#include "../core/report_generator.h" +#include "../fixtures/multi_agent_fixture.h" +#include "../fixtures/graph_generator.h" + 
+using namespace DSR; +using namespace DSR::Benchmark; +using namespace std::chrono; + +// Measures throughput + latency across {1, 2, 4, 8} threads for each +// operation. Each iteration runs a 5-second window; per-thread raw latency +// samples are merged into a single LatencyTracker for aggregate stats. +// A record_scalability() entry is added so the Scalability tab can plot +// the efficiency curve (scale_dim = "threads"). +// +// nanobench wraps each (op, thread-count) run so results appear in the shared +// nanobench table (stdout + results/nanobench_report.md). bench.batch() is +// set to total_ops so the table shows per-operation throughput, not wall time. + +static constexpr auto THREAD_DUR = std::chrono::seconds(5); + +// ── Node insert ─────────────────────────────────────────────────────────────── + +TEST_CASE("Node insert thread scaling", "[SCALABILITY][threads]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("node_insert_thread_scaling"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + ankerl::nanobench::Bench bench; + bench.output(&nb_report_stream()).warmup(0).epochs(1).epochIterations(1); + + for (uint32_t N : {1u, 2u, 4u, 8u}) { + std::atomic total_ops{0}; + std::atomic failed_ops{0}; + std::atomic stop_flag{false}; + std::barrier sync_point(N); + + std::vector> per_thread_samples(N); + for (auto& s : per_thread_samples) s.reserve(2000000 / N); + + std::vector threads; + threads.reserve(N); + + auto wall_start = steady_clock::now(); + + bench.run("node_insert_" + std::to_string(N) + "t", [&] { + for (uint32_t t = 0; t < N; ++t) { + threads.emplace_back([&, tid = t]() { + uint64_t base_id = 200000ULL + tid * 200000ULL; + uint64_t local_ops = 0; + auto& samples = per_thread_samples[tid]; + + sync_point.arrive_and_wait(); + + while (!stop_flag.load(std::memory_order_relaxed)) { + 
auto node = GraphGenerator::create_test_node( + base_id + local_ops, graph->get_agent_id()); + uint64_t ts = bench_now(); + auto res = graph->insert_node(node); + samples.push_back(bench_now() - ts); + if (!res.has_value()) + failed_ops.fetch_add(1, std::memory_order_relaxed); + local_ops++; + } + + total_ops.fetch_add(local_ops, std::memory_order_relaxed); + }); + } + + std::this_thread::sleep_for(THREAD_DUR); + stop_flag.store(true, std::memory_order_relaxed); + for (auto& th : threads) th.join(); + + bench.batch(total_ops.load()); + ankerl::nanobench::doNotOptimizeAway(total_ops.load()); + }); + + if (failed_ops.load() > 0) + std::cerr << "[BENCH node_insert threads=" << N << "] " + << failed_ops.load() << " insert_node calls failed\n"; + + auto dur = duration_cast(steady_clock::now() - wall_start); + + LatencyTracker merged; + for (auto& s : per_thread_samples) + for (auto v : s) merged.record(v); + + const std::string n_str = std::to_string(N); + collector.record_throughput("node_insert", total_ops.load(), dur, + {{"threads", n_str}}); + if (!merged.empty()) + collector.record_latency_stats("node_insert", merged.stats(), + {{"threads", n_str}}); + + double ops_per_sec = static_cast(total_ops.load()) / + (static_cast(dur.count()) / 1000.0); + collector.record_scalability("node_insert", N, ops_per_sec, "ops/sec", + {{"threads", n_str}, {"scale_dim", "threads"}}); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_insert_thread_scaling"); +} + +// ── Node read ───────────────────────────────────────────────────────────────── + +TEST_CASE("Node read thread scaling", "[SCALABILITY][threads]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("node_read_thread_scaling"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + // Pre-populate 
once; all thread-count iterations share this pool. + std::vector node_ids; + node_ids.reserve(1000); + for (uint64_t i = 0; i < 1000; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + node_ids.push_back(res.value()); + } + const size_t pool_size = node_ids.size(); + + ankerl::nanobench::Bench bench; + bench.output(&nb_report_stream()).warmup(0).epochs(1).epochIterations(1); + + for (uint32_t N : {1u, 2u, 4u, 8u}) { + std::atomic total_ops{0}; + std::atomic failed_ops{0}; + std::atomic stop_flag{false}; + std::barrier sync_point(N); + + std::vector> per_thread_samples(N); + for (auto& s : per_thread_samples) s.reserve(2000000 / N); + + std::vector threads; + threads.reserve(N); + + auto wall_start = steady_clock::now(); + + bench.run("node_read_" + std::to_string(N) + "t", [&] { + for (uint32_t t = 0; t < N; ++t) { + threads.emplace_back([&, tid = t]() { + uint64_t local_ops = 0; + auto& samples = per_thread_samples[tid]; + + sync_point.arrive_and_wait(); + + while (!stop_flag.load(std::memory_order_relaxed)) { + uint64_t id = node_ids[local_ops % pool_size]; + uint64_t ts = bench_now(); + auto node = graph->get_node(id); + samples.push_back(bench_now() - ts); + if (!node.has_value()) + failed_ops.fetch_add(1, std::memory_order_relaxed); + local_ops++; + } + + total_ops.fetch_add(local_ops, std::memory_order_relaxed); + }); + } + + std::this_thread::sleep_for(THREAD_DUR); + stop_flag.store(true, std::memory_order_relaxed); + for (auto& th : threads) th.join(); + + bench.batch(total_ops.load()); + ankerl::nanobench::doNotOptimizeAway(total_ops.load()); + }); + + if (failed_ops.load() > 0) + std::cerr << "[BENCH node_read threads=" << N << "] " + << failed_ops.load() << " get_node calls returned empty\n"; + + auto dur = duration_cast(steady_clock::now() - wall_start); + + LatencyTracker merged; + for (auto& s : per_thread_samples) + for (auto v : s) merged.record(v); + 
+ const std::string n_str = std::to_string(N); + collector.record_throughput("node_read", total_ops.load(), dur, + {{"threads", n_str}}); + if (!merged.empty()) + collector.record_latency_stats("node_read", merged.stats(), + {{"threads", n_str}}); + + double ops_per_sec = static_cast(total_ops.load()) / + (static_cast(dur.count()) / 1000.0); + collector.record_scalability("node_read", N, ops_per_sec, "ops/sec", + {{"threads", n_str}, {"scale_dim", "threads"}}); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_read_thread_scaling"); +} + +// ── Node update ─────────────────────────────────────────────────────────────── + +TEST_CASE("Node update thread scaling", "[SCALABILITY][threads]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("node_update_thread_scaling"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + // Pre-insert 8 nodes (one per thread for the largest N); each thread + // updates its own node to measure scaling without lock contention. 
+ constexpr uint32_t MAX_THREADS = 8; + std::vector node_ids; + node_ids.reserve(MAX_THREADS); + for (uint32_t t = 0; t < MAX_THREADS; ++t) { + auto node = GraphGenerator::create_test_node( + 500000 + t, graph->get_agent_id(), + "update_node_" + std::to_string(t)); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + node_ids.push_back(res.value()); + } + + ankerl::nanobench::Bench bench; + bench.output(&nb_report_stream()).warmup(0).epochs(1).epochIterations(1); + + for (uint32_t N : {1u, 2u, 4u, 8u}) { + std::atomic total_ops{0}; + std::atomic failed_ops{0}; + std::atomic stop_flag{false}; + std::barrier sync_point(N); + + std::vector> per_thread_samples(N); + for (auto& s : per_thread_samples) s.reserve(2000000 / N); + + std::vector threads; + threads.reserve(N); + + auto wall_start = steady_clock::now(); + + bench.run("node_update_" + std::to_string(N) + "t", [&] { + for (uint32_t t = 0; t < N; ++t) { + threads.emplace_back([&, tid = t]() { + uint64_t local_ops = 0; + auto& samples = per_thread_samples[tid]; + uint64_t nid = node_ids[tid]; + + sync_point.arrive_and_wait(); + + while (!stop_flag.load(std::memory_order_relaxed)) { + auto node = graph->get_node(nid); + if (node) { + graph->add_or_modify_attrib_local( + *node, static_cast(local_ops % 1000)); + uint64_t ts = bench_now(); + bool ok = graph->update_node(*node); + samples.push_back(bench_now() - ts); + if (!ok) + failed_ops.fetch_add(1, std::memory_order_relaxed); + local_ops++; + } else { + failed_ops.fetch_add(1, std::memory_order_relaxed); + } + } + + total_ops.fetch_add(local_ops, std::memory_order_relaxed); + }); + } + + std::this_thread::sleep_for(THREAD_DUR); + stop_flag.store(true, std::memory_order_relaxed); + for (auto& th : threads) th.join(); + + bench.batch(total_ops.load()); + ankerl::nanobench::doNotOptimizeAway(total_ops.load()); + }); + + if (failed_ops.load() > 0) + std::cerr << "[BENCH node_update threads=" << N << "] " + << failed_ops.load() << " get_node/update_node 
calls failed\n"; + + auto dur = duration_cast(steady_clock::now() - wall_start); + + LatencyTracker merged; + for (auto& s : per_thread_samples) + for (auto v : s) merged.record(v); + + const std::string n_str = std::to_string(N); + collector.record_throughput("node_update", total_ops.load(), dur, + {{"threads", n_str}}); + if (!merged.empty()) + collector.record_latency_stats("node_update", merged.stats(), + {{"threads", n_str}}); + + double ops_per_sec = static_cast(total_ops.load()) / + (static_cast(dur.count()) / 1000.0); + collector.record_scalability("node_update", N, ops_per_sec, "ops/sec", + {{"threads", n_str}, {"scale_dim", "threads"}}); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_update_thread_scaling"); +} + +// ── Edge insert ─────────────────────────────────────────────────────────────── + +TEST_CASE("Edge insert thread scaling", "[SCALABILITY][threads]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("edge_insert_thread_scaling"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto root = graph->get_node_root(); + REQUIRE(root.has_value()); + + // Pre-populate target node pool; shared across all N iterations. 
+ constexpr uint32_t POOL_SIZE = 10000; + std::vector pool; + pool.reserve(POOL_SIZE); + for (uint64_t i = 0; i < POOL_SIZE; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + pool.push_back(res.value()); + } + const size_t pool_size = pool.size(); + + ankerl::nanobench::Bench bench; + bench.output(&nb_report_stream()).warmup(0).epochs(1).epochIterations(1); + + for (uint32_t N : {1u, 2u, 4u, 8u}) { + std::atomic total_ops{0}; + std::atomic failed_ops{0}; + std::atomic stop_flag{false}; + std::barrier sync_point(N); + + std::vector> per_thread_samples(N); + for (auto& s : per_thread_samples) s.reserve(2000000 / N); + + std::vector threads; + threads.reserve(N); + + const uint32_t stride = static_cast(pool_size / N) + 1; + auto wall_start = steady_clock::now(); + + bench.run("edge_insert_" + std::to_string(N) + "t", [&] { + for (uint32_t t = 0; t < N; ++t) { + threads.emplace_back([&, tid = t]() { + uint64_t local_ops = 0; + auto& samples = per_thread_samples[tid]; + + sync_point.arrive_and_wait(); + + while (!stop_flag.load(std::memory_order_relaxed)) { + uint64_t idx = (local_ops + tid * stride) % pool_size; + auto edge = GraphGenerator::create_test_edge( + root->id(), pool[idx], graph->get_agent_id()); + uint64_t ts = bench_now(); + bool ok = graph->insert_or_assign_edge(edge); + samples.push_back(bench_now() - ts); + if (!ok) + failed_ops.fetch_add(1, std::memory_order_relaxed); + local_ops++; + } + + total_ops.fetch_add(local_ops, std::memory_order_relaxed); + }); + } + + std::this_thread::sleep_for(THREAD_DUR); + stop_flag.store(true, std::memory_order_relaxed); + for (auto& th : threads) th.join(); + + bench.batch(total_ops.load()); + ankerl::nanobench::doNotOptimizeAway(total_ops.load()); + }); + + if (failed_ops.load() > 0) + std::cerr << "[BENCH edge_insert threads=" << N << "] " + << failed_ops.load() << " insert_or_assign_edge calls failed\n"; + + auto dur 
= duration_cast(steady_clock::now() - wall_start); + + LatencyTracker merged; + for (auto& s : per_thread_samples) + for (auto v : s) merged.record(v); + + const std::string n_str = std::to_string(N); + collector.record_throughput("edge_insert", total_ops.load(), dur, + {{"threads", n_str}}); + if (!merged.empty()) + collector.record_latency_stats("edge_insert", merged.stats(), + {{"threads", n_str}}); + + double ops_per_sec = static_cast(total_ops.load()) / + (static_cast(dur.count()) / 1000.0); + collector.record_scalability("edge_insert", N, ops_per_sec, "ops/sec", + {{"threads", n_str}, {"scale_dim", "threads"}}); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "edge_insert_thread_scaling"); +} + +// ── Edge read ───────────────────────────────────────────────────────────────── + +TEST_CASE("Edge read thread scaling", "[SCALABILITY][threads]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("edge_read_thread_scaling"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto root = graph->get_node_root(); + REQUIRE(root.has_value()); + + // Pre-populate 1000 nodes + edges; shared across all N iterations. 
+ constexpr uint32_t POOL_SIZE = 1000; + std::vector pool; + pool.reserve(POOL_SIZE); + for (uint64_t i = 0; i < POOL_SIZE; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + pool.push_back(res.value()); + auto edge = GraphGenerator::create_test_edge( + root->id(), res.value(), graph->get_agent_id()); + REQUIRE(graph->insert_or_assign_edge(edge)); + } + const size_t pool_size = pool.size(); + + ankerl::nanobench::Bench bench; + bench.output(&nb_report_stream()).warmup(0).epochs(1).epochIterations(1); + + for (uint32_t N : {1u, 2u, 4u, 8u}) { + std::atomic total_ops{0}; + std::atomic failed_ops{0}; + std::atomic stop_flag{false}; + std::barrier sync_point(N); + + std::vector> per_thread_samples(N); + for (auto& s : per_thread_samples) s.reserve(2000000 / N); + + std::vector threads; + threads.reserve(N); + + const uint32_t stride = static_cast(pool_size / N) + 1; + auto wall_start = steady_clock::now(); + + bench.run("edge_read_" + std::to_string(N) + "t", [&] { + for (uint32_t t = 0; t < N; ++t) { + threads.emplace_back([&, tid = t]() { + uint64_t local_ops = 0; + auto& samples = per_thread_samples[tid]; + + sync_point.arrive_and_wait(); + + while (!stop_flag.load(std::memory_order_relaxed)) { + uint64_t idx = (local_ops + tid * stride) % pool_size; + uint64_t ts = bench_now(); + auto edge = graph->get_edge(root->id(), pool[idx], "test_edge"); + samples.push_back(bench_now() - ts); + if (!edge.has_value()) + failed_ops.fetch_add(1, std::memory_order_relaxed); + local_ops++; + } + + total_ops.fetch_add(local_ops, std::memory_order_relaxed); + }); + } + + std::this_thread::sleep_for(THREAD_DUR); + stop_flag.store(true, std::memory_order_relaxed); + for (auto& th : threads) th.join(); + + bench.batch(total_ops.load()); + ankerl::nanobench::doNotOptimizeAway(total_ops.load()); + }); + + if (failed_ops.load() > 0) + std::cerr << "[BENCH edge_read threads=" << N << "] " + 
<< failed_ops.load() << " get_edge calls returned empty\n"; + + auto dur = duration_cast(steady_clock::now() - wall_start); + + LatencyTracker merged; + for (auto& s : per_thread_samples) + for (auto v : s) merged.record(v); + + const std::string n_str = std::to_string(N); + collector.record_throughput("edge_read", total_ops.load(), dur, + {{"threads", n_str}}); + if (!merged.empty()) + collector.record_latency_stats("edge_read", merged.stats(), + {{"threads", n_str}}); + + double ops_per_sec = static_cast(total_ops.load()) / + (static_cast(dur.count()) / 1000.0); + collector.record_scalability("edge_read", N, ops_per_sec, "ops/sec", + {{"threads", n_str}, {"scale_dim", "threads"}}); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "edge_read_thread_scaling"); +} diff --git a/benchmarks/throughput/concurrent_writers_bench.cpp b/benchmarks/throughput/concurrent_writers_bench.cpp new file mode 100644 index 0000000..6c9bda7 --- /dev/null +++ b/benchmarks/throughput/concurrent_writers_bench.cpp @@ -0,0 +1,366 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include "../core/nanobench_adapter.h" +#include "../core/timing_utils.h" +#include "../core/metrics_collector.h" +#include "../core/report_generator.h" +#include "../fixtures/multi_agent_fixture.h" +#include "../fixtures/graph_generator.h" + +using namespace DSR; +using namespace DSR::Benchmark; + +TEST_CASE("Concurrent writers throughput", "[THROUGHPUT][concurrent][PROFILE][LOAD]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("concurrent_writers"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + constexpr auto TEST_DURATION = std::chrono::seconds(5); + + ankerl::nanobench::Bench bench; + 
bench.output(&nb_report_stream()).warmup(0).epochs(1).epochIterations(1); + + auto run_concurrent_test = [&](uint32_t num_threads, const std::string& test_name) { + std::atomic total_operations{0}; + std::atomic failed_ops{0}; + std::atomic stop_flag{false}; + std::barrier sync_point(num_threads); + + std::vector threads; + threads.reserve(num_threads); + + auto start = std::chrono::steady_clock::now(); + + bench.run(test_name, [&] { + for (uint32_t t = 0; t < num_threads; ++t) { + threads.emplace_back([&, thread_id = t]() { + uint64_t base_id = 100000 + thread_id * 100000; + uint64_t local_ops = 0; + + sync_point.arrive_and_wait(); + + while (!stop_flag.load(std::memory_order_relaxed)) { + auto node = GraphGenerator::create_test_node( + base_id + local_ops, graph->get_agent_id(), + "thread_" + std::to_string(thread_id) + "_node_" + std::to_string(local_ops)); + if (!graph->insert_node(node).has_value()) + failed_ops.fetch_add(1, std::memory_order_relaxed); + local_ops++; + } + + total_operations.fetch_add(local_ops, std::memory_order_relaxed); + }); + } + + std::this_thread::sleep_for(TEST_DURATION); + stop_flag.store(true, std::memory_order_relaxed); + for (auto& t : threads) t.join(); + + bench.batch(total_operations.load()); + ankerl::nanobench::doNotOptimizeAway(total_operations.load()); + }); + + if (failed_ops.load() > 0) + std::cerr << "[BENCH " << test_name << "] " + << failed_ops.load() << " insert_node calls failed\n"; + + auto actual_duration = std::chrono::duration_cast( + std::chrono::steady_clock::now() - start); + + collector.record_throughput(test_name, total_operations.load(), actual_duration, + {{"num_threads", std::to_string(num_threads)}}); + + double ops_per_sec = static_cast(total_operations.load()) / + (static_cast(actual_duration.count()) / 1000.0); + + return ops_per_sec; + }; + + SECTION("2 concurrent writers") { + double ops = run_concurrent_test(2, "concurrent_insert_2t"); + INFO("2 threads: " << ops << " ops/sec"); + CHECK(ops >= 
MIN_EXPECTED_THROUGHPUT_OPS); + } + + SECTION("4 concurrent writers") { + double ops = run_concurrent_test(4, "concurrent_insert_4t"); + INFO("4 threads: " << ops << " ops/sec"); + } + + SECTION("8 concurrent writers") { + double ops = run_concurrent_test(8, "concurrent_insert_8t"); + INFO("8 threads: " << ops << " ops/sec"); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "concurrent_writers"); +} + +TEST_CASE("Concurrent read-write throughput", "[THROUGHPUT][concurrent][PROFILE][LOAD]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("concurrent_read_write"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + // Pre-populate graph and store actual IDs + std::vector pre_node_ids; + pre_node_ids.reserve(1000); + for (uint64_t i = 0; i < 1000; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto result = graph->insert_node(node); + REQUIRE(result.has_value()); + pre_node_ids.push_back(result.value()); + } + + constexpr auto TEST_DURATION = std::chrono::seconds(5); + + ankerl::nanobench::Bench bench; + bench.output(&nb_report_stream()).warmup(0).epochs(1).epochIterations(1); + + SECTION("Mixed read-write workload") { + constexpr uint32_t NUM_READERS = 4; + constexpr uint32_t NUM_WRITERS = 2; + constexpr uint32_t TOTAL_THREADS = NUM_READERS + NUM_WRITERS; + + std::atomic read_ops{0}; + std::atomic write_ops{0}; + std::atomic write_failures{0}; + std::atomic stop_flag{false}; + std::barrier sync_point(TOTAL_THREADS); + + std::vector threads; + threads.reserve(TOTAL_THREADS); + + auto start = std::chrono::steady_clock::now(); + + bench.run("mixed_read_write", [&] { + // Reader threads + for (uint32_t t = 0; t < NUM_READERS; ++t) { + threads.emplace_back([&, thread_id = t]() { + uint64_t local_ops = 0; + 
sync_point.arrive_and_wait(); + + while (!stop_flag.load(std::memory_order_relaxed)) { + uint64_t id = pre_node_ids[local_ops % pre_node_ids.size()]; + auto node = graph->get_node(id); + ankerl::nanobench::doNotOptimizeAway(node); + local_ops++; + } + + read_ops.fetch_add(local_ops, std::memory_order_relaxed); + }); + } + + // Writer threads + for (uint32_t t = 0; t < NUM_WRITERS; ++t) { + threads.emplace_back([&, thread_id = t]() { + uint64_t base_id = 300000 + thread_id * 100000; + uint64_t local_ops = 0; + sync_point.arrive_and_wait(); + + while (!stop_flag.load(std::memory_order_relaxed)) { + auto node = GraphGenerator::create_test_node( + base_id + local_ops, graph->get_agent_id()); + if (!graph->insert_node(node).has_value()) + write_failures.fetch_add(1, std::memory_order_relaxed); + local_ops++; + } + + write_ops.fetch_add(local_ops, std::memory_order_relaxed); + }); + } + + std::this_thread::sleep_for(TEST_DURATION); + stop_flag.store(true, std::memory_order_relaxed); + for (auto& t : threads) t.join(); + + bench.batch(read_ops.load() + write_ops.load()); + ankerl::nanobench::doNotOptimizeAway(read_ops.load()); + }); + + if (write_failures.load() > 0) + std::cerr << "[BENCH concurrent_read_write] " + << write_failures.load() << " insert_node calls failed\n"; + + auto actual_duration = std::chrono::duration_cast( + std::chrono::steady_clock::now() - start); + + collector.record_throughput("concurrent_reads", read_ops.load(), actual_duration, + {{"num_readers", std::to_string(NUM_READERS)}}); + collector.record_throughput("concurrent_writes", write_ops.load(), actual_duration, + {{"num_writers", std::to_string(NUM_WRITERS)}}); + + double read_ops_sec = static_cast(read_ops.load()) / + (static_cast(actual_duration.count()) / 1000.0); + double write_ops_sec = static_cast(write_ops.load()) / + (static_cast(actual_duration.count()) / 1000.0); + + INFO("Read throughput: " << read_ops_sec << " ops/sec"); + INFO("Write throughput: " << write_ops_sec << " ops/sec"); 
+ } + + SECTION("Update contention test") { + constexpr uint32_t NUM_THREADS = 4; + + // All threads update the same node + auto test_node = GraphGenerator::create_test_node( + 0, graph->get_agent_id(), "contention_test"); + auto contention_id_opt = graph->insert_node(test_node); + REQUIRE(contention_id_opt.has_value()); + uint64_t contention_node_id = contention_id_opt.value(); + + std::atomic total_ops{0}; + std::atomic successful_ops{0}; + std::atomic stop_flag{false}; + std::barrier sync_point(NUM_THREADS); + + std::vector threads; + threads.reserve(NUM_THREADS); + + auto start = std::chrono::steady_clock::now(); + + bench.run("update_contention", [&] { + for (uint32_t t = 0; t < NUM_THREADS; ++t) { + threads.emplace_back([&, thread_id = t, node_id = contention_node_id]() { + uint64_t local_total = 0; + uint64_t local_success = 0; + sync_point.arrive_and_wait(); + + while (!stop_flag.load(std::memory_order_relaxed)) { + auto node = graph->get_node(node_id); + if (node) { + graph->add_or_modify_attrib_local( + *node, static_cast(thread_id * 1000 + local_total)); + if (graph->update_node(*node)) { + local_success++; + } + } + local_total++; + } + + total_ops.fetch_add(local_total, std::memory_order_relaxed); + successful_ops.fetch_add(local_success, std::memory_order_relaxed); + }); + } + + std::this_thread::sleep_for(TEST_DURATION); + stop_flag.store(true, std::memory_order_relaxed); + for (auto& t : threads) t.join(); + + bench.batch(total_ops.load()); + ankerl::nanobench::doNotOptimizeAway(total_ops.load()); + }); + + auto actual_duration = std::chrono::duration_cast( + std::chrono::steady_clock::now() - start); + + double success_rate = static_cast(successful_ops.load()) / + static_cast(total_ops.load()) * 100.0; + + collector.record("update_contention_total", MetricCategory::Throughput, + static_cast(total_ops.load()), "ops", + {{"num_threads", std::to_string(NUM_THREADS)}}); + collector.record("update_contention_success_rate", MetricCategory::Throughput, + 
success_rate, "%", + {{"num_threads", std::to_string(NUM_THREADS)}}); + + INFO("Total attempts: " << total_ops.load()); + INFO("Successful updates: " << successful_ops.load()); + INFO("Success rate: " << success_rate << "%"); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "concurrent_read_write"); +} + +TEST_CASE("Multi-agent concurrent operations", "[THROUGHPUT][concurrent][multiagent][.multi][PROFILE][LOAD][MULTIAGENT]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("multiagent_concurrent"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(4, config_file)); + fixture.wait_for_sync(); + + constexpr auto TEST_DURATION = std::chrono::seconds(5); + + SECTION("Each agent writes independently") { + std::atomic total_ops{0}; + std::atomic failed_ops{0}; + std::atomic stop_flag{false}; + std::barrier sync_point(fixture.agent_count()); + + std::vector threads; + threads.reserve(fixture.agent_count()); + + auto start = std::chrono::steady_clock::now(); + + for (size_t i = 0; i < fixture.agent_count(); ++i) { + threads.emplace_back([&, agent_idx = i]() { + auto* graph = fixture.get_agent(agent_idx); + uint64_t base_id = 600000 + agent_idx * 100000; + uint64_t local_ops = 0; + + sync_point.arrive_and_wait(); + + while (!stop_flag.load(std::memory_order_relaxed)) { + auto node = GraphGenerator::create_test_node( + base_id + local_ops, graph->get_agent_id(), + "agent_" + std::to_string(agent_idx) + "_node_" + std::to_string(local_ops)); + if (!graph->insert_node(node).has_value()) + failed_ops.fetch_add(1, std::memory_order_relaxed); + local_ops++; + } + + total_ops.fetch_add(local_ops, std::memory_order_relaxed); + }); + } + + std::this_thread::sleep_for(TEST_DURATION); + stop_flag.store(true, std::memory_order_relaxed); + + for (auto& t : threads) { + t.join(); + } + + if (failed_ops.load() > 0) + std::cerr << "[BENCH 
multiagent_concurrent] " + << failed_ops.load() << " insert_node calls failed\n"; + + auto actual_duration = std::chrono::duration_cast( + std::chrono::steady_clock::now() - start); + + collector.record_throughput("multiagent_concurrent_insert", + total_ops.load(), actual_duration, + {{"num_agents", std::to_string(fixture.agent_count())}}); + + double ops_per_sec = static_cast(total_ops.load()) / + (static_cast(actual_duration.count()) / 1000.0); + + INFO("Multi-agent concurrent throughput: " << ops_per_sec << " ops/sec"); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "multiagent_concurrent"); +} diff --git a/benchmarks/throughput/query_ops_bench.cpp b/benchmarks/throughput/query_ops_bench.cpp new file mode 100644 index 0000000..360d71f --- /dev/null +++ b/benchmarks/throughput/query_ops_bench.cpp @@ -0,0 +1,131 @@ +#include + +#include "../core/nanobench_adapter.h" +#include "../core/metrics_collector.h" +#include "../core/report_generator.h" +#include "../fixtures/multi_agent_fixture.h" +#include "../fixtures/graph_generator.h" + +using namespace DSR; +using namespace DSR::Benchmark; + +TEST_CASE("Graph query convenience operations", "[EXTENDED][LATENCY][THROUGHPUT][query][single][.extended]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("graph_query_baseline"); + collector.add_metadata("profile", "extended"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto root = graph->get_node_root(); + REQUIRE(root.has_value()); + + std::vector node_ids; + node_ids.reserve(1000); + for (uint64_t i = 0; i < 1000; ++i) { + auto node = GraphGenerator::create_test_node(i, graph->get_agent_id(), "query_node_" + std::to_string(i)); + auto inserted = graph->insert_node(node); + REQUIRE(inserted.has_value()); + node_ids.push_back(*inserted); + 
+ auto edge = GraphGenerator::create_test_edge(root->id(), *inserted, graph->get_agent_id()); + REQUIRE(graph->insert_or_assign_edge(edge)); + } + + for (auto id : node_ids) { + (void)graph->get_node(id); + } + (void)graph->get_nodes(); + (void)graph->get_nodes_by_type("test_node"); + (void)graph->get_edges(root->id()); + (void)graph->get_edges_to_id(root->id()); + (void)graph->get_edges_by_type("test_edge"); + + { + auto bench = make_latency_bench(1000, 0); + bench.minEpochIterations(10); + bench.run("get_nodes", [&] { + auto nodes = graph->get_nodes(); + ankerl::nanobench::doNotOptimizeAway(nodes); + }); + collector.record_latency_stats("get_nodes", nb_to_stats(bench)); + collector.record("get_nodes", MetricCategory::Throughput, nb_throughput(bench), "ops/sec"); + } + + { + auto bench = make_latency_bench(1000, 0); + bench.minEpochIterations(10); + bench.run("get_nodes_by_type", [&] { + auto nodes = graph->get_nodes_by_type("test_node"); + ankerl::nanobench::doNotOptimizeAway(nodes); + }); + collector.record_latency_stats("get_nodes_by_type", nb_to_stats(bench)); + collector.record("get_nodes_by_type", MetricCategory::Throughput, nb_throughput(bench), "ops/sec"); + } + + { + auto bench = make_latency_bench(1000, 0); + bench.minEpochIterations(20); + bench.run("get_edges_from_root", [&] { + auto edges = graph->get_edges(root->id()); + ankerl::nanobench::doNotOptimizeAway(edges); + }); + collector.record_latency_stats("get_edges_from_root", nb_to_stats(bench)); + collector.record("get_edges_from_root", MetricCategory::Throughput, nb_throughput(bench), "ops/sec"); + } + + { + auto bench = make_latency_bench(1000, 0); + bench.minEpochIterations(20); + bench.run("get_edges_to_root", [&] { + auto edges = graph->get_edges_to_id(root->id()); + ankerl::nanobench::doNotOptimizeAway(edges); + }); + collector.record_latency_stats("get_edges_to_root", nb_to_stats(bench)); + collector.record("get_edges_to_root", MetricCategory::Throughput, nb_throughput(bench), "ops/sec"); + } 
+ + { + auto bench = make_latency_bench(1000, 0); + bench.minEpochIterations(20); + bench.run("get_edges_by_type", [&] { + auto edges = graph->get_edges_by_type("test_edge"); + ankerl::nanobench::doNotOptimizeAway(edges); + }); + collector.record_latency_stats("get_edges_by_type", nb_to_stats(bench)); + collector.record("get_edges_by_type", MetricCategory::Throughput, nb_throughput(bench), "ops/sec"); + } + + { + size_t idx = 0; + auto bench = make_latency_bench(1000, 0); + bench.minEpochIterations(5000); + bench.run("get_name_from_id", [&] { + auto name = graph->get_name_from_id(node_ids[idx++ % node_ids.size()]); + REQUIRE(name.has_value()); + ankerl::nanobench::doNotOptimizeAway(name); + }); + collector.record_latency_stats("get_name_from_id", nb_to_stats(bench)); + collector.record("get_name_from_id", MetricCategory::Throughput, nb_throughput(bench), "ops/sec"); + } + + { + size_t idx = 0; + auto bench = make_latency_bench(1000, 0); + bench.minEpochIterations(5000); + bench.run("get_id_from_name", [&] { + auto id = graph->get_id_from_name("query_node_" + std::to_string(idx++ % node_ids.size())); + REQUIRE(id.has_value()); + ankerl::nanobench::doNotOptimizeAway(id); + }); + collector.record_latency_stats("get_id_from_name", nb_to_stats(bench)); + collector.record("get_id_from_name", MetricCategory::Throughput, nb_throughput(bench), "ops/sec"); + } + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "graph_query_baseline"); +} diff --git a/benchmarks/throughput/single_agent_ops_bench.cpp b/benchmarks/throughput/single_agent_ops_bench.cpp new file mode 100644 index 0000000..3fb79e4 --- /dev/null +++ b/benchmarks/throughput/single_agent_ops_bench.cpp @@ -0,0 +1,397 @@ +#include +#include + +#include "../core/nanobench_adapter.h" +#include "../core/metrics_collector.h" +#include "../core/report_generator.h" +#include "../fixtures/multi_agent_fixture.h" +#include "../fixtures/graph_generator.h" + +using 
namespace DSR; +using namespace DSR::Benchmark; + +// Each operation gets its own TEST_CASE. nanobench replaces the manual +// 5-second time-window loops: it auto-tunes warmup and iteration count, +// and derives throughput from the mean latency (nb_throughput()). + +TEST_CASE("Node insertion throughput", "[THROUGHPUT][single]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("node_insert_throughput"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + uint64_t id_counter = 0; + auto sampled = run_sampled_benchmark( + 50, + 1000, + [&] { + auto node = GraphGenerator::create_test_node(id_counter++, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + }, + [&] { fixture.process_events(1); }, + 16); + + collector.record_latency_stats("node_insert", sampled.latency); + collector.record_throughput("node_insert", sampled.latency.count, sampled.wall_time); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_insert_throughput"); +} + +TEST_CASE("Node read throughput", "[THROUGHPUT][single]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("node_read_throughput"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + std::vector node_ids; + node_ids.reserve(1000); + for (uint64_t i = 0; i < 1000; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto result = graph->insert_node(node); + REQUIRE(result.has_value()); + node_ids.push_back(result.value()); + } + + // Cache warmup + for (auto id : node_ids) { (void)graph->get_node(id); } + + // ~900 ns/op: 10 000 iters/epoch × 200 epochs ≈ 1.8 s + size_t idx = 0; + bool 
last_ok = true; + auto bench = make_latency_bench(200, 0); // manual warmup done above + bench.minEpochIterations(10000); + bench.run("node_read", [&] { + auto node = graph->get_node(node_ids[idx++ % node_ids.size()]); + last_ok = node.has_value(); + ankerl::nanobench::doNotOptimizeAway(node); + }); + REQUIRE(last_ok); + + collector.record_latency_stats("node_read", nb_to_stats(bench)); + collector.record("node_read", MetricCategory::Throughput, + nb_throughput(bench), "ops/sec"); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_read_throughput"); +} + +TEST_CASE("Node update throughput", "[THROUGHPUT][single]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("node_update_throughput"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto test_node = GraphGenerator::create_test_node(0, graph->get_agent_id(), "update_test"); + auto insert_result = graph->insert_node(test_node); + REQUIRE(insert_result.has_value()); + uint64_t node_id = insert_result.value(); + + // ~38µs/op: 300 iters/epoch × 50 epochs ≈ 0.57 s + uint64_t update_counter = 0; + auto bench = make_latency_bench(50); + bench.minEpochIterations(300); + bench.run("node_update", [&] { + auto node = graph->get_node(node_id); + REQUIRE(node.has_value()); + graph->add_or_modify_attrib_local( + *node, static_cast(update_counter++ % 1000)); + bool ok = graph->update_node(*node); + REQUIRE(ok); + ankerl::nanobench::doNotOptimizeAway(ok); + }); + + collector.record_latency_stats("node_update", nb_to_stats(bench)); + collector.record("node_update", MetricCategory::Throughput, + nb_throughput(bench), "ops/sec"); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_update_throughput"); +} + +TEST_CASE("Edge insertion 
throughput", "[THROUGHPUT][single]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("edge_insert_throughput"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto root = graph->get_node_root(); + REQUIRE(root.has_value()); + + std::vector target_ids; + target_ids.reserve(10000); + for (uint64_t i = 0; i < 10000; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto result = graph->insert_node(node); + REQUIRE(result.has_value()); + target_ids.push_back(result.value()); + } + + size_t idx = 0; + auto sampled = run_sampled_benchmark( + 50, + 1000, + [&] { + uint64_t target = target_ids[idx++ % target_ids.size()]; + auto edge = GraphGenerator::create_test_edge(root->id(), target, graph->get_agent_id()); + bool ok = graph->insert_or_assign_edge(edge); + REQUIRE(ok); + }, + [&] { fixture.process_events(1); }, + 8); + + collector.record_latency_stats("edge_insert", sampled.latency); + collector.record_throughput("edge_insert", sampled.latency.count, sampled.wall_time); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "edge_insert_throughput"); +} + +TEST_CASE("Edge read throughput", "[THROUGHPUT][single]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("edge_read_throughput"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto root = graph->get_node_root(); + REQUIRE(root.has_value()); + + std::vector target_ids; + target_ids.reserve(1000); + for (uint64_t i = 0; i < 1000; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto result = graph->insert_node(node); + REQUIRE(result.has_value()); + 
target_ids.push_back(result.value()); + auto edge = GraphGenerator::create_test_edge( + root->id(), result.value(), graph->get_agent_id()); + REQUIRE(graph->insert_or_assign_edge(edge)); + } + + // Cache warmup + for (auto tid : target_ids) { (void)graph->get_edge(root->id(), tid, "test_edge"); } + + size_t idx = 0; + bool last_ok = true; + auto bench = make_latency_bench(1000, 0); // manual warmup done above + bench.run("edge_read", [&] { + uint64_t target = target_ids[idx++ % target_ids.size()]; + auto edge = graph->get_edge(root->id(), target, "test_edge"); + last_ok = edge.has_value(); + ankerl::nanobench::doNotOptimizeAway(edge); + }); + REQUIRE(last_ok); + + collector.record_latency_stats("edge_read", nb_to_stats(bench)); + collector.record("edge_read", MetricCategory::Throughput, + nb_throughput(bench), "ops/sec"); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "edge_read_throughput"); +} + +TEST_CASE("Mixed operations throughput", "[THROUGHPUT][single]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("mixed_ops_throughput"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto root = graph->get_node_root(); + REQUIRE(root.has_value()); + + std::vector node_ids; + node_ids.reserve(600); // 500 initial + up to ~100 inserts from 30% insert rate × 1100 calls + for (uint64_t i = 0; i < 500; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto result = graph->insert_node(node); + REQUIRE(result.has_value()); + node_ids.push_back(result.value()); + } + + uint64_t ops = 0; + auto sampled = run_sampled_benchmark( + 50, + 1000, + [&] { + int op_type = static_cast(ops % 10); + if (op_type < 4) { + auto node = graph->get_node(node_ids[ops % node_ids.size()]); + ankerl::nanobench::doNotOptimizeAway(node); + 
} else if (op_type < 7) { + auto node = GraphGenerator::create_test_node(ops, graph->get_agent_id()); + auto result = graph->insert_node(node); + REQUIRE(result.has_value()); + node_ids.push_back(result.value()); + } else { + auto node = graph->get_node(node_ids[ops % node_ids.size()]); + REQUIRE(node.has_value()); + graph->add_or_modify_attrib_local( + *node, static_cast(ops)); + bool ok = graph->update_node(*node); + REQUIRE(ok); + } + ++ops; + }, + [&] { fixture.process_events(1); }, + 16); + + collector.record_latency_stats("mixed_ops", sampled.latency); + collector.record_throughput("mixed_ops", sampled.latency.count, sampled.wall_time); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "mixed_ops_throughput"); +} + +TEST_CASE("Node deletion throughput", "[THROUGHPUT][single]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("node_delete_throughput"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + // Pool is 3× the expected maximum (warmup + epochs) so nanobench auto-tuning + // cannot exhaust it; the REQUIRE fires loudly if it somehow does. 
+ std::vector node_ids; + node_ids.reserve(3000); + for (uint64_t i = 0; i < 3000; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + node_ids.push_back(res.value()); + } + + size_t pool_idx = 0; + auto sampled = run_sampled_benchmark( + 50, + 1000, + [&] { + REQUIRE(pool_idx < node_ids.size()); + bool ok = graph->delete_node(node_ids[pool_idx++]); + REQUIRE(ok); + }, + [&] { fixture.process_events(1); }, + 16); + + collector.record_latency_stats("node_delete", sampled.latency); + collector.record_throughput("node_delete", sampled.latency.count, sampled.wall_time); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_delete_throughput"); +} + +TEST_CASE("Edge deletion throughput", "[THROUGHPUT][single]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("edge_delete_throughput"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto root = graph->get_node_root(); + REQUIRE(root.has_value()); + + // Pool is 3× the expected maximum (warmup + epochs) so nanobench auto-tuning + // cannot exhaust it; the REQUIRE fires loudly if it somehow does. 
+ std::vector target_ids; + target_ids.reserve(3000); + for (uint64_t i = 0; i < 3000; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + auto edge = GraphGenerator::create_test_edge( + root->id(), res.value(), graph->get_agent_id()); + REQUIRE(graph->insert_or_assign_edge(edge)); + target_ids.push_back(res.value()); + } + + size_t pool_idx = 0; + auto sampled = run_sampled_benchmark( + 50, + 1000, + [&] { + REQUIRE(pool_idx < target_ids.size()); + bool ok = graph->delete_edge(root->id(), target_ids[pool_idx++], "test_edge"); + REQUIRE(ok); + }, + [&] { fixture.process_events(1); }, + 8); + + collector.record_latency_stats("edge_delete", sampled.latency); + collector.record_throughput("edge_delete", sampled.latency.count, sampled.wall_time); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "edge_delete_throughput"); +} + +// Catch2 BENCHMARK macros (microbenchmark mode, run with [!benchmark]) +TEST_CASE("Single agent operations (Catch2 BENCHMARK)", "[THROUGHPUT][single][!benchmark]") { + MultiAgentFixture fixture; + GraphGenerator generator; + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + BENCHMARK("Node insert") { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + return graph->insert_node(node); + }; + + auto read_node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto read_id_opt = graph->insert_node(read_node); + REQUIRE(read_id_opt.has_value()); + uint64_t read_id = read_id_opt.value(); + + BENCHMARK("Node read") { + return graph->get_node(read_id); + }; + + BENCHMARK("Node update") { + auto node = graph->get_node(read_id); + if (node) { + graph->add_or_modify_attrib_local(*node, 42); + return graph->update_node(*node); + } + return 
false; + }; +} diff --git a/benchmarks/throughput/single_agent_ops_with_latency_bench.cpp b/benchmarks/throughput/single_agent_ops_with_latency_bench.cpp new file mode 100644 index 0000000..3bde6d8 --- /dev/null +++ b/benchmarks/throughput/single_agent_ops_with_latency_bench.cpp @@ -0,0 +1,330 @@ +#include + +#include "../core/nanobench_adapter.h" +#include "../core/metrics_collector.h" +#include "../core/report_generator.h" +#include "../fixtures/multi_agent_fixture.h" +#include "../fixtures/graph_generator.h" + +using namespace DSR; +using namespace DSR::Benchmark; + +// Each TEST_CASE measures both latency and throughput simultaneously using +// nanobench. Steady-state read/update paths may raise minEpochIterations() to +// reduce timer noise. Destructive/stateful workloads use +// make_single_op_latency_bench() so graph growth does not drift across runs. +// Tags {"threads","1","graph_size","0"} mark these as the single-thread, +// empty-graph baseline for the Scalability tab. + +TEST_CASE("Node insert latency+throughput", "[THROUGHPUT][LATENCY][single]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("node_insert_lat_thr"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + uint64_t id_counter = 0; + auto sampled = run_sampled_benchmark( + 50, + 1000, + [&] { + auto node = GraphGenerator::create_test_node(id_counter++, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + }, + [&] { fixture.process_events(1); }, + 16); + + collector.record_latency_stats("node_insert", sampled.latency, + {{"threads", "1"}, {"graph_size", "0"}}); + collector.record_throughput("node_insert", sampled.latency.count, sampled.wall_time, + {{"threads", "1"}, {"graph_size", "0"}}); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + 
reporter.export_all(result, "node_insert_lat_thr"); +} + +TEST_CASE("Node read latency+throughput", "[THROUGHPUT][LATENCY][single][EXTENDED][.extended]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("node_read_lat_thr"); + collector.add_metadata("profile", "extended"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + // Pre-populate 1000 nodes for round-robin reads + std::vector node_ids; + node_ids.reserve(1000); + for (uint64_t i = 0; i < 1000; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + node_ids.push_back(res.value()); + } + + // Cache warmup: touch every node once so the read loop is warm + for (auto id : node_ids) { (void)graph->get_node(id); } + + // ~900 ns/op: 10 000 iters/epoch × 200 epochs ≈ 1.8 s + size_t idx = 0; + bool last_ok = true; + auto bench = make_latency_bench(200, 0); // manual warmup done above + bench.minEpochIterations(10000); + bench.run("node_read", [&] { + auto node = graph->get_node(node_ids[idx++ % node_ids.size()]); + last_ok = node.has_value(); + ankerl::nanobench::doNotOptimizeAway(node); + }); + REQUIRE(last_ok); + + auto stats = nb_to_stats(bench); + collector.record_latency_stats("node_read", stats, + {{"threads", "1"}, {"graph_size", "0"}}); + collector.record("node_read", MetricCategory::Throughput, + nb_throughput(bench), "ops/sec", + {{"threads", "1"}, {"graph_size", "0"}}); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_read_lat_thr"); +} + +TEST_CASE("Node update latency+throughput", "[THROUGHPUT][LATENCY][single][EXTENDED][.extended]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("node_update_lat_thr"); + collector.add_metadata("profile", 
"extended"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto test_node = GraphGenerator::create_test_node(0, graph->get_agent_id(), "update_test"); + auto insert_result = graph->insert_node(test_node); + REQUIRE(insert_result.has_value()); + uint64_t node_id = insert_result.value(); + + // ~38µs/op: 300 iters/epoch × 50 epochs ≈ 0.57 s + uint64_t update_counter = 0; + auto bench = make_latency_bench(50); + bench.minEpochIterations(300); + bench.run("node_update", [&] { + auto node = graph->get_node(node_id); + REQUIRE(node.has_value()); + graph->add_or_modify_attrib_local( + *node, static_cast(update_counter++ % 1000)); + bool ok = graph->update_node(*node); + REQUIRE(ok); + ankerl::nanobench::doNotOptimizeAway(ok); + }); + + auto stats = nb_to_stats(bench); + collector.record_latency_stats("node_update", stats, + {{"threads", "1"}, {"graph_size", "0"}}); + collector.record("node_update", MetricCategory::Throughput, + nb_throughput(bench), "ops/sec", + {{"threads", "1"}, {"graph_size", "0"}}); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_update_lat_thr"); +} + +TEST_CASE("Edge insert latency+throughput", "[THROUGHPUT][LATENCY][single]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("edge_insert_lat_thr"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto root = graph->get_node_root(); + REQUIRE(root.has_value()); + + // Pre-populate target node pool + std::vector target_ids; + target_ids.reserve(10000); + for (uint64_t i = 0; i < 10000; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + 
target_ids.push_back(res.value()); + } + + size_t idx = 0; + auto sampled = run_sampled_benchmark( + 50, + 1000, + [&] { + uint64_t target = target_ids[idx++ % target_ids.size()]; + auto edge = GraphGenerator::create_test_edge(root->id(), target, graph->get_agent_id()); + bool ok = graph->insert_or_assign_edge(edge); + REQUIRE(ok); + }, + [&] { fixture.process_events(1); }, + 8); + + collector.record_latency_stats("edge_insert", sampled.latency, + {{"threads", "1"}, {"graph_size", "0"}}); + collector.record_throughput("edge_insert", sampled.latency.count, sampled.wall_time, + {{"threads", "1"}, {"graph_size", "0"}}); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "edge_insert_lat_thr"); +} + +TEST_CASE("Edge read latency+throughput", "[THROUGHPUT][LATENCY][single][EXTENDED][.extended]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("edge_read_lat_thr"); + collector.add_metadata("profile", "extended"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto root = graph->get_node_root(); + REQUIRE(root.has_value()); + + // Pre-populate 1000 nodes + edges + std::vector target_ids; + target_ids.reserve(1000); + for (uint64_t i = 0; i < 1000; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + target_ids.push_back(res.value()); + auto edge = GraphGenerator::create_test_edge( + root->id(), res.value(), graph->get_agent_id()); + REQUIRE(graph->insert_or_assign_edge(edge)); + } + + // Cache warmup + for (auto tid : target_ids) { (void)graph->get_edge(root->id(), tid, "test_edge"); } + + size_t idx = 0; + bool last_ok = true; + auto bench = make_latency_bench(1000, 0); // manual warmup done above + bench.run("edge_read", [&] { + uint64_t target = 
target_ids[idx++ % target_ids.size()]; + auto edge = graph->get_edge(root->id(), target, "test_edge"); + last_ok = edge.has_value(); + ankerl::nanobench::doNotOptimizeAway(edge); + }); + REQUIRE(last_ok); + + auto stats = nb_to_stats(bench); + collector.record_latency_stats("edge_read", stats, + {{"threads", "1"}, {"graph_size", "0"}}); + collector.record("edge_read", MetricCategory::Throughput, + nb_throughput(bench), "ops/sec", + {{"threads", "1"}, {"graph_size", "0"}}); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "edge_read_lat_thr"); +} + +TEST_CASE("Node delete latency+throughput", "[THROUGHPUT][LATENCY][single]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("node_delete_lat_thr"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + // Pool is 3× the expected maximum (warmup + epochs) so nanobench auto-tuning + // cannot exhaust it; the REQUIRE fires loudly if it somehow does. 
+ std::vector node_ids; + node_ids.reserve(3000); + for (uint64_t i = 0; i < 3000; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + node_ids.push_back(res.value()); + } + + size_t pool_idx = 0; + auto sampled = run_sampled_benchmark( + 50, + 1000, + [&] { + REQUIRE(pool_idx < node_ids.size()); + bool ok = graph->delete_node(node_ids[pool_idx++]); + REQUIRE(ok); + }, + [&] { fixture.process_events(1); }, + 16); + + collector.record_latency_stats("node_delete", sampled.latency, + {{"threads", "1"}, {"graph_size", "0"}}); + collector.record_throughput("node_delete", sampled.latency.count, sampled.wall_time, + {{"threads", "1"}, {"graph_size", "0"}}); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "node_delete_lat_thr"); +} + +TEST_CASE("Edge delete latency+throughput", "[THROUGHPUT][LATENCY][single]") { + MultiAgentFixture fixture; + GraphGenerator generator; + MetricsCollector collector("edge_delete_lat_thr"); + + auto config_file = generator.generate_empty_graph(); + REQUIRE(fixture.create_agents(1, config_file)); + auto* graph = fixture.get_agent(0); + REQUIRE(graph != nullptr); + + auto root = graph->get_node_root(); + REQUIRE(root.has_value()); + + // Pool is 3× the expected maximum (warmup + epochs) so nanobench auto-tuning + // cannot exhaust it; the REQUIRE fires loudly if it somehow does. 
+ std::vector target_ids; + target_ids.reserve(3000); + for (uint64_t i = 0; i < 3000; ++i) { + auto node = GraphGenerator::create_test_node(0, graph->get_agent_id()); + auto res = graph->insert_node(node); + REQUIRE(res.has_value()); + auto edge = GraphGenerator::create_test_edge( + root->id(), res.value(), graph->get_agent_id()); + REQUIRE(graph->insert_or_assign_edge(edge)); + target_ids.push_back(res.value()); + } + + size_t pool_idx = 0; + auto sampled = run_sampled_benchmark( + 50, + 1000, + [&] { + REQUIRE(pool_idx < target_ids.size()); + bool ok = graph->delete_edge(root->id(), target_ids[pool_idx++], "test_edge"); + REQUIRE(ok); + }, + [&] { fixture.process_events(1); }, + 8); + + collector.record_latency_stats("edge_delete", sampled.latency, + {{"threads", "1"}, {"graph_size", "0"}}); + collector.record_throughput("edge_delete", sampled.latency.count, sampled.wall_time, + {{"threads", "1"}, {"graph_size", "0"}}); + + auto result = collector.finalize(); + ReportGenerator reporter("results"); + reporter.export_all(result, "edge_delete_lat_thr"); +} diff --git a/core/include/dsr/core/rtps/dsrparticipant.h b/core/include/dsr/core/rtps/dsrparticipant.h index 662897a..e3e59d2 100644 --- a/core/include/dsr/core/rtps/dsrparticipant.h +++ b/core/include/dsr/core/rtps/dsrparticipant.h @@ -17,6 +17,7 @@ class DSRParticipant DSRParticipant(); virtual ~DSRParticipant(); [[nodiscard]] std::tuple init(uint32_t agent_id, const std::string& agent_name, int localhost, std::function fn, int8_t domain_id=0); + [[nodiscard]] int8_t get_domain_id() const { return domain_id_; } [[nodiscard]] const eprosima::fastdds::rtps::GUID_t& getID() const; [[nodiscard]] const char *getNodeTopicName() const { return dsrgraphType->get_name().data();} [[nodiscard]] const char *getRequestTopicName() const { return graphrequestType->get_name().data();} @@ -41,6 +42,7 @@ class DSRParticipant void remove_participant_and_entities(); private: + int8_t domain_id_ {0}; 
eprosima::fastdds::dds::DomainParticipant* mp_participant{}; eprosima::fastdds::dds::Topic* topic_node{}; diff --git a/core/include/dsr/core/rtps/dsrpublisher.h b/core/include/dsr/core/rtps/dsrpublisher.h index a55f000..65d8707 100644 --- a/core/include/dsr/core/rtps/dsrpublisher.h +++ b/core/include/dsr/core/rtps/dsrpublisher.h @@ -16,7 +16,11 @@ class DSRPublisher public: DSRPublisher(); virtual ~DSRPublisher(); - [[nodiscard]] std::tuple init(eprosima::fastdds::dds::DomainParticipant *mp_participant_, eprosima::fastdds::dds::Topic *topic, bool isStreamData = false); + [[nodiscard]] std::tuple init( + eprosima::fastdds::dds::DomainParticipant *mp_participant_, + eprosima::fastdds::dds::Topic *topic, + int8_t domain_id, + bool isStreamData = false); [[nodiscard]] eprosima::fastdds::rtps::GUID_t getParticipantID() const; bool write(IDL::GraphRequest *object); bool write(IDL::MvregNode *object); diff --git a/core/include/dsr/core/rtps/dsrsubscriber.h b/core/include/dsr/core/rtps/dsrsubscriber.h index d51f317..bedd98b 100644 --- a/core/include/dsr/core/rtps/dsrsubscriber.h +++ b/core/include/dsr/core/rtps/dsrsubscriber.h @@ -16,6 +16,7 @@ class DSRSubscriber [[nodiscard]] std::tuple init(eprosima::fastdds::dds::DomainParticipant *mp_participant_, eprosima::fastdds::dds::Topic *topic, + int8_t domain_id, const std::function& f_, std::mutex& mtx, bool isStreamData = false); diff --git a/core/rtps/dsrparticipant.cpp b/core/rtps/dsrparticipant.cpp index 02026dd..2139130 100644 --- a/core/rtps/dsrparticipant.cpp +++ b/core/rtps/dsrparticipant.cpp @@ -10,6 +10,23 @@ using namespace eprosima::fastdds::dds; using namespace eprosima::fastdds::rtps; +namespace { +std::vector host_ipv4_interfaces() +{ + std::vector ips{"127.0.0.1"}; + std::vector found; + IPFinder::getIPs(&found, false); + for (const auto& ip : found) { + if (ip.type == IPFinder::IP4) { + if (std::find(ips.begin(), ips.end(), ip.name) == ips.end()) { + ips.push_back(ip.name); + } + } + } + return ips; +} +} + 
DSRParticipant::DSRParticipant() : mp_participant(nullptr), dsrgraphType(new MvregNodePubSubType()), graphrequestType(new GraphRequestPubSubType()), @@ -32,6 +49,7 @@ DSRParticipant::~DSRParticipant() std::tuple DSRParticipant::init(uint32_t agent_id, const std::string& agent_name, int localhost, std::function fn, int8_t domain_id) { + domain_id_ = domain_id; // Create RTPSParticipant DomainParticipantQos PParam; PParam.name(("Participant_" + std::to_string(agent_id)+ " ( " + agent_name + " )").data() ); @@ -40,32 +58,24 @@ std::tuple DSRParticipant::ini //Disable the built-in Transport Layer. PParam.transport().use_builtin_transports = false; - //Create a descriptor for the new transport. - auto custom_transport = std::make_shared(); - //auto custom_transport = std::make_shared(); - //custom_transport->sendBufferSize = 33554432; // commented it will use the OS default - //custom_transport->receiveBufferSize = 33554432; // commented it will use the OS default - custom_transport->maxMessageSize = 65000; - - PParam.transport().user_transports.push_back(custom_transport); - - - custom_transport->interface_allowlist.emplace_back("127.0.0.1"); - - /*if (not localhost) - { - - - std::vector ips; - eprosima::fastdds::rtps::IPFinder::getIPs(&ips, false); - - for (auto &ip : ips) { - if (ip.type == eprosima::fastdds::rtps::IPFinder::IP4 ) { - //custom_transport->interfaceWhiteList.emplace_back(ip.name); - } + if (localhost) { + // Same-host deployments should prefer shared memory. Keep loopback UDP + // as a discovery/data fallback for environments where SHM is limited. 
+ auto shm_transport = std::make_shared(); + PParam.transport().user_transports.push_back(shm_transport); + + auto udp_transport = std::make_shared(); + udp_transport->maxMessageSize = 65000; + udp_transport->interface_allowlist.emplace_back("127.0.0.1"); + PParam.transport().user_transports.push_back(udp_transport); + } else { + auto udp_transport = std::make_shared(); + udp_transport->maxMessageSize = 65000; + for (const auto& ip : host_ipv4_interfaces()) { + udp_transport->interface_allowlist.emplace_back(ip); } - - }*/ + PParam.transport().user_transports.push_back(udp_transport); + } PParam.transport().send_socket_buffer_size = 33554432; PParam.transport().listen_socket_buffer_size = 33554432; diff --git a/core/rtps/dsrpublisher.cpp b/core/rtps/dsrpublisher.cpp index 04d3208..ea13b5d 100644 --- a/core/rtps/dsrpublisher.cpp +++ b/core/rtps/dsrpublisher.cpp @@ -15,6 +15,19 @@ using namespace eprosima::fastdds; using namespace eprosima::fastdds::rtps; using namespace eprosima::fastdds::dds; +namespace { +Locator_t domain_multicast_locator(int8_t domain_id) +{ + const auto domain = static_cast(domain_id); + Locator_t locator; + locator.port = 7900; + locator.kind = LOCATOR_KIND_UDPv4; + IPLocator::setIPv4(locator, + ("239.255." + std::to_string(domain / 250) + "." 
+ std::to_string(1 + (domain % 250))).c_str()); + return locator; +} +} + DSRPublisher::DSRPublisher() : mp_participant(nullptr), mp_publisher(nullptr), mp_writer(nullptr) {} @@ -23,7 +36,7 @@ DSRPublisher::~DSRPublisher() } std::tuple - DSRPublisher::init(eprosima::fastdds::dds::DomainParticipant *mp_participant_, eprosima::fastdds::dds::Topic *topic, bool isStreamData ) + DSRPublisher::init(eprosima::fastdds::dds::DomainParticipant *mp_participant_, eprosima::fastdds::dds::Topic *topic, int8_t domain_id, bool isStreamData ) { mp_participant = mp_participant_; @@ -46,12 +59,7 @@ std::tuple(domain_id); + Locator_t locator; + locator.port = 7900; + locator.kind = LOCATOR_KIND_UDPv4; + IPLocator::setIPv4(locator, + ("239.255." + std::to_string(domain / 250) + "." + std::to_string(1 + (domain % 250))).c_str()); + return locator; +} +} + DSRSubscriber::DSRSubscriber() : mp_participant(nullptr), mp_subscriber(nullptr), mp_reader(nullptr) {} DSRSubscriber::~DSRSubscriber() @@ -22,6 +35,7 @@ DSRSubscriber::~DSRSubscriber() std::tuple DSRSubscriber::init(eprosima::fastdds::dds::DomainParticipant *mp_participant_, eprosima::fastdds::dds::Topic *topic, + int8_t domain_id, const std::function& f_, std::mutex& mtx, bool isStreamData) @@ -52,11 +66,7 @@ std::tupleget_qos().transport().user_transports.end(); if (not local) { - Locator_t locator; - locator.port = 7900; - locator.kind = LOCATOR_KIND_UDPv4; - IPLocator::setIPv4(locator, "239.255.1.33"); - dataReaderQos.endpoint().multicast_locator_list.push_back(locator); + dataReaderQos.endpoint().multicast_locator_list.push_back(domain_multicast_locator(domain_id)); } //Check latency @@ -121,4 +131,3 @@ void DSRSubscriber::SubListener::on_data_available(eprosima::fastdds::dds::DataR { f(sub); } - diff --git a/tests/graph/edge_operations.cpp b/tests/graph/edge_operations.cpp index d983537..61da995 100644 --- a/tests/graph/edge_operations.cpp +++ b/tests/graph/edge_operations.cpp @@ -130,6 +130,33 @@ auto n = Node::create(); 
REQUIRE_FALSE(r); } + SECTION("Deleting one edge type between a pair leaves other types intact") { + auto n1 = Node::create(); + auto id1 = G.insert_node(n1); + REQUIRE(id1.has_value()); + + auto n2 = Node::create(); + auto id2 = G.insert_node(n2); + REQUIRE(id2.has_value()); + + // Two different edge types between the same node pair + auto e_in = Edge::create(*id1, *id2); + auto e_knows = Edge::create(*id1, *id2); + REQUIRE(G.insert_or_assign_edge(e_in)); + REQUIRE(G.insert_or_assign_edge(e_knows)); + + // Delete only the "in" edge + REQUIRE(G.delete_edge(*id1, *id2, std::string(in_edge_type::attr_name))); + + // "in" must be gone + REQUIRE_FALSE(G.get_edge(*id1, *id2, std::string(in_edge_type::attr_name)).has_value()); + + // "knows" must still be visible + REQUIRE(G.get_edge(*id1, *id2, std::string(knows_edge_type::attr_name)).has_value()); + auto remaining = G.get_edges_by_type(std::string(knows_edge_type::attr_name)); + REQUIRE(remaining.size() == 1); + } + } diff --git a/tests/graph/node_operations.cpp b/tests/graph/node_operations.cpp index 7d3b7b6..0edec07 100644 --- a/tests/graph/node_operations.cpp +++ b/tests/graph/node_operations.cpp @@ -6,6 +6,7 @@ #include "catch2/catch_test_macros.hpp" #include "dsr/core/types/user_types.h" +#include "dsr/core/types/type_checking/dsr_edge_type.h" #include "dsr/api/dsr_api.h" #include "../utils.h" @@ -120,6 +121,28 @@ TEST_CASE("Graph node operations", "[NODE]") { REQUIRE_FALSE(r); } + SECTION("Deleting a node removes incoming edges from other nodes") { + auto n1 = Node::create(); + auto id1 = G.insert_node(n1); + REQUIRE(id1.has_value()); + + auto n2 = Node::create(); + auto id2 = G.insert_node(n2); + REQUIRE(id2.has_value()); + + // Create edge n1 -> n2 + auto e = Edge::create(*id1, *id2); + REQUIRE(G.insert_or_assign_edge(e)); + REQUIRE(G.get_edge(*id1, *id2, std::string(in_edge_type::attr_name)).has_value()); + + // Deleting n2 must also remove the incoming edge from n1 + REQUIRE(G.delete_node(*id2)); + 
REQUIRE_FALSE(G.get_edge(*id1, *id2, std::string(in_edge_type::attr_name)).has_value()); + + // n1 should still exist + REQUIRE(G.get_node(*id1).has_value()); + } + SECTION("Create a node with an user defined name") { auto name = random_string(); Node n; diff --git a/tests/synchronization/graph_synchronization.cpp b/tests/synchronization/graph_synchronization.cpp index 76b39df..c3fba1b 100644 --- a/tests/synchronization/graph_synchronization.cpp +++ b/tests/synchronization/graph_synchronization.cpp @@ -1,10 +1,6 @@ // // Created by jc on 5/11/24. // - - - - #include "dsr/api/dsr_api.h" #include "../utils.h" #include @@ -12,9 +8,28 @@ #include "catch2/catch_test_macros.hpp" #include "catch2/generators/catch_generators.hpp" +#include "dsr/core/topics/IDLGraph.hpp" + using namespace DSR; using namespace std::chrono_literals; +namespace DSR +{ +class DSRGraphTestAccess +{ +public: + static std::map Map(DSRGraph& graph) + { + return graph.Map(); + } + + static void join_full_graph(DSRGraph& graph, IDL::OrMap&& full_graph) + { + graph.join_full_graph(std::move(full_graph)); + } +}; +} + TEST_CASE("Connect and receive the graph from other agent", "[SYNCHRONIZATION][GRAPH]"){ @@ -27,4 +42,83 @@ TEST_CASE("Connect and receive the graph from other agent", "[SYNCHRONIZATION][G std::this_thread::sleep_for(200ms); REQUIRE(G2.size() == G.size()); -} \ No newline at end of file +} + +TEST_CASE("Same-process agents discover each other and exchange updates", "[SYNCHRONIZATION][GRAPH][REGRESSION][DDS]") +{ + const auto same_host = GENERATE(true, false); + auto ctx = make_edge_config_file(); + auto id1 = static_cast(rand() % 1000 + 1000); + auto id2 = id1 + 1; + + DSRGraph loader(random_string(10), id1, ctx, same_host); + DSRGraph follower(random_string(11), id2, std::string{}, same_host); + + auto wait_until = [](auto&& predicate, std::chrono::milliseconds timeout = 2000ms) + { + const auto deadline = std::chrono::steady_clock::now() + timeout; + while 
(std::chrono::steady_clock::now() < deadline) + { + if (predicate()) + return true; + std::this_thread::sleep_for(50ms); + } + return predicate(); + }; + + REQUIRE(wait_until([&] { return follower.size() == loader.size(); })); + REQUIRE(wait_until([&] { return !loader.get_connected_agents().empty(); })); + REQUIRE(wait_until([&] { return !follower.get_connected_agents().empty(); })); + + auto root_loader = loader.get_node("root"); + REQUIRE(root_loader.has_value()); + root_loader->attrs()["same_process_loader_" + std::to_string(same_host)] = + Attribute(std::string("loader"), get_unix_timestamp(), loader.get_agent_id()); + REQUIRE(loader.update_node(root_loader.value())); + + REQUIRE(wait_until([&] { + auto root_follower = follower.get_node("root"); + return root_follower.has_value() && + root_follower->attrs().contains("same_process_loader_" + std::to_string(same_host)); + })); + + auto root_follower = follower.get_node("root"); + REQUIRE(root_follower.has_value()); + root_follower->attrs()["same_process_follower_" + std::to_string(same_host)] = + Attribute(std::string("follower"), get_unix_timestamp(), follower.get_agent_id()); + REQUIRE(follower.update_node(root_follower.value())); + + REQUIRE(wait_until([&] { + auto updated_root_loader = loader.get_node("root"); + return updated_root_loader.has_value() && + updated_root_loader->attrs().contains("same_process_follower_" + std::to_string(same_host)); + })); +} + +TEST_CASE("Full graph join does not leave empty node registers after local deletion", "[SYNCHRONIZATION][GRAPH][REGRESSION]") +{ + auto ctx = make_empty_config_file(); + DSRGraph graph(random_string(10), static_cast(rand() % 4000), ctx); + const auto initial_size = graph.size(); + + auto node = Node::create("regression_node"); + node.id(1000); + node.agent_id(graph.get_agent_id()); + + REQUIRE(graph.insert_node_with_id(node).has_value()); + REQUIRE(graph.size() == initial_size + 1); + + IDL::OrMap full_graph; + full_graph.id(graph.get_agent_id()); + 
full_graph.to_id(graph.get_agent_id()); + full_graph.m(DSRGraphTestAccess::Map(graph)); + + REQUIRE(graph.delete_node(node.id())); + REQUIRE(graph.size() == initial_size); + REQUIRE_FALSE(graph.get_node(node.id()).has_value()); + + DSRGraphTestAccess::join_full_graph(graph, std::move(full_graph)); + + REQUIRE(graph.size() == initial_size); + REQUIRE_FALSE(graph.get_node(node.id()).has_value()); +} diff --git a/tools/same_host_smoke/agent_worker.py b/tools/same_host_smoke/agent_worker.py new file mode 100644 index 0000000..cc8953d --- /dev/null +++ b/tools/same_host_smoke/agent_worker.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +import argparse +import json +import os +import sys +import time +from pathlib import Path + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Run one DSR agent worker process") + parser.add_argument("--agent-name", required=True) + parser.add_argument("--agent-id", required=True, type=int) + parser.add_argument("--domain-id", required=True, type=int) + parser.add_argument("--same-host", required=True, choices=("true", "false")) + parser.add_argument("--graph-file", default="") + parser.add_argument("--artifacts-dir", required=True) + parser.add_argument("--local-attr", required=True) + parser.add_argument("--local-value", required=True) + parser.add_argument("--remote-attr", required=True) + parser.add_argument("--remote-value", required=True) + parser.add_argument("--startup-delay", default=0.0, type=float) + parser.add_argument("--sync-timeout", default=30.0, type=float) + parser.add_argument("--hold-seconds", default=0.0, type=float) + return parser.parse_args() + + +def wait_for(predicate, timeout_s: float, interval_s: float = 0.1, error: str = "timeout"): + deadline = time.monotonic() + timeout_s + while time.monotonic() < deadline: + value = predicate() + if value: + return value + time.sleep(interval_s) + raise TimeoutError(error) + + +def read_root_attr(graph, attr_name: str): + root = 
graph.get_node("root") + if root is None: + return None + if attr_name not in root.attrs: + return None + return root.attrs[attr_name].value + + +def main() -> int: + args = parse_args() + artifacts_dir = Path(args.artifacts_dir) + artifacts_dir.mkdir(parents=True, exist_ok=True) + result_path = artifacts_dir / f"{args.agent_name}.json" + + build_python_wrapper = Path(__file__).resolve().parents[2] / "build" / "python-wrapper" + sys.path.insert(0, str(build_python_wrapper)) + + import pydsr + + time.sleep(args.startup_delay) + + graph = pydsr.DSRGraph( + 0, + args.agent_name, + args.agent_id, + args.graph_file, + args.same_host == "true", + args.domain_id, + ) + + result = { + "agent_name": args.agent_name, + "agent_id": args.agent_id, + "domain_id": args.domain_id, + "same_host": args.same_host == "true", + "graph_file_loaded": bool(args.graph_file), + } + + try: + initial_nodes = wait_for( + lambda: len(graph.get_nodes()) if graph.get_node("root") is not None else 0, + timeout_s=args.sync_timeout, + error="graph root never became available", + ) + result["initial_node_count"] = initial_nodes + + root = wait_for( + lambda: graph.get_node("root"), + timeout_s=args.sync_timeout, + error="root node not available", + ) + root.attrs[args.local_attr] = pydsr.Attribute(args.local_value) + update_ok = graph.update_node(root) + if not update_ok: + raise RuntimeError(f"failed to update root with {args.local_attr}") + + observed_remote = wait_for( + lambda: read_root_attr(graph, args.remote_attr), + timeout_s=args.sync_timeout, + error=f"remote attribute {args.remote_attr} not observed", + ) + if observed_remote != args.remote_value: + raise RuntimeError( + f"unexpected value for {args.remote_attr}: {observed_remote!r} != {args.remote_value!r}" + ) + + final_root = graph.get_node("root") + result["final_node_count"] = len(graph.get_nodes()) + result["local_attr_value"] = final_root.attrs[args.local_attr].value + result["remote_attr_value"] = 
final_root.attrs[args.remote_attr].value + if args.hold_seconds > 0: + time.sleep(args.hold_seconds) + result["status"] = "ok" + except Exception as exc: + result["status"] = "error" + result["error"] = str(exc) + finally: + result_path.write_text(json.dumps(result, indent=2), encoding="utf-8") + + return 0 if result["status"] == "ok" else 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tools/same_host_smoke/run_same_host_smoke.sh b/tools/same_host_smoke/run_same_host_smoke.sh new file mode 100644 index 0000000..5a10da4 --- /dev/null +++ b/tools/same_host_smoke/run_same_host_smoke.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +ARTIFACT_ROOT="${ROOT_DIR}/.artifacts/same_host_smoke" +GRAPH_FILE="${ROOT_DIR}/python-wrapper/etc/autonomyLab_objects.simscene.json" +WORKER="${ROOT_DIR}/tools/same_host_smoke/agent_worker.py" + +export PYTHONPATH="${ROOT_DIR}/build/python-wrapper${PYTHONPATH:+:${PYTHONPATH}}" +export LD_LIBRARY_PATH="${ROOT_DIR}/build/api:${ROOT_DIR}/build/core${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" + +mkdir -p "${ARTIFACT_ROOT}" + +run_case() { + local same_host="$1" + local domain_id="$2" + local case_dir="${ARTIFACT_ROOT}/same_host_${same_host}" + + rm -rf "${case_dir}" + mkdir -p "${case_dir}" + + python3 "${WORKER}" \ + --agent-name "same_host_${same_host}_loader" \ + --agent-id $((domain_id * 10 + 1)) \ + --domain-id "${domain_id}" \ + --same-host "${same_host}" \ + --graph-file "${GRAPH_FILE}" \ + --artifacts-dir "${case_dir}" \ + --local-attr "sync_from_loader_${same_host}" \ + --local-value "loader_${same_host}" \ + --remote-attr "sync_from_follower_${same_host}" \ + --remote-value "follower_${same_host}" \ + > "${case_dir}/loader.log" 2>&1 & + local pid_a=$! 
+ + python3 "${WORKER}" \ + --agent-name "same_host_${same_host}_follower" \ + --agent-id $((domain_id * 10 + 2)) \ + --domain-id "${domain_id}" \ + --same-host "${same_host}" \ + --artifacts-dir "${case_dir}" \ + --local-attr "sync_from_follower_${same_host}" \ + --local-value "follower_${same_host}" \ + --remote-attr "sync_from_loader_${same_host}" \ + --remote-value "loader_${same_host}" \ + --startup-delay 1.0 \ + > "${case_dir}/follower.log" 2>&1 & + local pid_b=$! + + local rc=0 + wait "${pid_a}" || rc=1 + wait "${pid_b}" || rc=1 + + if [[ "${rc}" -ne 0 ]]; then + echo "Scenario same_host=${same_host} failed. See ${case_dir}" >&2 + return "${rc}" + fi + + python3 - "${case_dir}" "${same_host}" <<'PY' +import json +import sys +from pathlib import Path + +case_dir = Path(sys.argv[1]) +same_host = sys.argv[2] +loader = json.loads((case_dir / f"same_host_{same_host}_loader.json").read_text(encoding="utf-8")) +follower = json.loads((case_dir / f"same_host_{same_host}_follower.json").read_text(encoding="utf-8")) + +for result in (loader, follower): + if result["status"] != "ok": + raise SystemExit(f"{result['agent_name']} failed: {result.get('error', 'unknown error')}") + +if follower["initial_node_count"] <= 0: + raise SystemExit("Follower did not receive the initial graph") + +if loader["remote_attr_value"] != f"follower_{same_host}": + raise SystemExit("Loader did not observe follower mutation") + +if follower["remote_attr_value"] != f"loader_{same_host}": + raise SystemExit("Follower did not observe loader mutation") + +print(f"same_host={same_host}: PASS") +PY +} + +run_case true 41 +run_case false 42 + +echo "Artifacts written to ${ARTIFACT_ROOT}" diff --git a/tools/same_host_smoke/verify_transports.sh b/tools/same_host_smoke/verify_transports.sh new file mode 100644 index 0000000..d60dded --- /dev/null +++ b/tools/same_host_smoke/verify_transports.sh @@ -0,0 +1,148 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname 
"${BASH_SOURCE[0]}")/../.." && pwd)" +ARTIFACT_ROOT="${ROOT_DIR}/.artifacts/same_host_transport" +GRAPH_FILE="${ROOT_DIR}/python-wrapper/etc/autonomyLab_objects.simscene.json" +WORKER="${ROOT_DIR}/tools/same_host_smoke/agent_worker.py" + +export PYTHONPATH="${ROOT_DIR}/build/python-wrapper${PYTHONPATH:+:${PYTHONPATH}}" +export LD_LIBRARY_PATH="${ROOT_DIR}/build/api:${ROOT_DIR}/build/core${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" + +mkdir -p "${ARTIFACT_ROOT}" + +snapshot_case() { + local case_dir="$1" + local pid_a="$2" + local pid_b="$3" + + { + echo "loader_pid=${pid_a}" + echo "follower_pid=${pid_b}" + } > "${case_dir}/pids.txt" + + lsof -p "${pid_a}" > "${case_dir}/loader.lsof" || true + lsof -p "${pid_b}" > "${case_dir}/follower.lsof" || true + ss -uapn > "${case_dir}/ss.txt" || true + ip maddr show > "${case_dir}/ip_maddr.txt" || true +} + +verify_case() { + local same_host="$1" + local case_dir="${ARTIFACT_ROOT}/same_host_${same_host}" + local domain_id loader_id follower_id + + if [[ "${same_host}" == "true" ]]; then + domain_id=51 + loader_id=1501 + follower_id=1502 + else + domain_id=52 + loader_id=1511 + follower_id=1512 + fi + + mkdir -p "${case_dir}" + + python3 "${WORKER}" \ + --agent-name "transport_${same_host}_loader" \ + --agent-id "${loader_id}" \ + --domain-id "${domain_id}" \ + --same-host "${same_host}" \ + --graph-file "${GRAPH_FILE}" \ + --artifacts-dir "${case_dir}" \ + --local-attr "transport_loader_${same_host}" \ + --local-value "loader_${same_host}" \ + --remote-attr "transport_follower_${same_host}" \ + --remote-value "follower_${same_host}" \ + --hold-seconds 12 \ + > "${case_dir}/loader.log" 2>&1 & + local pid_a=$! 
+ + python3 "${WORKER}" \ + --agent-name "transport_${same_host}_follower" \ + --agent-id "${follower_id}" \ + --domain-id "${domain_id}" \ + --same-host "${same_host}" \ + --artifacts-dir "${case_dir}" \ + --local-attr "transport_follower_${same_host}" \ + --local-value "follower_${same_host}" \ + --remote-attr "transport_loader_${same_host}" \ + --remote-value "loader_${same_host}" \ + --startup-delay 1 \ + --hold-seconds 12 \ + > "${case_dir}/follower.log" 2>&1 & + local pid_b=$! + + sleep 4 + snapshot_case "${case_dir}" "${pid_a}" "${pid_b}" + + wait "${pid_a}" + wait "${pid_b}" + + python3 - "${case_dir}" "${same_host}" <<'PY' +import json +import sys +from pathlib import Path + +case_dir = Path(sys.argv[1]) +same_host = sys.argv[2] + +loader = json.loads(next(case_dir.glob("*loader.json")).read_text(encoding="utf-8")) +follower = json.loads(next(case_dir.glob("*follower.json")).read_text(encoding="utf-8")) +def read_if_exists(path: Path) -> str: + return path.read_text(encoding="utf-8", errors="ignore") if path.exists() else "" + +lsof_loader = read_if_exists(case_dir / "loader.lsof") +lsof_follower = read_if_exists(case_dir / "follower.lsof") +ss_txt = read_if_exists(case_dir / "ss.txt") +ip_maddr = read_if_exists(case_dir / "ip_maddr.txt") + +for result in (loader, follower): + if result["status"] != "ok": + raise SystemExit(f"{result['agent_name']} failed: {result.get('error', 'unknown error')}") + +combined_lsof = lsof_loader + "\n" + lsof_follower + +evidence_lines = [] +for line in combined_lsof.splitlines(): + if "/dev/shm/fastdds_" in line or "239.255." 
in line: + evidence_lines.append(line.strip()) + +uses_multicast = any(marker in (combined_lsof + "\n" + ss_txt + "\n" + ip_maddr) for marker in ( + "239.255.0.1", + "239.255.0.53", +)) +uses_shm = "/dev/shm" in combined_lsof + +summary = { + "same_host": same_host == "true", + "uses_multicast": uses_multicast, + "uses_shm": uses_shm, + "evidence_lines": evidence_lines[:12], +} +(case_dir / "transport_summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8") + +if same_host == "true" and not uses_shm: + raise SystemExit("shared-memory evidence not found for same_host=true") + +# Multicast is the discovery mechanism for cross-host (same_host=false). +# For same_host=true, DSR uses SHM + loopback-UDP unicast — no multicast +# group is joined, so absence of 239.255.x.x evidence is expected and correct. +if same_host == "false" and not uses_multicast: + raise SystemExit("multicast evidence not found for same_host=false") + +if same_host == "false" and uses_shm: + raise SystemExit("unexpected shared-memory evidence found for same_host=false") + +print(json.dumps(summary)) +print("evidence:") +for line in summary["evidence_lines"]: + print(f" {line}") +PY +} + +verify_case true +verify_case false + +echo "Transport artifacts written to ${ARTIFACT_ROOT}"