diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index aecbe84..638b840 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -16,6 +16,6 @@ -- [ ] All CI checks pass +- [ ] Pre-commit checks pass - [ ] New/updated tests cover the changes - [ ] Tested locally with `conan create .` diff --git a/benchmarks/armv8-Macos-clang-20.txt b/benchmarks/armv8-Macos-clang-20.txt index 7c63fb8..34343d7 100644 --- a/benchmarks/armv8-Macos-clang-20.txt +++ b/benchmarks/armv8-Macos-clang-20.txt @@ -1,23 +1,23 @@ -Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory +Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory This does not affect benchmark measurements, only the metadata output. ***WARNING*** Failed to set thread affinity. Estimated CPU frequency may be incorrect. -2026-04-19T20:39:17-07:00 +2026-04-20T15:44:47-07:00 Running ./build/armv8-Macos-clang-20/Release/benchmark Run on (10 X 24 MHz CPU s) CPU Caches: L1 Data 64 KiB L1 Instruction 128 KiB L2 Unified 4096 KiB (x10) -Load Average: 2.28, 2.58, 2.87 +Load Average: 3.26, 2.57, 2.57 ---------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------- -bm_function_pointer_call 2.18 ns 2.18 ns 321788778 -bm_virtual_call 2.19 ns 2.19 ns 320281115 -bm_virtual_call_variant 3.11 ns 3.10 ns 225398553 -bm_future_sync_return 4.11 ns 4.07 ns 172938999 -bm_future_coroutine 26.5 ns 26.5 ns 26412802 -bm_future_sync_await 19.8 ns 19.7 ns 35554653 -bm_future_mixed 11.0 ns 11.0 ns 63749374 -bm_future_void_coroutine 28.1 ns 28.1 ns 24933659 -bm_future_void_coroutine_context_resume 26.9 ns 26.9 ns 26055431 +bm_function_pointer_call 2.19 ns 2.19 ns 318488725 +bm_virtual_call 2.20 ns 2.20 ns 310187442 +bm_virtual_call_variant 3.11 ns 3.11 ns 225400005 +bm_future_sync_return 4.04 ns 
4.04 ns 172996698 +bm_future_coroutine 26.1 ns 26.1 ns 26803184 +bm_future_sync_await 17.9 ns 17.9 ns 38689637 +bm_future_mixed 9.63 ns 9.62 ns 72107708 +bm_future_void_coroutine 26.1 ns 26.1 ns 26864904 +bm_future_void_coroutine_context_resume 26.2 ns 26.2 ns 26681913 diff --git a/modules/coroutine.cppm b/modules/coroutine.cppm index a453f6a..e4be93a 100644 --- a/modules/coroutine.cppm +++ b/modules/coroutine.cppm @@ -399,8 +399,13 @@ public: constexpr void initialize_stack_memory(std::span p_stack_memory) { cancel(); - m_stack = p_stack_memory; - m_stack_pointer = m_stack.data(); + + // NOTE: subtract 1 because we use the end of the stack for holding the + // length of the stack. + auto const capacity = p_stack_memory.size() - 1uz; + p_stack_memory.back() = capacity; + m_stack_pointer = &p_stack_memory.front(); + m_stack_end = &p_stack_memory.back(); } /** @@ -648,19 +653,6 @@ public: } } - /** - * @brief Get the amount of stack memory used by active coroutines - * - * This method returns how much stack space has been consumed by currently - * active coroutines. - * - * @return The number of `stack_word` sized words used in the stack - */ - [[nodiscard]] constexpr auto memory_used() const noexcept - { - return m_stack_pointer - m_stack.data(); - } - /** * @brief Get the total capacity of the stack memory * @@ -671,7 +663,7 @@ public: */ [[nodiscard]] constexpr auto capacity() const noexcept { - return m_stack.size(); + return *m_stack_end; } /** @@ -684,7 +676,20 @@ public: */ [[nodiscard]] constexpr auto memory_remaining() const noexcept { - return capacity() - memory_used(); + return m_stack_end - m_stack_pointer; + } + + /** + * @brief Get the amount of stack memory used by active coroutines + * + * This method returns how much stack space has been consumed by currently + * active coroutines. 
+ * + * @return The number of `stack_word` sized words used in the stack + */ + [[nodiscard]] constexpr auto memory_used() const noexcept + { + return capacity() - memory_remaining(); } /** @@ -838,7 +843,7 @@ private: size_t const words_to_allocate = 1uz + ((p_bytes + mask) >> shift); auto const new_stack_index = m_stack_pointer + words_to_allocate; - if (new_stack_index > &m_stack.back()) [[unlikely]] { + if (new_stack_index > m_stack_end) [[unlikely]] { throw bad_coroutine_alloc(this); } @@ -856,16 +861,16 @@ private: // A concern for this library is how large the context objet is thus the word // sizes for each field is denoted below. + //////////////////////////////////////////////////////--- // word 0 + blocked_by m_state = blocked_by::nothing; // 1B (u8) pad 4 + sleep_duration m_sleep_time = sleep_duration::zero(); // 4B (u32) std::coroutine_handle<> m_active_handle = noop_sentinel; // word 1 stack_word* m_stack_pointer = nullptr; // word 2 - std::span m_stack{}; // word 3-4 - context_listener* m_listener = nullptr; // word 5 - context* m_original = nullptr; // word 6 - context* m_awaited_context = nullptr; // word 7 - context* m_awaiting_caller = nullptr; // word 8 - // ---- Members below are below word length --- - sleep_duration m_sleep_time = sleep_duration::zero(); // 4B (uint32_t) - blocked_by m_state = blocked_by::nothing; // 1B (uint8_t) + stack_word* m_stack_end{}; // word 3 + context_listener* m_listener = nullptr; // word 4 + context* m_original = nullptr; // word 5 + context* m_awaited_context = nullptr; // word 6 + context* m_awaiting_caller = nullptr; // word 7 }; /** @@ -942,7 +947,7 @@ public: // Restore parent stack, by setting its range to be the start of its // stack and the end of our stack. - m_parent->m_stack = { m_parent->m_stack.begin(), m_stack.end() }; + m_parent->m_stack_end = m_stack_end; } private: @@ -966,13 +971,12 @@ private: // Our proxy will take control over the rest of the unused stack memory from // the above context. 
- auto remaining_words = p_parent.m_stack_pointer - p_parent.m_stack.data(); - m_stack = p_parent.m_stack.last(remaining_words); - m_stack_pointer = m_stack.data(); + m_stack_pointer = p_parent.m_stack_pointer; + m_stack_end = p_parent.m_stack_end; // Shrink the parent's stack to its current stack pointer, preventing it // from allocating again. - p_parent.m_stack = { p_parent.m_stack.data(), p_parent.m_stack_pointer }; + p_parent.m_stack_end = p_parent.m_stack_pointer; // If this is a proxy, take its pointer to the origin if (p_parent.is_proxy()) { @@ -1005,8 +1009,15 @@ public: "Stack memory must be greater than 0 words."); inplace_context() - : context(m_stack) + : context() { + // NOTE: Passing m_stack to context() in the initializer list would + // initialize the stack. But when inplace_context's constructor runs, it + // clears the memory of m_stack, which would overwrite the capacity value + // that initialize_stack_memory() writes into m_stack.back(). + // + // And thus the line below is load bearing. + initialize_stack_memory(m_stack); } inplace_context(inplace_context const&) = delete; diff --git a/tests/context_listener.test.cpp b/tests/context_listener.test.cpp index f086b5d..f0c5f9d 100644 --- a/tests/context_listener.test.cpp +++ b/tests/context_listener.test.cpp @@ -52,11 +52,11 @@ void context_listener_test() co_return; }; - // Exercise 1 + // Exercise auto future1 = coro(ctx1); auto future2 = coro(ctx2); - // Verify 1 + // Verify expect(that % not future1.done()); expect(that % not future2.done()); expect(that % async::blocked_by::nothing == ctx1.state()); @@ -65,11 +65,11 @@ void context_listener_test() expect(that % nullptr == listener_obj.sync_blocked); expect(that % nullptr == listener_obj.sync_blocker); - // Exercise 2 + // Exercise future1.resume(); // should acquire resource and get blocked by time. 
future2.resume(); // should block by sync - // Verify 2 + // Verify expect(that % async::blocked_by::time == ctx1.state()); expect(that % async::blocked_by::sync == ctx2.state()); expect(that % 1ms == ctx1.sleep_time()); @@ -78,11 +78,11 @@ void context_listener_test() expect(that % &ctx2 == listener_obj.sync_blocked); expect(that % &ctx1 == listener_obj.sync_blocker); - // Exercise 3 + // Exercise listener_obj.reset(); ctx1.unblock(); - // Verify 3 + // Verify expect(that % async::blocked_by::nothing == ctx1.state()); expect(that % async::blocked_by::sync == ctx2.state()); expect(that % &ctx1 == mutex.owner()); @@ -90,11 +90,11 @@ void context_listener_test() expect(that % nullptr == listener_obj.sync_blocked); expect(that % nullptr == listener_obj.sync_blocker); - // Exercise 4 + // Exercise listener_obj.reset(); ctx2.unblock(); - // Verify 4 + // Verify expect(that % async::blocked_by::nothing == ctx1.state()); expect(that % async::blocked_by::nothing == ctx2.state()); expect(that % &ctx1 == mutex.owner()); @@ -106,7 +106,7 @@ void context_listener_test() listener_obj.reset(); future2.resume(); - // Verify 4: ctx2 is re-blocked by sync because ctx1 still has the lock + // Verify: ctx2 is re-blocked by sync because ctx1 still has the lock expect(that % async::blocked_by::nothing == ctx1.state()); expect(that % async::blocked_by::sync == ctx2.state()); expect(that % &ctx1 == mutex.owner()); @@ -114,12 +114,12 @@ void context_listener_test() expect(that % &ctx2 == listener_obj.sync_blocked); expect(that % &ctx1 == listener_obj.sync_blocker); - // Exercise 5 + // Exercise listener_obj.reset(); ctx1.unblock(); // unblock the time based wait future1.resume(); // finishes and releases lock - // Verify 5 + // Verify expect(that % future1.done()); expect(that % async::blocked_by::sync == ctx2.state()); expect(that % nullptr == mutex.owner()); @@ -127,12 +127,12 @@ void context_listener_test() expect(that % nullptr == listener_obj.sync_blocked); expect(that % nullptr == 
listener_obj.sync_blocker); - // Exercise 6 + // Exercise listener_obj.reset(); ctx2.unblock(); future2.resume(); // acquires lock blocks by time - // Verify 6 + // Verify expect(that % async::blocked_by::nothing == ctx1.state()); expect(that % async::blocked_by::time == ctx2.state()); expect(that % 1ms == ctx2.sleep_time()); @@ -141,12 +141,12 @@ void context_listener_test() expect(that % nullptr == listener_obj.sync_blocked); expect(that % nullptr == listener_obj.sync_blocker); - // Exercise 7 + // Exercise listener_obj.reset(); ctx2.unblock(); future2.resume(); // finishes and releases lock - // Verify 7 + // Verify expect(that % async::blocked_by::nothing == ctx1.state()); expect(that % async::blocked_by::nothing == ctx2.state()); expect(that % future1.done()); diff --git a/tests/mutex.test.cpp b/tests/mutex.test.cpp index fa4e285..f98af67 100644 --- a/tests/mutex.test.cpp +++ b/tests/mutex.test.cpp @@ -13,124 +13,122 @@ void guards_tests() { using namespace boost::ut; using namespace std::chrono_literals; - "Exclusive Access"_test = []() { - // Setup - struct listener : public async::context_listener + + // Setup + struct listener : public async::context_listener + { + void on_sync_block(async::context& p_blocked, + async::context const& p_blocker) noexcept override { - void on_sync_block(async::context& p_blocked, - async::context const& p_blocker) noexcept override - { - - std::println("✉️ on_sync_block"); - sync_blocker = &p_blocker; - sync_blocked = &p_blocked; - } - }; - - async::inplace_context<1024> ctx1; - async::inplace_context<1024> ctx2; - - async::mutex mutex; - - listener test_listener; - ctx1.set_listener(&test_listener); - ctx2.set_listener(&test_listener); - - auto single_resource = - [&](async::context& p_context) -> async::future { - std::println("Executing 'single_resource' coroutine"); - - // Acquire guard for this scope - auto guard = co_await mutex.lock(p_context); - - // setup dma transaction... 
- std::println("Waiting on io complete flag, blocking by I/O"); - - // Would normally wrap this in a while loop to check if the resource is - // fread. - co_await p_context.block_by_signal(); - - // Normally NO cleanup would should be done at this point as it could - // become a race condition. - std::println("IO operation complete! Returning!"); - co_return; - }; - - // Exercise - expect(that % 0 == ctx1.memory_used()); - expect(that % 0 == ctx2.memory_used()); - - std::println("🧱 Future setup"); - auto access_first = single_resource(ctx1); - auto access_second = single_resource(ctx2); - - expect(that % 0 < ctx1.memory_used()); - expect(that % 0 < ctx2.memory_used()); - expect(that % async::blocked_by::nothing == ctx1.state()); - expect(that % async::blocked_by::nothing == ctx2.state()); - - // access_first will claim the resource and will return control, and be - // blocked by IO. - std::println("▶️ [1] Resume 1st"); - access_first.resume(); - expect(that % async::blocked_by::signal == ctx1.state()); - expect(that % async::blocked_by::nothing == ctx2.state()); - expect(that % nullptr == sync_blocker); - expect(that % nullptr == sync_blocked); - - std::println("▶️ [2] Resume 2nd"); - access_second.resume(); - expect(that % async::blocked_by::signal == ctx1.state()); - expect(that % async::blocked_by::sync == ctx2.state()); - expect(that % &ctx1 == sync_blocker); - expect(that % &ctx2 == sync_blocked); - sync_blocker = nullptr; - sync_blocked = nullptr; - - std::println("🟢 [3] Unblock 2nd Context"); - ctx2.unblock(); - expect(that % async::blocked_by::signal == ctx1.state()); - expect(that % async::blocked_by::nothing == ctx2.state()); - - std::println("▶️ [4] Resume 2nd, should re-block on sync"); - access_second.resume(); - expect(that % async::blocked_by::signal == ctx1.state()); - expect(that % async::blocked_by::sync == ctx2.state()); - expect(that % &ctx1 == sync_blocker); - expect(that % &ctx2 == sync_blocked); - sync_blocker = nullptr; - sync_blocked = 
nullptr; - - std::println("🟢 [5] Unblock & Release Exclusive"); - mutex.unblock_and_release(); - expect(that % async::blocked_by::nothing == ctx1.state()); - expect(that % async::blocked_by::sync == ctx2.state()); - - std::println("▶️ [6] Resume 1st, this should finish the operation"); - access_first.resume(); - expect(that % async::blocked_by::nothing == ctx1.state()); - expect(that % ctx1.done()); - expect(that % async::blocked_by::sync == ctx2.state()); - - std::println("🟢 [7] Unblock context 2"); - ctx2.unblock_without_notification(); - std::println("▶️ [7] Resume 2nd, should block by signal"); - access_second.resume(); - expect(that % async::blocked_by::signal == ctx2.state()); - - std::println("🟢 [8] Unblock & Release Exclusive"); - mutex.unblock_and_release(); - expect(that % async::blocked_by::nothing == ctx2.state()); - expect(that % not ctx2.done()); - - std::println("▶️ [9] Resume 2nd should complete"); - access_second.resume(); - expect(that % ctx2.state() == async::blocked_by::nothing); - expect(that % access_second.done()); - - expect(that % 0 == ctx1.memory_used()); - expect(that % 0 == ctx2.memory_used()); + + std::println("✉️ on_sync_block"); + sync_blocker = &p_blocker; + sync_blocked = &p_blocked; + } + }; + + async::inplace_context<1024> ctx1; + async::inplace_context<1024> ctx2; + + async::mutex mutex; + + listener test_listener; + ctx1.set_listener(&test_listener); + ctx2.set_listener(&test_listener); + + auto single_resource = [&](async::context& p_context) -> async::future { + std::println("Executing 'single_resource' coroutine"); + + // Acquire guard for this scope + auto guard = co_await mutex.lock(p_context); + + // setup dma transaction... + std::println("Waiting on io complete flag, blocking by I/O"); + + // Would normally wrap this in a while loop to check if the resource is + // freed. + co_await p_context.block_by_signal(); + + // Normally NO cleanup should be done at this point as it could + // become a race condition. 
+ std::println("IO operation complete! Returning!"); + co_return; }; + + // Exercise + expect(that % 0 == ctx1.memory_used()); + expect(that % 0 == ctx2.memory_used()); + + std::println("🧱 Future setup"); + auto access_first = single_resource(ctx1); + auto access_second = single_resource(ctx2); + + expect(that % 0 < ctx1.memory_used()); + expect(that % 0 < ctx2.memory_used()); + expect(that % async::blocked_by::nothing == ctx1.state()); + expect(that % async::blocked_by::nothing == ctx2.state()); + + // access_first will claim the resource and will return control, and be + // blocked by IO. + std::println("▶️ [1] Resume 1st"); + access_first.resume(); + expect(that % async::blocked_by::signal == ctx1.state()); + expect(that % async::blocked_by::nothing == ctx2.state()); + expect(that % nullptr == sync_blocker); + expect(that % nullptr == sync_blocked); + + std::println("▶️ [2] Resume 2nd"); + access_second.resume(); + expect(that % async::blocked_by::signal == ctx1.state()); + expect(that % async::blocked_by::sync == ctx2.state()); + expect(that % &ctx1 == sync_blocker); + expect(that % &ctx2 == sync_blocked); + sync_blocker = nullptr; + sync_blocked = nullptr; + + std::println("🟢 [3] Unblock 2nd Context"); + ctx2.unblock(); + expect(that % async::blocked_by::signal == ctx1.state()); + expect(that % async::blocked_by::nothing == ctx2.state()); + + std::println("▶️ [4] Resume 2nd, should re-block on sync"); + access_second.resume(); + expect(that % async::blocked_by::signal == ctx1.state()); + expect(that % async::blocked_by::sync == ctx2.state()); + expect(that % &ctx1 == sync_blocker); + expect(that % &ctx2 == sync_blocked); + sync_blocker = nullptr; + sync_blocked = nullptr; + + std::println("🟢 [5] Unblock & Release Exclusive"); + mutex.unblock_and_release(); + expect(that % async::blocked_by::nothing == ctx1.state()); + expect(that % async::blocked_by::sync == ctx2.state()); + + std::println("▶️ [6] Resume 1st, this should finish the operation"); + 
access_first.resume(); + expect(that % async::blocked_by::nothing == ctx1.state()); + expect(that % ctx1.done()); + expect(that % async::blocked_by::sync == ctx2.state()); + + std::println("🟢 [7] Unblock context 2"); + ctx2.unblock_without_notification(); + std::println("▶️ [7] Resume 2nd, should block by signal"); + access_second.resume(); + expect(that % async::blocked_by::signal == ctx2.state()); + + std::println("🟢 [8] Unblock & Release Exclusive"); + mutex.unblock_and_release(); + expect(that % async::blocked_by::nothing == ctx2.state()); + expect(that % not ctx2.done()); + + std::println("▶️ [9] Resume 2nd should complete"); + access_second.resume(); + expect(that % ctx2.state() == async::blocked_by::nothing); + expect(that % access_second.done()); + + expect(that % 0 == ctx1.memory_used()); + expect(that % 0 == ctx2.memory_used()); }; int main() diff --git a/tests/sync_wait.test.cpp b/tests/sync_wait.test.cpp index f1c6abd..4ee5501 100644 --- a/tests/sync_wait.test.cpp +++ b/tests/sync_wait.test.cpp @@ -15,6 +15,7 @@ void context() "sync_wait --> future"_test = []() { // Setup async::inplace_context ctx; + auto const expected_starting_capacity = ctx.capacity(); auto future = [](async::context&) -> async::future { co_return 5; @@ -29,12 +30,13 @@ void context() expect(that % future.done()); expect(that % future.has_value()); expect(that % 5 == future.value()); - expect(that % stack_size == ctx.capacity()); + expect(that % expected_starting_capacity == ctx.capacity()); }; "co_await coroutine"_test = []() { // Setup async::inplace_context ctx; + auto const expected_starting_capacity = ctx.capacity(); static constexpr int expected_return_value = 1413; unsigned step = 0; @@ -80,12 +82,13 @@ void context() expect(that % expected_return_value == future.value()); expect(that % 4 == step); - expect(that % stack_size == ctx.capacity()); + expect(that % expected_starting_capacity == ctx.capacity()); }; "co_await coroutine"_test = []() { // Setup async::inplace_context 
ctx; + auto const expected_starting_capacity = ctx.capacity(); static constexpr int return_value1 = 1413; static constexpr int return_value2 = 4324; @@ -122,12 +125,13 @@ void context() expect(that % expected_total == future.value()); expect(that % 2 == step); - expect(that % stack_size == ctx.capacity()); + expect(that % expected_starting_capacity == ctx.capacity()); }; "co_await Xms + sync_wait"_test = []() { // Setup async::inplace_context ctx; + auto const expected_starting_capacity = ctx.capacity(); static constexpr int return_value1 = 1413; static constexpr int return_value2 = 4324; @@ -166,7 +170,7 @@ void context() expect(that % sleep_cycles == std::vector{ 44ms, 100ms, 50ms }); - expect(that % stack_size == ctx.capacity()); + expect(that % expected_starting_capacity == ctx.capacity()); }; };