aboutsummaryrefslogtreecommitdiffhomepage
path: root/content/2023-09-01-cas-llsc-aba/a64-basic-llsc.cc
diff options
context:
space:
mode:
Diffstat (limited to 'content/2023-09-01-cas-llsc-aba/a64-basic-llsc.cc')
-rw-r--r--  content/2023-09-01-cas-llsc-aba/a64-basic-llsc.cc  89
1 file changed, 89 insertions, 0 deletions
diff --git a/content/2023-09-01-cas-llsc-aba/a64-basic-llsc.cc b/content/2023-09-01-cas-llsc-aba/a64-basic-llsc.cc
new file mode 100644
index 0000000..82c68e1
--- /dev/null
+++ b/content/2023-09-01-cas-llsc-aba/a64-basic-llsc.cc
@@ -0,0 +1,89 @@
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <thread>
+
+#ifndef __aarch64__
+#error "This must be compiled for arm64!"
+#endif
+
+// NOTES on the inline assembly:
+//
+// * AArch64 constraint.
+// https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html
+//
+// Q: A memory address which uses a single base register with no offset.
+//
+// * Output constraint.
+// https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Output-Operands
+//
+// Use the '&' constraint modifier on all output operands that must not
+// overlap an input. Otherwise, GCC may allocate the output operand in the
+// same register as an unrelated input operand, on the assumption that the
+// assembler code consumes its inputs before producing outputs. This
+// assumption may be false if the assembler code actually consists of more
+// than one instruction.
+
+// LDXR: Load exclusive register wrapper.
+//
+// Read from ADDR and mark the address for exclusive access (exclusive monitor).
+//
+// Return the value read from memory.
+//
+// NOTE: No memory ordering semantics.
+//
+// https://developer.arm.com/documentation/ddi0596/latest/Base-Instructions/LDXR--Load-Exclusive-Register-?lang=en
+inline uint64_t ldxr(uint64_t* addr) {
+  uint64_t ret;
+  asm volatile("ldxr %0, %1" : "=r"(ret) : "Q"(*addr) : "memory");
+  return ret;
+}
+
+// STXR: Store exclusive register wrapper.
+//
+// Conditionally write VAL to ADDR if ADDR is marked for exclusive access by a
+// previous exclusive load (eg LDXR).
+//
+// Return true if the write succeeded (STXR status result 0), false otherwise.
+//
+// NOTE: No memory ordering semantics.
+//
+// https://developer.arm.com/documentation/ddi0596/latest/Base-Instructions/STXR--Store-Exclusive-Register-?lang=en
+inline bool stxr(uint64_t* addr, uint64_t val) {
+  uint32_t ret;
+  asm volatile("stxr %w0, %2, %1"
+               : "=&r"(ret), "=Q"(*addr)
+               : "r"(val)
+               : "memory");
+  return ret == 0;
+}
+
+int main() {
+  uint64_t mem = 42;
+
+  auto T1 = std::thread([&mem]() {
+    // Write to exclusive location (does clear exclusive monitor).
+    mem = 2222;
+    // Full memory barrier.
+    __sync_synchronize();
+  });
+
+  uint64_t old = ldxr(&mem);
+
+  // Some artificial delay w/o an explicit context switch (eg syscall) as that
+  // would clear the exclusive monitor, though it can still be interrupted by
+  // the scheduler.
+  // Delay is "tuned" for my ARM silicon.
+  for (int i = 0; i < (1 << 13); ++i) {
+    asm volatile("nop");
+  }
+
+  // Full memory barrier.
+  __sync_synchronize();
+
+  bool ok = stxr(&mem, 1111);
+  printf("old: %lu -> mem: %lu | ok: %d\n", old, mem, ok);
+
+  T1.join();
+  return ok ? 0 : 1;  // Exit 0 if the exclusive store succeeded, 1 if not.
+}