aboutsummaryrefslogtreecommitdiffhomepage
path: root/content/2023-09-01-cas-llsc-aba/a64-basic-llsc.cc
blob: 82c68e193a0aefffd876352801e118600284b979 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <thread>

#ifndef __aarch64__
#error "This must be compiled for arm64!"
#endif

// NOTES on the inline assembly:
//
// * AArch64 constraint.
//   https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html
//
//   Q: A memory address which uses a single base register with no offset.
//
// * Output constraint.
//   https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Output-Operands
//
//   Use the '&' constraint modifier on all output operands that must not
//   overlap an input. Otherwise, GCC may allocate the output operand in the
//   same register as an unrelated input operand, on the assumption that the
//   assembler code consumes its inputs before producing outputs. This
//   assumption may be false if the assembler code actually consists of more
//   than one instruction.

// LDXR: Load exclusive register wrapper.
//
// Read from ADDR and mark the address for exclusive access (sets the
// exclusive monitor).
//
// Return the value read from memory.
//
// NOTE: No memory ordering semantics.
//
// https://developer.arm.com/documentation/ddi0596/latest/Base-Instructions/LDXR--Load-Exclusive-Register-?lang=en
inline uint64_t ldxr(uint64_t* addr) {
  uint64_t ret;
  // "=r": loaded value lands in any general-purpose register.
  // "Q" : memory operand using a single base register with no offset, the
  //       only addressing mode LDXR accepts (see NOTES above).
  // "memory" clobber: stops the compiler from caching *addr across the asm.
  // No '&' modifier needed here: this is a single instruction, so the input
  // is consumed in the same instruction that produces the output.
  asm volatile("ldxr %0, %1" : "=r"(ret) : "Q"(*addr) : "memory");
  return ret;
}

// STXR: Store exclusive register wrapper.
//
// Conditionally write VAL to ADDR if ADDR is marked for exclusive access by a
// previous exclusive load (eg LDXR).
//
// Return true if the write was successful, false otherwise. (The underlying
// STXR status result is 0 on success and 1 on failure; this wrapper converts
// it to a bool.)
//
// NOTE: No memory ordering semantics.
//
// https://developer.arm.com/documentation/ddi0596/latest/Base-Instructions/STXR--Store-Exclusive-Register-?lang=en
inline bool stxr(uint64_t* addr, uint64_t val) {
  uint32_t ret;
  // %w0 : 32-bit view of the status register (STXR writes a 32-bit status).
  // "=&r": early-clobber -- the status output is written by the same
  //        instruction that reads VAL, so it must not be allocated to VAL's
  //        register (see NOTES above).
  // "=Q" : *addr as an output memory operand (single base register, no
  //        offset).
  asm volatile("stxr %w0, %2, %1"
               : "=&r"(ret), "=Q"(*addr)
               : "r"(val)
               : "memory");
  return ret == 0;
}

int main() {
  uint64_t mem = 42;

  // Concurrent writer: a plain store to the exclusive location clears the
  // exclusive monitor set by the LDXR below.
  auto writer = std::thread([&mem]() {
    mem = 2222;
    // Full memory barrier.
    __sync_synchronize();
  });

  const uint64_t observed = ldxr(&mem);

  // Busy-wait to give the writer thread a chance to run. Avoid anything with
  // an explicit context switch (eg a syscall), since that would clear the
  // exclusive monitor -- though the scheduler may still interrupt us.
  // Delay is "tuned" for my ARM silicon.
  for (int spin = 0; spin < (1 << 13); ++spin) {
    asm volatile("nop");
  }

  // Full memory barrier.
  __sync_synchronize();

  const bool ok = stxr(&mem, 1111);
  printf("old: %lu -> mem: %lu | ok: %d\n", observed, mem, ok);

  writer.join();
  return ok ? 0 : 1;
}