diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Makefile | 11 | ||||
-rw-r--r-- | example/counting.cc | 21 | ||||
-rw-r--r-- | perf.hpp | 282 |
4 files changed, 315 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..89f9ac0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+out/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..89b2a3d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,11 @@
+EXAMPLES := $(wildcard example/*)
+BINARIES := $(patsubst example/%.cc, out/%, $(EXAMPLES))
+
+all: $(BINARIES)
+
+out/%: example/%.cc
+	@mkdir -p out
+	g++ -o $@ -g -O2 $^ -I $(PWD)
+
+clean:
+	$(RM) -r out
diff --git a/example/counting.cc b/example/counting.cc
new file mode 100644
index 0000000..a2af7f9
--- /dev/null
+++ b/example/counting.cc
@@ -0,0 +1,21 @@
+#include <linux/perf_event.h>
+#include "perf.hpp"
+
+int main() {
+  perf_group pg({
+      {.name = "CPUCLK",
+       .type = PERF_TYPE_SOFTWARE,
+       .config = PERF_COUNT_SW_CPU_CLOCK},
+      {.name = "TASK",
+       .type = PERF_TYPE_SOFTWARE,
+       .config = PERF_COUNT_SW_TASK_CLOCK},
+      {.name = "INSN",
+       .type = PERF_TYPE_HARDWARE,
+       .config = PERF_COUNT_HW_INSTRUCTIONS},
+  });
+  pg.start();
+  asm volatile("nop");
+  asm volatile("nop");
+  pg.stop();
+  pg.dump();
+}
diff --git a/perf.hpp b/perf.hpp
new file mode 100644
index 0000000..bcccba7
--- /dev/null
+++ b/perf.hpp
@@ -0,0 +1,282 @@
+/**
+ * MIT License
+ *
+ * Copyright (c) 2024 Johannes Stölp
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ **/
+
+#ifndef LIBPERF_H
+#define LIBPERF_H
+
+#include <asm/unistd.h>
+#include <asm/unistd_64.h>
+#include <linux/perf_event.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include <vector>
+
+// -- SYS ----------------------------------------------------------------------
+
+namespace sys {
+// Wrapper for perf_event_open(2) syscall.
+// Returns the raw syscall result; the caller treats -1 as failure.
+static inline long perf_event_open(struct perf_event_attr* attr,
+                                   pid_t pid,
+                                   int cpu,
+                                   int group_fd,
+                                   unsigned long flags) {
+  return ::syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+namespace checked {
+// Checked ioctl(2) syscall.
+// Aborts on failure. The check is an explicit branch instead of assert(), so
+// it is still performed when NDEBUG is defined.
+static inline void ioctl(int fd, unsigned long rqst, unsigned long arg) {
+  if (::ioctl(fd, rqst, arg) != 0) {
+    std::perror("ioctl");
+    std::abort();
+  }
+}
+
+// Checked read(2) syscall, reading exactly sizeof(buf) bytes into buf.
+// Aborts on an error or a short read, also when NDEBUG is defined.
+template <typename T>
+static inline void read(int fd, T& buf) {
+  ssize_t ret = ::read(fd, &buf, sizeof(buf));
+  if (ret < 0) {
+    std::perror("read");
+    std::abort();
+  }
+  if (static_cast<size_t>(ret) != sizeof(buf)) {
+    std::fprintf(stderr, "read: short read (%zd of %zu bytes)\n", ret,
+                 sizeof(buf));
+    std::abort();
+  }
+}
+} // namespace checked
+} // namespace sys
+
+// -- PERF_GROUP ---------------------------------------------------------------
+
+// Opens a set of perf events and measures them between start() and stop().
+class perf_group {
+public:
+  // One event to measure: a display name plus the perf_event_attr type and
+  // config values selecting it.
+  struct pmc_desc {
+    const char* name;
+    uint64_t type;
+    uint64_t config;
+  };
+
+  enum mode : bool {
+    RUN_SINGLE = false,  // Every event is opened and controlled on its own.
+    RUN_GROUP = true,    // All events are grouped behind the first event.
+  };
+
+public:
+  // Opens pmcs in RUN_GROUP mode.
+  perf_group(const std::vector<pmc_desc>& pmcs) : perf_group(RUN_GROUP, pmcs) {}
+  // Opens pmcs in mode m. Exits the process if an event cannot be opened.
+  perf_group(mode m, const std::vector<pmc_desc>& pmcs);
+  perf_group(const perf_group&) = delete;
+  perf_group(perf_group&&) = delete;
+  ~perf_group();
+
+  // Resets and enables the events.
+  void start();
+  // Disables the events and reads back their current values.
+  void stop();
+  // Prints the values read by the last stop() to stdout.
+  void dump() const;
+
+private:
+  // Record read from an event fd; field order matches the read_format
+  // requested in the constructor.
+  struct read_fmt {
+    uint64_t value;
+    uint64_t time_enabled;
+    uint64_t time_running;
+  };
+
+  struct pmc {
+    pmc_desc def;
+    int fd;
+    read_fmt last_value;
+  };
+
+private:
+  mode mode_{RUN_GROUP};
+  int leader_{-1};
+  std::vector<pmc> pmcs_;
+
+private:
+  // Returns the last value of the first event matching type and config, or
+  // nullptr if there is none.
+  const read_fmt* find(uint64_t type, uint64_t config) const;
+};
+
+// -- PERF_GROUP IMPL ----------------------------------------------------------
+
+// mode_ is set before the first event is opened, because it decides whether
+// the events are attached to a group leader or opened stand-alone.
+inline perf_group::perf_group(mode m, const std::vector<pmc_desc>& pmcs)
+    : mode_(m) {
+  pmcs_.reserve(pmcs.size());
+
+  struct perf_event_attr pe;
+  std::memset(&pe, 0, sizeof(pe));
+  pe.size = sizeof(pe);
+  pe.exclude_kernel = 1;
+  pe.exclude_hv = 1;
+  pe.read_format =
+      PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+  for (auto& pmc : pmcs) {
+    // In group mode the first event opened becomes the leader.
+    bool is_leader = mode_ == RUN_GROUP ? leader_ == -1 : false;
+
+    pe.type = pmc.type;
+    pe.config = pmc.config;
+    // Only disable the leader, if running in group mode.
+    pe.disabled = is_leader;
+
+    // leader_ is still -1 for the leader itself and for every event in
+    // single mode, so those are opened without a group.
+    int fd = sys::perf_event_open(&pe, 0 /* pid */, -1 /* cpu */,
+                                  leader_ /*group_fd */, 0 /* flags */);
+    if (fd == -1) {
+      perror("perf_event_open");
+      exit(1);
+    }
+
+    if (is_leader) {
+      leader_ = fd;
+    }
+
+    // Reset performance counter.
+    sys::checked::ioctl(fd, PERF_EVENT_IOC_RESET, 0);
+
+    pmcs_.push_back({pmc, fd, {}});
+  }
+}
+
+inline perf_group::~perf_group() {
+  for (auto& pmc : pmcs_) {
+    ::close(pmc.fd);
+  }
+}
+
+inline void perf_group::start() {
+  // Nothing was opened, so there is no leader and nothing to enable.
+  if (pmcs_.empty()) {
+    return;
+  }
+
+  if (mode_ == RUN_GROUP) {
+    assert(leader_ > -1);
+    sys::checked::ioctl(leader_, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
+    sys::checked::ioctl(leader_, PERF_EVENT_IOC_ENABLE, 0);
+  } else {
+    for (auto& pmc : pmcs_) {
+      sys::checked::ioctl(pmc.fd, PERF_EVENT_IOC_RESET, 0);
+    }
+    for (auto& pmc : pmcs_) {
+      sys::checked::ioctl(pmc.fd, PERF_EVENT_IOC_ENABLE, 0);
+    }
+  }
+}
+
+inline void perf_group::stop() {
+  // Nothing was opened, so there is no leader and nothing to read.
+  if (pmcs_.empty()) {
+    return;
+  }
+
+  if (mode_ == RUN_GROUP) {
+    assert(leader_ > -1);
+    sys::checked::ioctl(leader_, PERF_EVENT_IOC_DISABLE, 0);
+  } else {
+    for (auto& pmc : pmcs_) {
+      sys::checked::ioctl(pmc.fd, PERF_EVENT_IOC_DISABLE, 0);
+    }
+  }
+
+  for (auto& pmc : pmcs_) {
+    sys::checked::read(pmc.fd, pmc.last_value);
+  }
+}
+
+inline void perf_group::dump() const {
+  auto correct = [](const read_fmt& data) -> uint64_t {
+    // Nothing to scale; this also keeps a zero time_running out of the
+    // division below.
+    if (data.value == 0 || data.time_running == 0)
+      return data.value;
+
+    // Correction of error due to pmc multiplexing (simple approximation,
+    // assuming linear increment).
+    double multiplex_correction = static_cast<double>(data.time_enabled) /
+                                  static_cast<double>(data.time_running);
+    return static_cast<uint64_t>(static_cast<double>(data.value) *
+                                 multiplex_correction);
+  };
+
+  for (auto& pmc : pmcs_) {
+    uint64_t value =
+        mode_ == RUN_GROUP ? correct(pmc.last_value) : pmc.last_value.value;
+    // The counters are uint64_t; print them through unsigned long long so the
+    // conversion specifiers match the argument types.
+    std::printf("%-10s: %12llu | E=%llu R=%llu\n", pmc.def.name,
+                static_cast<unsigned long long>(value),
+                static_cast<unsigned long long>(pmc.last_value.time_enabled),
+                static_cast<unsigned long long>(pmc.last_value.time_running));
+  }
+
+  // If the task clock was measured, also print it as s / ms / us; the raw
+  // value is treated as nanoseconds.
+  if (auto clk = find(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK)) {
+    double val = static_cast<double>(clk->value);
+    std::printf("Profiled %.3fs %.3fms %.3fus\n",
+                val / 1000.0 / 1000.0 / 1000.0, val / 1000.0 / 1000.0,
+                val / 1000.0);
+  }
+}
+
+inline const perf_group::read_fmt* perf_group::find(uint64_t type,
+                                                    uint64_t config) const {
+  for (auto& pmc : pmcs_) {
+    if (pmc.def.type == type && pmc.def.config == config) {
+      return &pmc.last_value;
+    }
+  }
+  return nullptr;
+}
+
+#endif