summaryrefslogblamecommitdiff
path: root/perf.hpp
blob: bcccba7851c5b02654d8365da87dc889f45da77d (plain) (tree)







































































































































































































































                                                                                
/**
 * MIT License
 *
 * Copyright (c) 2024 Johannes Stölp
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 **/

#ifndef LIBPERF_H
#define LIBPERF_H

#include <asm/unistd.h>
#include <asm/unistd_64.h>
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <unistd.h>

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include <vector>

// -- SYS ----------------------------------------------------------------------

namespace sys {
// Thin forwarding wrapper around the perf_event_open(2) system call, which has
// no libc wrapper of its own. All arguments are handed to the kernel verbatim
// and the raw syscall result is returned to the caller (a file descriptor on
// success, -1 on failure as checked by the callers in this file).
static inline long perf_event_open(struct perf_event_attr* attr,
                                   pid_t pid,
                                   int cpu,
                                   int group_fd,
                                   unsigned long flags) {
  const long fd_or_error =
      ::syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
  return fd_or_error;
}

namespace checked {
// Checked ioctl(2) syscall.
//
// Issues `rqst` with argument `arg` on `fd` and terminates the process if the
// call does not return 0. The check is unconditional (it does not rely on
// assert), so failures are still caught when the header is compiled with
// NDEBUG.
static inline void ioctl(int fd, unsigned long rqst, unsigned long arg) {
  const int ret = ::ioctl(fd, rqst, arg);
  if (ret == 0) {
    return;
  }

  if (ret < 0) {
    // errno is only meaningful when the call reported an error.
    std::perror("ioctl");
  } else {
    std::fprintf(stderr, "ioctl: unexpected return value %d\n", ret);
  }
  std::abort();
}

// Checked read(2) syscall.
//
// Reads exactly sizeof(T) bytes from `fd` into `buf` and terminates the
// process on an error or a short read. The check is unconditional (it does not
// rely on assert), so a partially filled `buf` can never be consumed silently
// in NDEBUG builds.
template <typename T>
static inline void read(int fd, T& buf) {
  const ssize_t ret = ::read(fd, &buf, sizeof(buf));
  if (ret < 0) {
    std::perror("read");
    std::abort();
  }
  // ret is non-negative here, so the conversion to an unsigned type is safe.
  if (static_cast<std::size_t>(ret) != sizeof(buf)) {
    std::fprintf(stderr, "read: short read (%zd of %zu bytes)\n", ret,
                 sizeof(buf));
    std::abort();
  }
}
}  // namespace checked
}  // namespace sys

// -- PERF_GROUP ---------------------------------------------------------------

// A set of perf events (performance counters) that are opened on
// construction, started/stopped together and closed on destruction.
//
// Events are opened with pid=0 and cpu=-1 (see perf_event_open(2): the calling
// process/thread on any CPU) and with kernel and hypervisor activity excluded.
class perf_group {
public:
  // Description of one counter to open.
  struct pmc_desc {
    const char* name;  // Label printed by dump().
    uint64_t type;     // Stored into perf_event_attr::type.
    uint64_t config;   // Stored into perf_event_attr::config.
  };

  // How the events are opened and controlled.
  enum mode : bool {
    RUN_SINGLE = false,  // Every event is opened and controlled on its own.
    RUN_GROUP  = true,   // First event is the group leader of all others.
  };

public:
  // Open `pmcs` as one event group (RUN_GROUP).
  perf_group(const std::vector<pmc_desc>& pmcs) : perf_group(RUN_GROUP, pmcs) {
  }
  // Open `pmcs` in mode `m`. The mode is fixed *before* any event is opened,
  // because it decides whether events are attached to a group leader.
  // Exits the process with status 1 if an event cannot be opened.
  perf_group(mode m, const std::vector<pmc_desc>& pmcs);
  // The object owns file descriptors: neither copyable nor movable.
  perf_group(const perf_group&)            = delete;
  perf_group(perf_group&&)                 = delete;
  perf_group& operator=(const perf_group&) = delete;
  perf_group& operator=(perf_group&&)      = delete;
  ~perf_group();

  void start();
  void stop();
  void dump() const;

private:
  // Layout of the data returned by read(2) on an event fd for the read_format
  // requested in the constructor (TOTAL_TIME_ENABLED | TOTAL_TIME_RUNNING).
  struct read_fmt {
    uint64_t value;
    uint64_t time_enabled;
    uint64_t time_running;
  };

  // One opened event.
  struct pmc {
    pmc_desc def;         // What was requested.
    int fd;               // Event file descriptor, closed by the destructor.
    read_fmt last_value;  // Sample taken by the most recent stop().
  };

private:
  mode mode_{RUN_GROUP};
  int leader_{-1};  // fd of the group leader; stays -1 in RUN_SINGLE mode.
  std::vector<pmc> pmcs_;

private:
  const read_fmt* find(uint64_t type, uint64_t config) const;
};

// -- PERF_GROUP IMPL ----------------------------------------------------------

inline perf_group::perf_group(mode m, const std::vector<pmc_desc>& pmcs)
    : mode_{m} {
  pmcs_.reserve(pmcs.size());

  // Attribute template shared by all events; only type/config/disabled are
  // adjusted per event below.
  struct perf_event_attr pe;
  std::memset(&pe, 0, sizeof(pe));
  pe.size           = sizeof(pe);
  pe.exclude_kernel = 1;
  pe.exclude_hv     = 1;
  pe.read_format =
      PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;

  for (const auto& pmc : pmcs) {
    // In group mode the first event becomes the leader. In single mode there
    // is no leader, so leader_ stays -1 and every event is opened standalone
    // (group_fd == -1).
    const bool is_leader = mode_ == RUN_GROUP && leader_ == -1;

    pe.type   = pmc.type;
    pe.config = pmc.config;
    // Only disable the leader, if running in group mode.
    pe.disabled = is_leader;

    const int fd = static_cast<int>(
        sys::perf_event_open(&pe, 0 /* pid */, -1 /* cpu */,
                             leader_ /* group_fd */, 0 /* flags */));
    if (fd == -1) {
      std::perror("perf_event_open");
      std::exit(1);
    }

    if (is_leader) {
      leader_ = fd;
    }

    // Reset performance counter.
    sys::checked::ioctl(fd, PERF_EVENT_IOC_RESET, 0);

    pmcs_.push_back({pmc, fd, {}});
  }
}

// Release every event file descriptor that the constructor opened.
inline perf_group::~perf_group() {
  for (auto it = pmcs_.begin(), last = pmcs_.end(); it != last; ++it) {
    ::close(it->fd);
  }
}

inline void perf_group::start() {
  if (mode_ == RUN_GROUP) {
    assert(leader_ > -1);
    sys::checked::ioctl(leader_, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
    sys::checked::ioctl(leader_, PERF_EVENT_IOC_ENABLE, 0);
  } else {
    for (auto& pmc : pmcs_) {
      sys::checked::ioctl(pmc.fd, PERF_EVENT_IOC_RESET, 0);
    }
    for (auto& pmc : pmcs_) {
      sys::checked::ioctl(pmc.fd, PERF_EVENT_IOC_ENABLE, 0);
    }
  }
}

inline void perf_group::stop() {
  if (mode_ == RUN_GROUP) {
    assert(leader_ > -1);
    sys::checked::ioctl(leader_, PERF_EVENT_IOC_DISABLE, 0);
  } else {
    for (auto& pmc : pmcs_) {
      sys::checked::ioctl(pmc.fd, PERF_EVENT_IOC_DISABLE, 0);
    }
  }

  for (auto& pmc : pmcs_) {
    sys::checked::read(pmc.fd, pmc.last_value);
  }
}

inline void perf_group::dump() const {
  auto correct = [](const read_fmt& data) -> uint64_t {
    if (data.value == 0)
      return 0ul;

    // Correction of error due pmc multiplexing (simple approximation, assuming
    // linear increment).
    double multiplex_correction = static_cast<double>(data.time_enabled) /
                                  static_cast<double>(data.time_running);
    return static_cast<double>(data.value) * multiplex_correction;
  };

  for (auto& pmc : pmcs_) {
    auto value =
        mode_ == RUN_GROUP ? correct(pmc.last_value) : pmc.last_value.value;
    std::printf("%-10s: %12ld | E=%ld R=%ld\n", pmc.def.name, value,
                pmc.last_value.time_enabled, pmc.last_value.time_running);
  }

  if (auto clk = find(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK)) {
    double val = static_cast<double>(clk->value);
    std::printf("Profiled %.3fs %.3fms %.3fus\n",
                val / 1000.0 / 1000.0 / 1000.0, val / 1000.0 / 1000.0,
                val / 1000.0);
  }
}

// Look up the most recent sample of the first counter whose descriptor has the
// given `type` and `config`. Yields nullptr when no counter matches.
inline const perf_group::read_fmt* perf_group::find(uint64_t type,
                                                    uint64_t config) const {
  const read_fmt* hit = nullptr;
  for (auto it = pmcs_.cbegin(); hit == nullptr && it != pmcs_.cend(); ++it) {
    const bool same_type   = (it->def.type == type);
    const bool same_config = (it->def.config == config);
    if (same_type && same_config) {
      hit = &it->last_value;
    }
  }
  return hit;
}

#endif