Skip to content

Commit ef95445

Browse files
committed
f
1 parent 722dfc2 commit ef95445

File tree

1 file changed

+7
-208
lines changed

1 file changed

+7
-208
lines changed

perf

Lines changed: 7 additions & 208 deletions
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,11 @@
156156
> import perf; // #include <perf>
157157
>
158158
> int main() {
159-
> erf::profiler profiler{...};
159+
> perf::profiler profiler{
160+
> perf::stat::tsc, perf::stat::cycles, perf::trace::instructions
161+
> };
160162
>
161-
> constexpr auto invoke = [](auto& profiler, auto&& fn, auto&&... ts) {
163+
> auto invoke = [&](auto&& fn, auto&&... ts) {
162164
> profiler.start();
163165
> perf::compiler::prevent_elision(fn(ts...));
164166
> profiler.stop();
@@ -169,7 +171,9 @@
169171
> perf::log(profiler[]);
170172
> perf::verify(profiler[perf::stat::tsc] > 0ns);
171173
>
172-
> perf::analyzer analyzer{perf::mca::assembly, perf::mca::timeline};
174+
> perf::analyzer analyzer{
175+
> perf::mca::assembly, perf::mca::timeline
176+
> };
173177
> analyzer << profiler[perf::trace::instructions];
174178
>
175179
> perf::log(analyzer[]);
@@ -528,211 +532,6 @@
528532
> <details>
529533
> <summary>Profiling/Analyzing</summary>
530534
>
531-
> ```cpp
532-
> // stat::timer
533-
> perf::stat::timer t{perf::stat::steady_time, perf::stat::cpu_time};
534-
>
535-
> t.start();
536-
> fn();
537-
> t.stop();
538-
>
539-
> assert(t[perf::stat::steady_time] > 0ns);
540-
> assert(t[perf::stat::cpu_time] > 0ns);
541-
>
542-
> // `t[]` - returns std::tuple of all timers
543-
> assert(std::get<0u>(t[]) > 0ns); // steady_time
544-
> assert(std::get<1u>(t[]) > 0ns); // cpu_time
545-
> ```
546-
>
547-
> ```cpp
548-
> perf::stat::tsc - time-stamp-counter
549-
> perf::stat::steady_time - monotonic time
550-
> perf::stat::cpu_time - user time + sys time
551-
> perf::stat::thread_time - cpu time for the current thread
552-
> perf::stat::real_time - wall clock time
553-
> ```
554-
>
555-
> ```cpp
556-
> // stat::counter
557-
>
558-
> // metrics/dsl
559-
> // top_down
560-
> ```
561-
>
562-
> ```cpp
563-
> // instructions per cycle (ipc)
564-
> ipc = instructions / cycles;
565-
>
566-
> // cycles per instruction (cpi, inverse of ipc)
567-
> cpi = cycles / instructions;
568-
>
569-
> // branch miss rate (branch misses per branch instruction)
570-
> branch_miss_rate = branch_misses / branches;
571-
>
572-
> // cache miss rate (cache misses per cache reference)
573-
> cache_miss_rate = cache_misses / cache_references;
574-
>
575-
> // llc miss rate
576-
> llc_miss_rate = llc_misses / cache_references;
577-
>
578-
> // l1 data cache miss rate
579-
> l1_dcache_miss_rate = l1_dcache_load_misses / l1_dcache_loads;
580-
>
581-
> // l1 instruction cache miss rate
582-
> l1_icache_miss_rate = l1_icache_load_misses / l1_icache_loads;
583-
>
584-
> // dtlb miss rate
585-
> dtlb_miss_rate = dtlb_load_misses / dtlb_loads;
586-
>
587-
> // itlb miss rate
588-
> itlb_miss_rate = itlb_load_misses / itlb_loads;
589-
>
590-
> // stalled cycles rate (frontend)
591-
> frontend_stall_rate = stalled_cycles_frontend / cycles;
592-
>
593-
> // stalled cycles rate (backend)
594-
> backend_stall_rate = stalled_cycles_backend / cycles;
595-
>
596-
> // memory access rate
597-
> memory_stall_ratio = stalled_cycles_backend / cycles;
598-
>
599-
> // overall stall rate
600-
> total_stall_rate = (stalled_cycles_backend + stalled_cycles_frontend) / cycles;
601-
>
602-
> // cpu migrations per cycle
603-
> cpu_migration_rate = cpu_migrations / cycles;
604-
>
605-
> // context switches per cycle
606-
> context_switch_rate = context_switches / cycles;
607-
>
608-
> // page fault rate
609-
> page_fault_rate = faults / cycles;
610-
>
611-
> // major page fault rate (major faults per cycle)
612-
> major_fault_rate = major_faults / cycles;
613-
>
614-
> // minor page fault rate (minor faults per cycle)
615-
> minor_fault_rate = minor_faults / cycles;
616-
> ```
617-
>
618-
> ```cpp
619-
> // record::sampler
620-
> ```
621-
>
622-
> ```cpp
623-
> // https://github.com/qlibs/prof
624-
> // prof::callgrind
625-
> // prof::...
626-
> ```
627-
>
628-
> ```cpp
629-
> // trace::tracer
630-
> auto&& instructions = perf::trace::trace(
631-
> [&] { return fizz_buzz(i); }
632-
> )[perf::trace::instructions];
633-
>
634-
> perf::analyzer analyzer{perf::mca::assembly};
635-
> for (auto&& i : (analyzer << instructions)[perf::mca::assembly]) {
636-
> perf::log("{}", i);
637-
> }
638-
> ```
639-
>
640-
> ```cpp
641-
> // perf::analyzer
642-
> ```
643-
>
644-
> </details>
645-
>
646-
> <details>
647-
> <summary>Benchmarking</summary>
648-
>
649-
> ```cpp
650-
> // perf::runner
651-
> auto&& runner = [](auto&& fn, auto&&... ts) {
652-
> perf::dataset ds{};
653-
> perf::stat::timer timer{perf::stat::steady_time};
654-
> timer.start();
655-
> for (auto i = 0u; i < 1'000; ++i) {
656-
> compiler::prevent_elision(fn(ts...));
657-
> }
658-
> timer.stop();
659-
> ds += timer;
660-
> return ds;
661-
> };
662-
>
663-
> // `runner` deduces what to run based on the usage of `bench[...]`
664-
> // unless otherwise explicitly specified
665-
> perf::runner bench{perf::named("runner", runner)};
666-
>
667-
> static auto fizz_buzz = [](int n) {
668-
> if (n % 15 == 0) {
669-
> return "FizzBuzz";
670-
> } else if (n % 3 == 0) {
671-
> return "Fizz";
672-
> } else if (n % 5 == 0) {
673-
> return "Buzz";
674-
> } else {
675-
> return "Unknown";
676-
> }
677-
> };
678-
>
679-
> bench(fizz_buzz, 15);
680-
> bench(fizz_buzz, 3);
681-
> bench(fizz_buzz, 5);
682-
>
683-
> perf::report(bench[perf::stat::steady_time]);
684-
> ```
685-
>
686-
> ```cpp
687-
> // perf::data
688-
> bench(fizz_buzz, perf::data::sequence<int>{{3,5,15}});
689-
> bench(fizz_buzz, perf::data::uniform<int>{.min = 0, .max = 15});
690-
> // choice
691-
> ```
692-
>
693-
> ```cpp
694-
> // perf::bench::latency
695-
> auto add = [](int a, int b) { return a + b; };
696-
> auto sub = [](int a, int b) { return a - b; };
697-
> auto mult = [](int a, int b) { return a * b; };
698-
> auto div = [](int a, int b) { return a / b; };
699-
>
700-
> perf::runner bench{perf::bench::latency{}};
701-
>
702-
> bench(add, 0, 0);
703-
> bench(sub, 0, 0);
704-
> bench(mult, 0, 0);
705-
> bench(div, 0, 0);
706-
>
707-
> using perf::metric::operator/;
708-
> auto ops = perf::bench::operations;
709-
> perf::report(bench[perf::stat::tsc / ops, perf::stat::cycles / ops]);
710-
> ```
711-
>
712-
> ```cpp
713-
> // perf::bench::throughput
714-
> ```
715-
>
716-
> ```cpp
717-
> latency = time / operations;
718-
> throughput = operations / time;
719-
> inverse_throughput = time / operations;
720-
> ```
721-
>
722-
> ```cpp
723-
> // perf::bench::baseline
724-
> // perf::bench::debug
725-
> ```
726-
>
727-
> ```cpp
728-
> // perf::plot::hist
729-
> // perf::plot::bar
730-
> // perf::plot::box
731-
> // perf::plot::line
732-
> // perf::plot::ecdf
733-
> // perf::plot::complexity
734-
> ```
735-
>
736535
> </details>
737536
>
738537
> [`Synopsis`](https://github.com/qlibs/perf/blob/main/perf.cppm#L39)

0 commit comments

Comments
 (0)