aboutsummaryrefslogtreecommitdiffhomepage
path: root/development/pgo.html
blob: 7f8d9c8356c4655c0c0a3fcf78dc14e02073d9ff (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
<!DOCTYPE HTML>
<html lang="en" class="light sidebar-visible" dir="ltr">
    <head>
        <!-- Book generated using mdBook -->
        <meta charset="UTF-8">
        <title>pgo - Notes</title>


        <!-- Custom HTML head -->

        <meta name="description" content="">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <meta name="theme-color" content="#ffffff">

        <link rel="icon" href="../favicon.svg">
        <link rel="shortcut icon" href="../favicon.png">
        <link rel="stylesheet" href="../css/variables.css">
        <link rel="stylesheet" href="../css/general.css">
        <link rel="stylesheet" href="../css/chrome.css">
        <link rel="stylesheet" href="../css/print.css" media="print">

        <!-- Fonts -->
        <link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
        <link rel="stylesheet" href="../fonts/fonts.css">

        <!-- Highlight.js Stylesheets -->
        <link rel="stylesheet" href="../highlight.css">
        <link rel="stylesheet" href="../tomorrow-night.css">
        <link rel="stylesheet" href="../ayu-highlight.css">

        <!-- Custom theme stylesheets -->


        <!-- Provide site root to javascript -->
        <script>
            // Relative path from this page to the book root.
            var path_to_root = "../";
            // Fallback theme: "navy" when the OS/browser prefers a dark color scheme, "light" otherwise.
            var default_theme = "light";
            if (window.matchMedia("(prefers-color-scheme: dark)").matches) {
                default_theme = "navy";
            }
        </script>
        <!-- Start loading toc.js asap -->
        <script src="../toc.js"></script>
    </head>
    <body>
    <div id="body-container">
        <!-- Work around some values being stored in localStorage wrapped in quotes -->
        <script>
            // Strip a surrounding pair of double quotes from the stored theme and
            // sidebar values. Each key is guarded independently: getItem() returns
            // null for a missing key, and calling startsWith() on null would throw
            // and skip the fix-up of the other key.
            try {
                var theme = localStorage.getItem('mdbook-theme');
                var sidebar = localStorage.getItem('mdbook-sidebar');

                if (theme !== null && theme.startsWith('"') && theme.endsWith('"')) {
                    localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1));
                }

                if (sidebar !== null && sidebar.startsWith('"') && sidebar.endsWith('"')) {
                    localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1));
                }
            } catch (e) {
                // Best effort only: accessing localStorage can throw; in that case
                // the stored values are simply left as they are.
            }
        </script>

        <!-- Set the theme before any content is loaded, prevents flash -->
        <script>
            // Resolve the theme: the stored value if readable, otherwise default_theme.
            var theme;
            try {
                theme = localStorage.getItem('mdbook-theme');
            } catch (e) {
                // Storage not readable; the default below is used instead.
            }
            if (theme == null) {
                theme = default_theme;
            }
            // Swap the static "light" class on <html> for the resolved theme and
            // mark the document as script-enabled.
            const html = document.documentElement;
            html.classList.remove('light');
            html.classList.add(theme);
            html.classList.add("js");
        </script>

        <input type="checkbox" id="sidebar-toggle-anchor" class="hidden">

        <!-- Hide / unhide sidebar before it is displayed -->
        <script>
            var sidebar = null;
            var sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
            if (document.body.clientWidth < 1080) {
                // Narrow viewport: always start with the sidebar hidden.
                sidebar = 'hidden';
            } else {
                // Wide viewport: use the stored state, defaulting to visible.
                try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch(e) { }
                if (!sidebar) {
                    sidebar = 'visible';
                }
            }
            // Mirror the state onto the toggle checkbox and the <html> class list.
            sidebar_toggle.checked = (sidebar === 'visible');
            html.classList.remove('sidebar-visible');
            html.classList.add("sidebar-" + sidebar);
        </script>

        <nav id="sidebar" class="sidebar" aria-label="Table of contents">
            <!-- populated by js -->
            <mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
            <noscript>
                <!-- Fallback when scripting is disabled; the title gives the frame an accessible name -->
                <iframe class="sidebar-iframe-outer" src="../toc.html" title="Table of contents"></iframe>
            </noscript>
            <div id="sidebar-resize-handle" class="sidebar-resize-handle">
                <div class="sidebar-resize-indicator"></div>
            </div>
        </nav>

        <div id="page-wrapper" class="page-wrapper">

            <div class="page">
                <div id="menu-bar-hover-placeholder"></div>
                <div id="menu-bar" class="menu-bar sticky">
                    <div class="left-buttons">
                        <label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
                            <i class="fa fa-bars"></i>
                        </label>
                        <button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
                            <i class="fa fa-paint-brush"></i>
                        </button>
                        <ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
                            <li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
                        </ul>
                        <button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
                            <i class="fa fa-search"></i>
                        </button>
                    </div>

                    <h1 class="menu-title">Notes</h1>

                    <div class="right-buttons">
                        <a href="../print.html" title="Print this book" aria-label="Print this book">
                            <i id="print-button" class="fa fa-print"></i>
                        </a>
                        <a href="https://github.com/johannst/notes" title="Git repository" aria-label="Git repository">
                            <i id="git-repository-button" class="fa fa-github"></i>
                        </a>

                    </div>
                </div>

                <div id="search-wrapper" class="hidden">
                    <form id="searchbar-outer" class="searchbar-outer">
                        <!-- The placeholder is not an accessible name, so the input is labelled explicitly -->
                        <input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-label="Search this book" aria-controls="searchresults-outer" aria-describedby="searchresults-header">
                    </form>
                    <div id="searchresults-outer" class="searchresults-outer hidden">
                        <div id="searchresults-header" class="searchresults-header"></div>
                        <ul id="searchresults">
                        </ul>
                    </div>
                </div>

                <!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
                <script>
                    {
                        // Block scope keeps these helper bindings out of the global scope.
                        const isSidebarShown = sidebar === 'visible';
                        document.getElementById('sidebar-toggle').setAttribute('aria-expanded', isSidebarShown);
                        document.getElementById('sidebar').setAttribute('aria-hidden', !isSidebarShown);
                        // Sidebar links are only tabbable while the sidebar is shown.
                        const linkTabIndex = isSidebarShown ? 0 : -1;
                        for (const link of Array.from(document.querySelectorAll('#sidebar a'))) {
                            link.setAttribute('tabIndex', linkTabIndex);
                        }
                    }
                </script>

                <div id="content" class="content">
                    <main>
                        <h1 id="profile-guided-optimization-pgo"><a class="header" href="#profile-guided-optimization-pgo">Profile guided optimization (pgo)</a></h1>
<p><code>pgo</code> is an optimization technique to optimize a program for its usual
workload.</p>
<p>It is applied in two phases:</p>
<ol>
<li>Collect profiling data (best with representative benchmarks).</li>
<li>Optimize program based on collected profiling data.</li>
</ol>
<p>The following simple program is used as demonstrator.</p>
<pre><code class="language-c">#include &lt;stdio.h&gt;

#define NOINLINE __attribute__((noinline))

NOINLINE void foo() { puts("foo()"); }
NOINLINE void bar() { puts("bar()"); }

int main(int argc, char *argv[]) {
  if (argc == 2) {
    foo();
  } else {
    bar();
  }
}
</code></pre>
<h2 id="clang"><a class="header" href="#clang">clang</a></h2>
<p>On the current machine with <code>clang 15.0.7</code>, the following code is generated for
the <code>main()</code> function.</p>
<pre><code class="language-x86asm"># clang -o test test.cc -O3

0000000000001160 &lt;main&gt;:
    1160:  50                   push   rax
    ; Jump if argc != 2.
    1161:  83 ff 02             cmp    edi,0x2
    1164:  75 09                jne    116f &lt;main+0xf&gt;
    ; foo() is on the hot path (fall-through).
    1166:  e8 d5 ff ff ff       call   1140 &lt;_Z3foov&gt;
    116b:  31 c0                xor    eax,eax
    116d:  59                   pop    rcx
    116e:  c3                   ret
    ; bar() is on the cold path (branch).
    116f:  e8 dc ff ff ff       call   1150 &lt;_Z3barv&gt;
    1174:  31 c0                xor    eax,eax
    1176:  59                   pop    rcx
    1177:  c3                   ret
</code></pre>
<p>The following shows how to compile with profiling instrumentation and how to
optimize the final program with the collected profiling data (<a href="https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization">llvm
pgo</a>).</p>
<p>The arguments to <code>./test</code> are chosen such that <code>10/11</code> runs call <code>bar()</code>, which
is currently on the <code>cold path</code>.</p>
<pre><code class="language-bash"># Compile test program with profiling instrumentation.
clang -o test test.cc -O3 -fprofile-instr-generate

# Collect profiling data from multiple runs.
for i in {0..10}; do
    LLVM_PROFILE_FILE="prof.clang/%p.profraw" ./test $(seq 0 $i)
done

# Merge raw profiling data into single profile data.
llvm-profdata merge -o pgo.profdata prof.clang/*.profraw

# Optimize test program with profiling data.
clang -o test test.cc -O3 -fprofile-use=pgo.profdata
</code></pre>
<blockquote>
<p>NOTE: If <code>LLVM_PROFILE_FILE</code> is not given, the profile data is written to
<code>default.profraw</code>, which is re-written on each run. If the <code>LLVM_PROFILE_FILE</code>
contains a <code>%m</code> in the filename, a unique integer will be generated and
consecutive runs will update the same generated profraw file.
Alternatively, <code>LLVM_PROFILE_FILE</code> can specify a new file every time; however, that requires
more storage in general.</p>
</blockquote>
<p>After optimizing the program with the profiling data, the <code>main()</code> function
looks as follows.</p>
<pre><code class="language-x86asm">0000000000001060 &lt;main&gt;:
    1060:  50                    push   rax
    ; Jump if argc == 2.
    1061:  83 ff 02              cmp    edi,0x2
    1064:  74 09                 je     106f &lt;main+0xf&gt;
    ; bar() is on the hot path (fall-through).
    1066:  e8 e5 ff ff ff        call   1050 &lt;_Z3barv&gt;
    106b:  31 c0                 xor    eax,eax
    106d:  59                    pop    rcx
    106e:  c3                    ret
    ; foo() is on the cold path (branch).
    106f:  e8 cc ff ff ff        call   1040 &lt;_Z3foov&gt;
    1074:  31 c0                 xor    eax,eax
    1076:  59                    pop    rcx
    1077:  c3                    ret
</code></pre>
<h2 id="gcc"><a class="header" href="#gcc">gcc</a></h2>
<p>With <code>gcc 13.2.1</code> on the current machine, the optimizer puts <code>bar()</code> on the
<code>hot path</code> by default.</p>
<pre><code class="language-x86asm">0000000000001040 &lt;main&gt;:
    1040:  48 83 ec 08          sub    rsp,0x8
    ; Jump if argc == 2.
    1044:  83 ff 02             cmp    edi,0x2
    1047:  74 0c                je     1055 &lt;main+0x15&gt;
    ; bar() is on the hot path (fall-through).
    1049:  e8 22 01 00 00       call   1170 &lt;_Z3barv&gt;
    104e:  31 c0                xor    eax,eax
    1050:  48 83 c4 08          add    rsp,0x8
    1054:  c3                   ret
    ; foo() is on the cold path (branch).
    1055:  e8 06 01 00 00       call   1160 &lt;_Z3foov&gt;
    105a:  eb f2                jmp    104e &lt;main+0xe&gt;
    105c:  0f 1f 40 00          nop    DWORD PTR [rax+0x0]

</code></pre>
<p>The following shows how to compile with profiling instrumentation and how to
optimize the final program with the collected profiling data.</p>
<p>The arguments to <code>./test</code> are chosen such that <code>2/3</code> runs call <code>foo()</code>, which
is currently on the <code>cold path</code>.</p>
<pre><code class="language-bash">gcc -o test test.cc -O3 -fprofile-generate
./test 1
./test 1
./test 2 2
gcc -o test test.cc -O3 -fprofile-use
</code></pre>
<blockquote>
<p>NOTE: Consecutive runs update the generated <code>test.gcda</code> profile data file
rather than re-write it.</p>
</blockquote>
<p>After optimizing the program with the profiling data, the <code>main()</code> function looks as follows.</p>
<pre><code class="language-x86asm">0000000000001040 &lt;main.cold&gt;:
    ; bar() is on the cold path (branch).
    1040:  e8 05 00 00 00       call   104a &lt;_Z3barv&gt;
    1045:  e9 25 00 00 00       jmp    106f &lt;main+0xf&gt;

0000000000001060 &lt;main&gt;:
    1060:  51                   push   rcx
    ; Jump if argc != 2.
    1061:  83 ff 02             cmp    edi,0x2
    1064:  0f 85 d6 ff ff ff    jne    1040 &lt;main.cold&gt;
    ; foo() is on the hot path (fall-through).
    106a:  e8 11 01 00 00       call   1180 &lt;_Z3foov&gt;
    106f:  31 c0                xor    eax,eax
    1071:  5a                   pop    rdx
    1072:  c3                   ret
</code></pre>

                    </main>

                    <nav class="nav-wrapper" aria-label="Page navigation">
                        <!-- Mobile navigation buttons -->
                            <a rel="prev" href="../development/gcov.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
                                <i class="fa fa-angle-left"></i>
                            </a>

                            <a rel="next prefetch" href="../linux/index.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
                                <i class="fa fa-angle-right"></i>
                            </a>

                        <div style="clear: both"></div>
                    </nav>
                </div>
            </div>

            <nav class="nav-wide-wrapper" aria-label="Page navigation">
                    <a rel="prev" href="../development/gcov.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
                        <i class="fa fa-angle-left"></i>
                    </a>

                    <a rel="next prefetch" href="../linux/index.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
                        <i class="fa fa-angle-right"></i>
                    </a>
            </nav>

        </div>




        <script>
            // Global flag published on window before the scripts below are loaded;
            // presumably read by book.js — TODO confirm.
            window.playground_copyable = true;
        </script>


        <script src="../elasticlunr.min.js"></script>
        <script src="../mark.min.js"></script>
        <script src="../searcher.js"></script>

        <script src="../clipboard.min.js"></script>
        <script src="../highlight.js"></script>
        <script src="../book.js"></script>

        <!-- Custom JS scripts -->


    </div>
    </body>
</html>