Skip to content

Commit aa129f7

Browse files
vdso: switch from DT_HASH to DT_GNU_HASH (aarch64)
Trying to run latest CRIU on CentOS Stream 10 or Ubuntu 24.04 (aarch64) fails like this: # criu/criu check -v4 [...] (00.096460) vdso: Parsing at ffffb2e2a000 ffffb2e2c000 (00.096539) vdso: PT_LOAD p_vaddr: 0 (00.096567) vdso: DT_STRTAB: 1d0 (00.096592) vdso: DT_SYMTAB: 128 (00.096616) vdso: DT_STRSZ: 8a (00.096640) vdso: DT_SYMENT: 18 (00.096663) Error (criu/pie-util-vdso.c:193): vdso: Not all dynamic entries are present (00.096688) Error (criu/vdso.c:627): vdso: Failed to fill self vdso symtable (00.096713) Error (criu/kerndat.c:1906): kerndat_vdso_fill_symtable failed when initializing kerndat. (00.096812) Found mmap_min_addr 0x10000 (00.096881) files stat: fs/nr_open 1073741816 (00.096908) Error (criu/crtools.c:267): Could not initialize kernel features detection. This seems to be related to the kernel (6.12.0-41.el10.aarch64). The Ubuntu user-space is running in a container on the same kernel. Looking at the kernel this seems to be related to: commit 48f6430505c0b0498ee9020ce3cf9558b1caaaeb Author: Fangrui Song <i@maskray.me> Date: Thu Jul 18 10:34:23 2024 -0700 arm64/vdso: Remove --hash-style=sysv glibc added support for .gnu.hash in 2006 and .hash has been obsoleted for more than one decade in many Linux distributions. Using --hash-style=sysv might imply unaddressed issues and confuse readers. Just drop the option and rely on the linker default, which is likely "both", or "gnu" when the distribution really wants to eliminate sysv hash overhead. Similar to commit 6b7e26547fad ("x86/vdso: Emit a GNU hash"). The commit basically does: -ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ +ldflags-y := -shared -soname=linux-vdso.so.1 \ Which results in only a GNU hash being added to the ELF header. This change has been merged with 6.11. 
Looking at the referenced x86 commit: commit 6b7e26547fad7ace3dcb27a5babd2317fb9d1e12 Author: Andy Lutomirski <luto@amacapital.net> Date: Thu Aug 6 14:45:45 2015 -0700 x86/vdso: Emit a GNU hash Some dynamic loaders may be slightly faster if a GNU hash is available. Strangely, this seems to have no effect at all on the vdso size. This is unlikely to have any measurable effect on the time it takes to resolve vdso symbols (since there are so few of them). In some contexts, it can be a win for a different reason: if every DSO has a GNU hash section, then libc can avoid calculating SysV hashes at all. Both musl and glibc appear to have this optimization. It's plausible that this breaks some ancient glibc version. If so, then, depending on what glibc versions break, we could either require COMPAT_VDSO for them or consider reverting. Which is also a really simple change: -VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ +VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=both) \ The big difference here is that for x86 both hash sections are generated. For aarch64 only the newer GNU hash is generated. That is why we only see this error on kernel >= 6.11 and aarch64. Changing from DT_HASH to DT_GNU_HASH seems to work on aarch64. The test suite runs without any errors. Unfortunately I am not aware of all implications of this change, or whether a successful test suite run means that it still works. Looking at the kernel I see the following hash styles for the VDSO: aarch64: not specified (only GNU hash style) arm: --hash-style=sysv loongarch: --hash-style=sysv mips: --hash-style=sysv powerpc: --hash-style=both riscv: --hash-style=both s390: --hash-style=both x86: --hash-style=both Only aarch64 on kernels >= 6.11 is a problem right now, because all other platforms provide the old style hashing. 
Signed-off-by: Adrian Reber <areber@redhat.com> Co-developed-by: Dmitry Safonov <dima@arista.com> Co-authored-by: Dmitry Safonov <dima@arista.com> Signed-off-by: Dmitry Safonov <dima@arista.com> Signed-off-by: Adrian Reber <areber@redhat.com>
1 parent 343e731 commit aa129f7

File tree

1 file changed

+194
-46
lines changed

1 file changed

+194
-46
lines changed

criu/pie/util-vdso.c

Lines changed: 194 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <fcntl.h>
66
#include <errno.h>
77
#include <stdint.h>
8+
#include <stdbool.h>
89

910
#include <sys/types.h>
1011
#include <sys/stat.h>
@@ -48,10 +49,25 @@ static bool __ptr_struct_oob(uintptr_t ptr, size_t struct_size, uintptr_t start,
4849
return __ptr_oob(ptr, start, size) || __ptr_struct_end_oob(ptr, struct_size, start, size);
4950
}
5051

52+
/*
 * Minimal strlen() replacement for code that cannot call into libc.
 * A NULL string is treated as empty and yields 0.
 */
static size_t __strlen(const char *str)
{
	size_t len = 0;

	if (str == NULL)
		return 0;

	/* Count bytes up to, but not including, the terminating NUL. */
	while (str[len] != '\0')
		len++;

	return len;
}
66+
5167
/*
5268
* Elf hash, see format specification.
5369
*/
54-
static unsigned long elf_hash(const unsigned char *name)
70+
static unsigned long elf_sysv_hash(const unsigned char *name)
5571
{
5672
unsigned long h = 0, g;
5773

@@ -65,6 +81,15 @@ static unsigned long elf_hash(const unsigned char *name)
6581
return h;
6682
}
6783

84+
/*
 * The GNU hash format. Taken from glibc.
 *
 * Computes h = h * 33 + c over every byte of @name, starting from 5381.
 * The GNU hash is defined as a 32-bit value, so the accumulation is done
 * in uint32_t: the result is then the same on 32-bit and 64-bit targets
 * and never carries bits above the low 32 in the unsigned long returned.
 */
static unsigned long elf_gnu_hash(const unsigned char *name)
{
	uint32_t h = 5381;

	for (; *name != '\0'; name++)
		h = h * 33 + *name;

	return h;
}
92+
6893
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
6994
#define BORD ELFDATA2MSB /* 0x02 */
7095
#else
@@ -149,11 +174,14 @@ static int parse_elf_phdr(uintptr_t mem, size_t size, Phdr_t **dynamic, Phdr_t *
149174
* Output parameters are:
150175
* @dyn_strtab - address of the string table section
151176
* @dyn_symtab - address of the symbol table
152-
* @dyn_hash - address of the symbol hash table
177+
* @dyn_hash - address of the symbol hash table
178+
* @use_gnu_hash - set to true when @dyn_hash is a DT_GNU_HASH entry, false when it is a DT_HASH entry
153179
*/
154-
static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic, Dyn_t **dyn_strtab, Dyn_t **dyn_symtab,
155-
Dyn_t **dyn_hash)
180+
static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic,
181+
Dyn_t **dyn_strtab, Dyn_t **dyn_symtab,
182+
Dyn_t **dyn_hash, bool *use_gnu_hash)
156183
{
184+
Dyn_t *dyn_gnu_hash = NULL, *dyn_sysv_hash = NULL;
157185
Dyn_t *dyn_syment = NULL;
158186
Dyn_t *dyn_strsz = NULL;
159187
uintptr_t addr;
@@ -184,16 +212,52 @@ static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic, Dyn_t
184212
dyn_syment = d;
185213
pr_debug("DT_SYMENT: %lx\n", (unsigned long)d->d_un.d_val);
186214
} else if (d->d_tag == DT_HASH) {
187-
*dyn_hash = d;
215+
dyn_sysv_hash = d;
188216
pr_debug("DT_HASH: %lx\n", (unsigned long)d->d_un.d_ptr);
217+
} else if (d->d_tag == DT_GNU_HASH) {
218+
/*
219+
* This is complicated.
220+
*
221+
* Looking at the Linux kernel source following can be seen
222+
* in regards which hashing style the VDSO uses on each arch:
223+
*
224+
* aarch64: not specified (depends on linker, can be
225+
* only GNU hash style)
226+
* arm: --hash-style=sysv
227+
* loongarch: --hash-style=sysv
228+
* mips: --hash-style=sysv
229+
* powerpc: --hash-style=both
230+
* riscv: --hash-style=both
231+
* s390: --hash-style=both
232+
* x86: --hash-style=both
233+
*
234+
* Some architectures are using both hash-styles, that
235+
* is the easiest for CRIU. Some architectures are only
236+
* using the old style (sysv), that is what CRIU supports.
237+
*
238+
* Starting with Linux 6.11, aarch64 unfortunately decided
239+
* to switch from '--hash-style=sysv' to ''. Specifying
240+
* nothing unfortunately may mean GNU hash style only and not
241+
* 'both' (depending on the linker).
242+
*/
243+
dyn_gnu_hash = d;
244+
pr_debug("DT_GNU_HASH: %lx\n", (unsigned long)d->d_un.d_ptr);
189245
}
190246
}
191247

192-
if (!*dyn_strtab || !*dyn_symtab || !dyn_strsz || !dyn_syment || !*dyn_hash) {
248+
if (!*dyn_strtab || !*dyn_symtab || !dyn_strsz || !dyn_syment ||
249+
(!dyn_gnu_hash && !dyn_sysv_hash)) {
193250
pr_err("Not all dynamic entries are present\n");
194251
return -EINVAL;
195252
}
196253

254+
/*
255+
* Prefer DT_HASH over DT_GNU_HASH as it's been more tested and
256+
* as a result more stable.
257+
*/
258+
*use_gnu_hash = !dyn_sysv_hash;
259+
*dyn_hash = dyn_sysv_hash ?: dyn_gnu_hash;
260+
197261
return 0;
198262

199263
err_oob:
@@ -208,60 +272,138 @@ typedef unsigned long Hash_t;
208272
typedef Word_t Hash_t;
209273
#endif
210274

211-
static void parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load, struct vdso_symtable *t,
212-
uintptr_t dynsymbol_names, Hash_t *hash, Dyn_t *dyn_symtab)
275+
/*
 * Tell whether the dynamic symbol @sym carries the name @symbol.
 *
 * @mem, @size         - the region that both @sym and its name must lie in
 * @dynsymbol_names    - base address that sym->st_name is an offset into
 * @sym                - candidate symbol table entry
 * @symbol             - name being looked for
 * @vdso_symbol_length - number of name bytes that are bounds-checked and
 *                       at most compared
 *
 * Returns false when @sym or its name falls outside the region, when the
 * symbol is neither a function nor global, or when the names differ.
 */
static bool elf_symbol_match(uintptr_t mem, size_t size,
			     uintptr_t dynsymbol_names, Sym_t *sym,
			     const char *symbol, const size_t vdso_symbol_length)
{
	const uintptr_t sym_addr = (uintptr_t)sym;
	uintptr_t name_addr;
	bool is_func, is_global;

	/* The entry itself must be readable before any field is touched. */
	if (__ptr_struct_oob(sym_addr, sizeof(Sym_t), mem, size))
		return false;

	/* Only entries that are neither STT_FUNC nor STB_GLOBAL are rejected. */
	is_func = ELF_ST_TYPE(sym->st_info) == STT_FUNC;
	is_global = ELF_ST_BIND(sym->st_info) == STB_GLOBAL;
	if (!is_func && !is_global)
		return false;

	name_addr = dynsymbol_names + sym->st_name;
	if (__ptr_struct_oob(name_addr, vdso_symbol_length, mem, size))
		return false;

	return std_strncmp((char *)name_addr, symbol, vdso_symbol_length) == 0;
}
229295

230-
pr_debug("nbucket %lx nchain %lx bucket %lx chain %lx\n", (long)nbucket, (long)nchain, (unsigned long)bucket,
231-
(unsigned long)chain);
232296

233-
for (i = 0; i < VDSO_SYMBOL_MAX; i++) {
234-
const char *symbol = vdso_symbols[i];
235-
k = elf_hash((const unsigned char *)symbol);
297+
/*
 * Find @symbol in the dynamic symbol table by walking its hash table.
 *
 * @mem, @size         - the parsed region; every table read is checked
 *                       against it
 * @symbol             - name to look for
 * @symbol_hash        - hash of @symbol, computed with the hash function
 *                       that matches @use_gnu_hash
 * @sym_off            - DT_GNU_HASH only: subtracted from a symbol index
 *                       to get the index of its chain word
 * @dynsymbol_names    - base address of the symbol names
 * @dyn_symtab, @load  - used to locate the symbol table inside @mem
 * @nbucket, @bucket   - number of buckets and address of the bucket array
 * @nchain, @chain     - DT_HASH only: number of chain entries and address
 *                       of the chain array
 * @vdso_symbol_length - maximum number of name bytes compared
 * @use_gnu_hash       - true to walk a DT_GNU_HASH table, false for DT_HASH
 *
 * Returns the st_value of the first matching symbol, or 0 when no symbol
 * matches or the table turns out to be malformed.
 */
static unsigned long elf_symbol_lookup(uintptr_t mem, size_t size,
		const char *symbol, uint32_t symbol_hash, unsigned int sym_off,
		uintptr_t dynsymbol_names, Dyn_t *dyn_symtab, Phdr_t *load,
		Hash_t nbucket, Hash_t nchain, Hash_t *bucket, Hash_t *chain,
		const size_t vdso_symbol_length, bool use_gnu_hash)
{
	const uintptr_t symtab = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr;
	uintptr_t slot;
	unsigned int j;

	/* An empty bucket array would make the modulo below divide by zero. */
	if (nbucket == 0)
		return 0;

	if (use_gnu_hash) {
		/*
		 * DT_GNU_HASH buckets and chain words are 32 bits wide,
		 * whatever the width of Hash_t is, so address them by hand.
		 */
		const uintptr_t gnu_bucket = (uintptr_t)bucket;
		uint32_t hash_val;

		slot = gnu_bucket + sizeof(uint32_t) * (symbol_hash % nbucket);
		if (__ptr_struct_oob(slot, sizeof(uint32_t), mem, size))
			return 0;
		j = *(const uint32_t *)slot;

		/*
		 * STN_UNDEF marks an empty bucket. An index below sym_off
		 * has no chain word and would underflow the subtraction.
		 */
		if (j == STN_UNDEF || j < sym_off)
			return 0;

		slot = gnu_bucket + sizeof(uint32_t) * ((uintptr_t)nbucket + (j - sym_off));

		/* Bit 0 of a chain word is the end-of-chain flag, not hash data. */
		symbol_hash |= 1;
		do {
			Sym_t *sym = (Sym_t *)(symtab + sizeof(Sym_t) * j);

			/* Stop rather than run off the region on a malformed chain. */
			if (__ptr_struct_oob(slot, sizeof(uint32_t), mem, size))
				return 0;
			hash_val = *(const uint32_t *)slot;
			slot += sizeof(uint32_t);

			if ((hash_val | 1) == symbol_hash &&
			    elf_symbol_match(mem, size, dynsymbol_names, sym,
					     symbol, vdso_symbol_length))
				return sym->st_value;
			j++;
		} while (!(hash_val & 1));
	} else {
		Hash_t steps = 0;

		slot = (uintptr_t)&bucket[symbol_hash % nbucket];
		if (__ptr_struct_oob(slot, sizeof(Hash_t), mem, size))
			return 0;
		j = bucket[symbol_hash % nbucket];

		/*
		 * Every index followed is below nchain, so a well-formed
		 * chain takes at most nchain steps; the cap keeps a cyclic
		 * chain from looping forever.
		 */
		while (j < nchain && j != STN_UNDEF && steps++ < nchain) {
			Sym_t *sym = (Sym_t *)(symtab + sizeof(Sym_t) * j);

			if (elf_symbol_match(mem, size, dynsymbol_names, sym,
					     symbol, vdso_symbol_length))
				return sym->st_value;

			slot = (uintptr_t)&chain[j];
			if (__ptr_struct_oob(slot, sizeof(Hash_t), mem, size))
				return 0;
			j = chain[j];
		}
	}

	return 0;
}
240338

241-
addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr;
339+
static int parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load,
340+
struct vdso_symtable *t, uintptr_t dynsymbol_names,
341+
Hash_t *hash, Dyn_t *dyn_symtab, bool use_gnu_hash)
342+
{
343+
ARCH_VDSO_SYMBOLS_LIST
242344

243-
addr += sizeof(Sym_t) * j;
244-
if (__ptr_struct_oob(addr, sizeof(Sym_t), mem, size))
245-
continue;
246-
sym = (void *)addr;
345+
const char *vdso_symbols[VDSO_SYMBOL_MAX] = { ARCH_VDSO_SYMBOLS };
346+
const size_t vdso_symbol_length = sizeof(t->symbols[0].name) - 1;
247347

248-
if (ELF_ST_TYPE(sym->st_info) != STT_FUNC && ELF_ST_BIND(sym->st_info) != STB_GLOBAL)
249-
continue;
348+
Hash_t nbucket, nchain;
349+
Hash_t *bucket, *chain;
250350

251-
addr = dynsymbol_names + sym->st_name;
252-
if (__ptr_struct_oob(addr, vdso_symbol_length, mem, size))
253-
continue;
254-
name = (void *)addr;
351+
unsigned int i, sym_off;
352+
353+
unsigned long (*elf_hash)(const unsigned char *);
354+
355+
if (use_gnu_hash) {
356+
uint32_t *gnu_hash = (uint32_t *)hash;
357+
uint32_t bloom_sz;
358+
size_t *bloom;
359+
360+
nbucket = gnu_hash[0];
361+
sym_off = gnu_hash[1];
362+
bloom_sz = gnu_hash[2];
363+
bloom = (size_t *)&gnu_hash[4];
364+
bucket = (Hash_t *)(&bloom[bloom_sz]);
365+
elf_hash = &elf_gnu_hash;
366+
pr_debug("nbucket %lx sym_off %lx bloom_sz %lx bloom %lx bucket %lx\n",
367+
(unsigned long)nbucket, (unsigned long)sym_off,
368+
(unsigned long)bloom_sz, (unsigned long)bloom,
369+
(unsigned long)bucket);
370+
} else {
371+
nbucket = hash[0];
372+
nchain = hash[1];
373+
bucket = &hash[2];
374+
chain = &hash[nbucket + 2];
375+
elf_hash = &elf_sysv_hash;
376+
pr_debug("nbucket %lx nchain %lx bucket %lx chain %lx\n",
377+
(unsigned long)nbucket, (unsigned long)nchain,
378+
(unsigned long)bucket, (unsigned long)chain);
379+
}
255380

256-
if (std_strncmp(name, symbol, vdso_symbol_length))
257-
continue;
258381

259-
/* XXX: provide strncpy() implementation for PIE */
260-
memcpy(t->symbols[i].name, name, vdso_symbol_length);
261-
t->symbols[i].offset = (unsigned long)sym->st_value - load->p_vaddr;
262-
break;
382+
for (i = 0; i < VDSO_SYMBOL_MAX; i++) {
383+
const char *symbol = vdso_symbols[i];
384+
unsigned long addr, symbol_hash;
385+
const size_t symbol_length = __strlen(symbol);
386+
387+
symbol_hash = elf_hash((const unsigned char *)symbol);
388+
addr = elf_symbol_lookup(mem, size, symbol, symbol_hash,
389+
sym_off, dynsymbol_names, dyn_symtab, load,
390+
nbucket, nchain, bucket, chain,
391+
vdso_symbol_length, use_gnu_hash);
392+
pr_debug("symbol %s at address %lx\n", symbol, addr);
393+
if (!addr)
394+
continue;
395+
396+
/* XXX: provide strncpy() implementation for PIE */
397+
if (symbol_length > vdso_symbol_length) {
398+
pr_err("strlen(%s) %zd, only %zd bytes available\n",
399+
symbol, symbol_length, vdso_symbol_length);
400+
return -EINVAL;
263401
}
402+
memcpy(t->symbols[i].name, symbol, symbol_length);
403+
t->symbols[i].offset = addr - load->p_vaddr;
264404
}
405+
406+
return 0;
265407
}
266408

267409
int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t)
@@ -271,6 +413,7 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t)
271413
Dyn_t *dyn_symtab = NULL;
272414
Dyn_t *dyn_hash = NULL;
273415
Hash_t *hash = NULL;
416+
bool use_gnu_hash;
274417

275418
uintptr_t dynsymbol_names;
276419
uintptr_t addr;
@@ -296,7 +439,8 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t)
296439
* needed. Note that we're interested in a small set of tags.
297440
*/
298441

299-
ret = parse_elf_dynamic(mem, size, dynamic, &dyn_strtab, &dyn_symtab, &dyn_hash);
442+
ret = parse_elf_dynamic(mem, size, dynamic, &dyn_strtab, &dyn_symtab,
443+
&dyn_hash, &use_gnu_hash);
300444
if (ret < 0)
301445
return ret;
302446

@@ -310,7 +454,11 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t)
310454
goto err_oob;
311455
hash = (void *)addr;
312456

313-
parse_elf_symbols(mem, size, load, t, dynsymbol_names, hash, dyn_symtab);
457+
ret = parse_elf_symbols(mem, size, load, t, dynsymbol_names, hash, dyn_symtab,
458+
use_gnu_hash);
459+
460+
if (ret <0)
461+
return ret;
314462

315463
return 0;
316464

0 commit comments

Comments
 (0)