From 08d0aa0bacbb0df9a79241e0f19fbf7656b1ca65 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Wed, 23 Nov 2022 10:26:18 -0800 Subject: [PATCH 01/12] Unroll memmove like memset --- src/mono/wasm/runtime/jiterpreter-support.ts | 137 +++++++++++++------ src/mono/wasm/runtime/jiterpreter.ts | 6 +- 2 files changed, 102 insertions(+), 41 deletions(-) diff --git a/src/mono/wasm/runtime/jiterpreter-support.ts b/src/mono/wasm/runtime/jiterpreter-support.ts index ceea427a1d5c98..7af87c3bb7e556 100644 --- a/src/mono/wasm/runtime/jiterpreter-support.ts +++ b/src/mono/wasm/runtime/jiterpreter-support.ts @@ -7,7 +7,8 @@ import { WasmOpcode } from "./jiterpreter-opcodes"; import cwraps from "./cwraps"; export const maxFailures = 2, - maxMemsetSize = 64; + maxMemsetSize = 64, + maxMemmoveSize = 64; // uint16 export declare interface MintOpcodePtr extends NativePointer { @@ -661,13 +662,14 @@ export function try_append_memset_fast (builder: WasmBuilder, localOffset: numbe if (count >= maxMemsetSize) return false; + const destLocal = destOnStack ? "math_lhs32" : "pLocals"; if (destOnStack) builder.local("math_lhs32", WasmOpcode.set_local); let offset = destOnStack ? 0 : localOffset; // Do blocks of 8-byte sets first for smaller/faster code while (count >= 8) { - builder.local(destOnStack ? "math_lhs32" : "pLocals"); + builder.local(destLocal); builder.i52_const(0); builder.appendU8(WasmOpcode.i64_store); builder.appendMemarg(offset, 0); @@ -677,7 +679,7 @@ export function try_append_memset_fast (builder: WasmBuilder, localOffset: numbe // Then set the remaining 0-7 bytes while (count >= 1) { - builder.local(destOnStack ? "math_lhs32" : "pLocals"); + builder.local(destLocal); builder.i32_const(0); let localCount = count % 4; switch (localCount) { @@ -716,45 +718,100 @@ export function append_memset_dest (builder: WasmBuilder, value: number, count: builder.appendU8(0); } +export function try_append_memmove_fast ( + builder: WasmBuilder, destLocalOffset: number, srcLocalOffset: number, + count: number, addressesOnStack: boolean +) { + let destLocal = "math_lhs32", srcLocal = "math_rhs32"; + + if (count <= 0) { + if (addressesOnStack) { + builder.appendU8(WasmOpcode.drop); + builder.appendU8(WasmOpcode.drop); + } + return true; + } + + if (count >= maxMemmoveSize) + return false; + + if (addressesOnStack) { + builder.local(srcLocal, WasmOpcode.set_local); + builder.local(destLocal, WasmOpcode.set_local); + } else { + destLocal = srcLocal = "pLocals"; + } + + let destOffset = addressesOnStack ? 0 : destLocalOffset, + srcOffset = addressesOnStack ? 0 : srcLocalOffset; + + // Do blocks of 8-byte copies first for smaller/faster code + while (count >= 8) { + builder.local(destLocal); + builder.local(srcLocal); + builder.appendU8(WasmOpcode.i64_load); + builder.appendMemarg(srcOffset, 0); + builder.appendU8(WasmOpcode.i64_store); + builder.appendMemarg(destOffset, 0); + destOffset += 8; + srcOffset += 8; + count -= 8; + } + + // Then copy the remaining 0-7 bytes + while (count >= 1) { + let loadOp : WasmOpcode, storeOp : WasmOpcode; + let localCount = count % 4; + switch (localCount) { + case 0: + // since we did %, 4 bytes turned into 0. gotta fix that up to avoid infinite loop + localCount = 4; + loadOp = WasmOpcode.i32_load; + storeOp = WasmOpcode.i32_store; + break; + default: + case 1: + localCount = 1; // silence tsc + loadOp = WasmOpcode.i32_load8_s; + storeOp = WasmOpcode.i32_store8; + break; + case 3: + case 2: + // For 3 bytes we just want to do a 2 write then a 1 + localCount = 2; + loadOp = WasmOpcode.i32_load16_s; + storeOp = WasmOpcode.i32_store16; + break; + + } + + builder.local(destLocal); + builder.local(srcLocal); + builder.appendU8(loadOp); + builder.appendMemarg(srcOffset, 0); + builder.appendU8(storeOp); + builder.appendMemarg(destOffset, 0); + srcOffset += localCount; + destOffset += localCount; + count -= localCount; + } + + return true; +} + // expects dest then source to have been pushed onto wasm stack export function append_memmove_dest_src (builder: WasmBuilder, count: number) { - // FIXME: Unroll this like memset, since we now know that the memory ops generate expensive - // function calls - switch (count) { - case 1: - builder.appendU8(WasmOpcode.i32_load8_u); - builder.appendMemarg(0, 0); - builder.appendU8(WasmOpcode.i32_store8); - builder.appendMemarg(0, 0); - return true; - case 2: - builder.appendU8(WasmOpcode.i32_load16_u); - builder.appendMemarg(0, 0); - builder.appendU8(WasmOpcode.i32_store16); - builder.appendMemarg(0, 0); - return true; - case 4: - builder.appendU8(WasmOpcode.i32_load); - builder.appendMemarg(0, 0); - builder.appendU8(WasmOpcode.i32_store); - builder.appendMemarg(0, 0); - return true; - case 8: - builder.appendU8(WasmOpcode.i64_load); - builder.appendMemarg(0, 0); - builder.appendU8(WasmOpcode.i64_store); - builder.appendMemarg(0, 0); - return true; - default: - // spec: pop n, pop s, pop d, copy n bytes from s to d - builder.i32_const(count); - // great encoding isn't it - builder.appendU8(WasmOpcode.PREFIX_sat); - builder.appendU8(10); - builder.appendU8(0); - builder.appendU8(0); - return true; - } + if (try_append_memmove_fast(builder, 0, 0, count, true)) + return true; + + // spec: pop n, pop s, pop d, copy n bytes from s to d + builder.i32_const(count); + // great encoding isn't it + builder.appendU8(WasmOpcode.PREFIX_sat); + builder.appendU8(10); + builder.appendU8(0); + builder.appendU8(0); + return true; } export function recordFailure () : void { diff --git a/src/mono/wasm/runtime/jiterpreter.ts b/src/mono/wasm/runtime/jiterpreter.ts index 6b7addc8e9c6e8..9c91114512ff14 100644 --- a/src/mono/wasm/runtime/jiterpreter.ts +++ b/src/mono/wasm/runtime/jiterpreter.ts @@ -15,7 +15,8 @@ import { MintOpcodePtr, WasmValtype, WasmBuilder, addWasmFunctionPointer, copyIntoScratchBuffer, _now, elapsedTimes, append_memset_dest, append_memmove_dest_src, counters, getRawCwrap, importDef, - JiterpreterOptions, getOptions, recordFailure, try_append_memset_fast + JiterpreterOptions, getOptions, recordFailure, try_append_memset_fast, + try_append_memmove_fast } from "./jiterpreter-support"; // Controls miscellaneous diagnostic output. @@ -1350,6 +1351,9 @@ function append_memset_local (builder: WasmBuilder, localOffset: number, value: } function append_memmove_local_local (builder: WasmBuilder, destLocalOffset: number, sourceLocalOffset: number, count: number) { + if (try_append_memmove_fast(builder, destLocalOffset, sourceLocalOffset, count, false)) + return true; + // spec: pop n, pop s, pop d, copy n bytes from s to d append_ldloca(builder, destLocalOffset); append_ldloca(builder, sourceLocalOffset); From f1655f20a4977fb20d9c3ab89f90b2ad56098878 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Wed, 23 Nov 2022 11:05:59 -0800 Subject: [PATCH 02/12] Add runtime option that causes interpreter and jiterpreter tiering mechanisms to activate immediately --- src/mono/mono/mini/interp/interp.c | 6 +++--- src/mono/mono/mini/interp/jiterpreter.c | 2 +- src/mono/mono/mini/interp/tiering.h | 1 + src/mono/mono/utils/options-def.h | 11 ++++++----- src/mono/wasm/runtime/jiterpreter-support.ts | 3 +++ src/mono/wasm/runtime/jiterpreter.ts | 6 ++++-- 6 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index 0a2b6e2770285d..daf7995637c5d1 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c @@ -2672,7 +2672,7 @@ do_jit_call (ThreadContext *context, stackval *ret_sp, stackval *sp, InterpFrame goto epilogue; } else { int count = cinfo->hit_count; - if (count == mono_opt_jiterpreter_jit_call_trampoline_hit_count) { + if (count == (mono_opt_interp_tier_instantly ? 1 : mono_opt_jiterpreter_jit_call_trampoline_hit_count)) { void *fn = cinfo->no_wrapper ? cinfo->addr : cinfo->wrapper; mono_interp_jit_wasm_jit_call_trampoline ( rmethod, cinfo, fn, rmethod->hasthis, rmethod->param_count, @@ -7250,7 +7250,7 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; MINT_IN_CASE(MINT_TIER_ENTER_METHOD) { frame->imethod->entry_count++; - if (frame->imethod->entry_count > INTERP_TIER_ENTRY_LIMIT && !clause_args) + if (frame->imethod->entry_count > (mono_opt_interp_tier_instantly ? INTERP_TIER_ENTRY_LIMIT_LOW : INTERP_TIER_ENTRY_LIMIT) && !clause_args) ip = mono_interp_tier_up_frame_enter (frame, context); else ip++; @@ -7258,7 +7258,7 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; } MINT_IN_CASE(MINT_TIER_PATCHPOINT) { frame->imethod->entry_count++; - if (frame->imethod->entry_count > INTERP_TIER_ENTRY_LIMIT && !clause_args) + if (frame->imethod->entry_count > (mono_opt_interp_tier_instantly ? INTERP_TIER_ENTRY_LIMIT_LOW : INTERP_TIER_ENTRY_LIMIT) && !clause_args) ip = mono_interp_tier_up_frame_patchpoint (frame, context, ip [1]); else ip += 2; diff --git a/src/mono/mono/mini/interp/jiterpreter.c b/src/mono/mono/mini/interp/jiterpreter.c index 071040ee76b502..fd387eead78b6a 100644 --- a/src/mono/mono/mini/interp/jiterpreter.c +++ b/src/mono/mono/mini/interp/jiterpreter.c @@ -140,7 +140,7 @@ mono_jiterp_increase_entry_count (void *_imethod) { InterpMethod *imethod = (InterpMethod*)_imethod; imethod->entry_count++; // Return whether the trace should bail out because the method needs to be tiered - return imethod->entry_count >= INTERP_TIER_ENTRY_LIMIT; + return imethod->entry_count >= (mono_opt_interp_tier_instantly ? INTERP_TIER_ENTRY_LIMIT_LOW : INTERP_TIER_ENTRY_LIMIT); } EMSCRIPTEN_KEEPALIVE void* diff --git a/src/mono/mono/mini/interp/tiering.h b/src/mono/mono/mini/interp/tiering.h index dbd7da87ecd4d9..d056b95d3d0a2b 100644 --- a/src/mono/mono/mini/interp/tiering.h +++ b/src/mono/mono/mini/interp/tiering.h @@ -3,6 +3,7 @@ #include "interp-internals.h" +#define INTERP_TIER_ENTRY_LIMIT_LOW 2 #define INTERP_TIER_ENTRY_LIMIT 1000 void diff --git a/src/mono/mono/utils/options-def.h b/src/mono/mono/utils/options-def.h index bf9cc2c890a5fb..4e7c239ac365e1 100644 --- a/src/mono/mono/utils/options-def.h +++ b/src/mono/mono/utils/options-def.h @@ -60,6 +60,7 @@ DEFINE_BOOL_READONLY(readonly_flag, "readonly-flag", FALSE, "Example") DEFINE_BOOL(wasm_exceptions, "wasm-exceptions", FALSE, "Enable codegen for WASM exceptions") DEFINE_BOOL(wasm_gc_safepoints, "wasm-gc-safepoints", FALSE, "Use GC safepoints on WASM") DEFINE_BOOL(aot_lazy_assembly_load, "aot-lazy-assembly-load", FALSE, "Load assemblies referenced by AOT images lazily") +DEFINE_BOOL(interp_tier_instantly, "interp-tier-instantly", TRUE, "Immediately tier up and optimize interpreter methods") #if HOST_BROWSER @@ -67,11 +68,11 @@ DEFINE_BOOL(aot_lazy_assembly_load, "aot-lazy-assembly-load", FALSE, "Load assem // and wasm modules between threads. before these can be enabled we need to implement all that #ifdef DISABLE_THREADS // traces_enabled controls whether the jiterpreter will JIT individual interpreter opcode traces -DEFINE_BOOL(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", FALSE, "JIT interpreter opcode traces into WASM") +DEFINE_BOOL(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", TRUE, "JIT interpreter opcode traces into WASM") // interp_entry_enabled controls whether specialized interp_entry wrappers will be jitted -DEFINE_BOOL(jiterpreter_interp_entry_enabled, "jiterpreter-interp-entry-enabled", FALSE, "JIT specialized WASM interp_entry wrappers") +DEFINE_BOOL(jiterpreter_interp_entry_enabled, "jiterpreter-interp-entry-enabled", TRUE, "JIT specialized WASM interp_entry wrappers") // jit_call_enabled controls whether do_jit_call will use specialized trampolines for hot call sites -DEFINE_BOOL(jiterpreter_jit_call_enabled, "jiterpreter-jit-call-enabled", FALSE, "JIT specialized WASM do_jit_call trampolines") +DEFINE_BOOL(jiterpreter_jit_call_enabled, "jiterpreter-jit-call-enabled", TRUE, "JIT specialized WASM do_jit_call trampolines") #else // traces_enabled controls whether the jiterpreter will JIT individual interpreter opcode traces DEFINE_BOOL_READONLY(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", FALSE, "JIT interpreter opcode traces into WASM") @@ -95,7 +96,7 @@ DEFINE_BOOL(jiterpreter_call_resume_enabled, "jiterpreter-call-resume-enabled", // stats for options like estimateHeat, but raises overhead. DEFINE_BOOL(jiterpreter_disable_heuristic, "jiterpreter-disable-heuristic", FALSE, "Always insert trace entry points for more accurate statistics") // Automatically prints stats at app exit or when jiterpreter_dump_stats is called -DEFINE_BOOL(jiterpreter_stats_enabled, "jiterpreter-stats-enabled", FALSE, "Automatically print jiterpreter statistics") +DEFINE_BOOL(jiterpreter_stats_enabled, "jiterpreter-stats-enabled", TRUE, "Automatically print jiterpreter statistics") // Continue counting hits for traces that fail to compile and use it to estimate // the relative importance of the opcode that caused them to abort DEFINE_BOOL(jiterpreter_estimate_heat, "jiterpreter-estimate-heat", FALSE, "Maintain accurate hit count for all trace entry points") @@ -110,7 +111,7 @@ DEFINE_INT(jiterpreter_minimum_trace_hit_count, "jiterpreter-minimum-trace-hit-c // After a do_jit_call call site is hit this many times, we will queue it to be jitted DEFINE_INT(jiterpreter_jit_call_trampoline_hit_count, "jiterpreter-jit-call-hit-count", 3000, "Queue specialized do_jit_call trampoline for JIT after this many hits") // After a do_jit_call call site is hit this many times without being jitted, we will flush the JIT queue -DEFINE_INT(jiterpreter_jit_call_queue_flush_threshold, "jiterpreter-jit-call-queue-flush-threshold", 10000, "Flush the do_jit_call JIT queue after an unJITted call site has this many hits") +DEFINE_INT(jiterpreter_jit_call_queue_flush_threshold, "jiterpreter-jit-call-queue-flush-threshold", 15000, "Flush the do_jit_call JIT queue after an unJITted call site has this many hits") #endif // HOST_BROWSER /* Cleanup */ diff --git a/src/mono/wasm/runtime/jiterpreter-support.ts b/src/mono/wasm/runtime/jiterpreter-support.ts index 7af87c3bb7e556..2589d36d59a54b 100644 --- a/src/mono/wasm/runtime/jiterpreter-support.ts +++ b/src/mono/wasm/runtime/jiterpreter-support.ts @@ -857,6 +857,8 @@ export type JiterpreterOptions = { countBailouts: boolean; // Dump the wasm blob for all compiled traces dumpTraces: boolean; + // Instantly tiers up methods and traces + tierInstantly: boolean; minimumTraceLength: number; minimumTraceHitCount: number; } @@ -875,6 +877,7 @@ const optionNames : { [jsName: string] : string } = { "dumpTraces": "jiterpreter-dump-traces", "minimumTraceLength": "jiterpreter-minimum-trace-length", "minimumTraceHitCount": "jiterpreter-minimum-trace-hit-count", + "tierInstantly": "interp-tier-instantly", }; let optionsVersion = -1; diff --git a/src/mono/wasm/runtime/jiterpreter.ts b/src/mono/wasm/runtime/jiterpreter.ts index 9c91114512ff14..78a8e143d7d1e2 100644 --- a/src/mono/wasm/runtime/jiterpreter.ts +++ b/src/mono/wasm/runtime/jiterpreter.ts @@ -2913,9 +2913,11 @@ export function mono_interp_tier_prepare_jiterpreter ( else info.hitCount++; - if (info.hitCount < mostRecentOptions.minimumTraceHitCount) + const minHitCount = mostRecentOptions.tierInstantly ? 2 : mostRecentOptions.minimumTraceHitCount; + + if (info.hitCount < minHitCount) return JITERPRETER_TRAINING; - else if (info.hitCount === mostRecentOptions.minimumTraceHitCount) { + else if (info.hitCount === minHitCount) { counters.traceCandidates++; let methodFullName: string | undefined; if (trapTraceErrors || mostRecentOptions.estimateHeat || (instrumentedMethodNames.length > 0)) { From 17ae775aa3284ba4673b9f37e5d64642a9f41ade Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Wed, 23 Nov 2022 12:16:31 -0800 Subject: [PATCH 03/12] Implement floating point relops in WASM using a C function to preserve semantics --- src/mono/mono/mini/interp/jiterpreter.c | 37 +++++++++++++++++++ src/mono/wasm/runtime/jiterpreter.ts | 47 +++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/src/mono/mono/mini/interp/jiterpreter.c b/src/mono/mono/mini/interp/jiterpreter.c index fd387eead78b6a..99d2cd154e4f7f 100644 --- a/src/mono/mono/mini/interp/jiterpreter.c +++ b/src/mono/mono/mini/interp/jiterpreter.c @@ -35,6 +35,7 @@ void jiterp_preserve_module (void); #include "interp-intrins.h" #include "tiering.h" +#include #include #include #include @@ -437,6 +438,42 @@ mono_jiterp_conv_ovf (void *dest, void *src, int opcode) { return 0; } +#define JITERP_RELOP(opcode, type, op, noorder) \ + case opcode: \ + { \ + if (mono_isunordered (lhs, rhs)) \ + return noorder; \ + else \ + return ((type)lhs op (type)rhs); \ + } + +EMSCRIPTEN_KEEPALIVE int +mono_jiterp_relop_fp (double lhs, double rhs, int opcode) { + switch (opcode) { + JITERP_RELOP(MINT_CEQ_R4, float, ==, 0); + JITERP_RELOP(MINT_CEQ_R8, double, ==, 0); + JITERP_RELOP(MINT_CNE_R4, float, !=, 1); + JITERP_RELOP(MINT_CNE_R8, double, !=, 1); + JITERP_RELOP(MINT_CGT_R4, float, >, 0); + JITERP_RELOP(MINT_CGT_R8, double, >, 0); + JITERP_RELOP(MINT_CGE_R4, float, >=, 0); + JITERP_RELOP(MINT_CGE_R8, double, >=, 0); + JITERP_RELOP(MINT_CGT_UN_R4, float, >, 1); + JITERP_RELOP(MINT_CGT_UN_R8, double, >, 1); + JITERP_RELOP(MINT_CLT_R4, float, <, 0); + JITERP_RELOP(MINT_CLT_R8, double, <, 0); + JITERP_RELOP(MINT_CLT_UN_R4, float, <, 1); + JITERP_RELOP(MINT_CLT_UN_R8, double, <, 1); + JITERP_RELOP(MINT_CLE_R4, float, <=, 0); + JITERP_RELOP(MINT_CLE_R8, double, <=, 0); + + default: + g_assert_not_reached(); + } +} + +#undef JITERP_RELOP + // we use these helpers at JIT time to figure out where to do memory loads and stores EMSCRIPTEN_KEEPALIVE size_t mono_jiterp_get_offset_of_vtable_initialized_flag () { diff --git a/src/mono/wasm/runtime/jiterpreter.ts b/src/mono/wasm/runtime/jiterpreter.ts index 78a8e143d7d1e2..27ef0afaeef456 100644 --- a/src/mono/wasm/runtime/jiterpreter.ts +++ b/src/mono/wasm/runtime/jiterpreter.ts @@ -250,6 +250,7 @@ function getTraceImports () { ["ld_del_ptr", "ld_del_ptr", getRawCwrap("mono_jiterp_ld_delegate_method_ptr")], ["ldtsflda", "ldtsflda", getRawCwrap("mono_jiterp_ldtsflda")], ["conv_ovf", "conv_ovf", getRawCwrap("mono_jiterp_conv_ovf")], + ["relop_fp", "relop_fp", getRawCwrap("mono_jiterp_relop_fp")], ]; if (instrumentedMethodNames.length > 0) { @@ -490,6 +491,13 @@ function generate_wasm ( "opcode": WasmValtype.i32, }, WasmValtype.i32 ); + builder.defineType( + "relop_fp", { + "lhs": WasmValtype.f64, + "rhs": WasmValtype.f64, + "opcode": WasmValtype.i32, + }, WasmValtype.i32 + ); builder.generateTypeSection(); @@ -1816,6 +1824,27 @@ const unopTable : { [opcode: number]: OpRec3 | undefined } = { [MintOpcode.MINT_SHR_UN_I8_IMM]: [WasmOpcode.i64_shr_u, WasmOpcode.i64_load, WasmOpcode.i64_store], }; +// HACK: Generating correct wasm for these is non-trivial so we hand them off to C. +// The opcode specifies whether the operands need to be promoted first. +const intrinsicFpBinops : { [opcode: number] : WasmOpcode } = { + [MintOpcode.MINT_CEQ_R4]: WasmOpcode.f64_promote_f32, + [MintOpcode.MINT_CEQ_R8]: WasmOpcode.nop, + [MintOpcode.MINT_CNE_R4]: WasmOpcode.f64_promote_f32, + [MintOpcode.MINT_CNE_R8]: WasmOpcode.nop, + [MintOpcode.MINT_CGT_R4]: WasmOpcode.f64_promote_f32, + [MintOpcode.MINT_CGT_R8]: WasmOpcode.nop, + [MintOpcode.MINT_CGE_R4]: WasmOpcode.f64_promote_f32, + [MintOpcode.MINT_CGE_R8]: WasmOpcode.nop, + [MintOpcode.MINT_CGT_UN_R4]: WasmOpcode.f64_promote_f32, + [MintOpcode.MINT_CGT_UN_R8]: WasmOpcode.nop, + [MintOpcode.MINT_CLT_R4]: WasmOpcode.f64_promote_f32, + [MintOpcode.MINT_CLT_R8]: WasmOpcode.nop, + [MintOpcode.MINT_CLT_UN_R4]: WasmOpcode.f64_promote_f32, + [MintOpcode.MINT_CLT_UN_R8]: WasmOpcode.nop, + [MintOpcode.MINT_CLE_R4]: WasmOpcode.f64_promote_f32, + [MintOpcode.MINT_CLE_R8]: WasmOpcode.nop, +}; + const binopTable : { [opcode: number]: OpRec3 | OpRec4 | undefined } = { [MintOpcode.MINT_ADD_I4]: [WasmOpcode.i32_add, WasmOpcode.i32_load, WasmOpcode.i32_store], [MintOpcode.MINT_ADD_OVF_I4]:[WasmOpcode.i32_add, WasmOpcode.i32_load, WasmOpcode.i32_store], @@ -1886,6 +1915,7 @@ const binopTable : { [opcode: number]: OpRec3 | OpRec4 | undefined } = { [MintOpcode.MINT_CLE_UN_I8]: [WasmOpcode.i64_le_u, WasmOpcode.i64_load, WasmOpcode.i32_store], [MintOpcode.MINT_CGE_UN_I8]: [WasmOpcode.i64_ge_u, WasmOpcode.i64_load, WasmOpcode.i32_store], + /* [MintOpcode.MINT_CEQ_R4]: [WasmOpcode.f32_eq, WasmOpcode.f32_load, WasmOpcode.i32_store], [MintOpcode.MINT_CNE_R4]: [WasmOpcode.f32_ne, WasmOpcode.f32_load, WasmOpcode.i32_store], [MintOpcode.MINT_CLT_R4]: [WasmOpcode.f32_lt, WasmOpcode.f32_load, WasmOpcode.i32_store], @@ -1903,6 +1933,7 @@ const binopTable : { [opcode: number]: OpRec3 | OpRec4 | undefined } = { [MintOpcode.MINT_CGT_R8]: [WasmOpcode.f64_gt, WasmOpcode.f64_load, WasmOpcode.i32_store], [MintOpcode.MINT_CLE_R8]: [WasmOpcode.f64_le, WasmOpcode.f64_load, WasmOpcode.i32_store], [MintOpcode.MINT_CGE_R8]: [WasmOpcode.f64_ge, WasmOpcode.f64_load, WasmOpcode.i32_store], + */ // FIXME: unordered float comparisons }; @@ -1997,6 +2028,22 @@ function emit_binop (builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode info : OpRec3 | OpRec4 | undefined, operandsCached = false; + const intrinsicFpBinop = intrinsicFpBinops[opcode]; + if (intrinsicFpBinop) { + builder.local("pLocals"); + const isF64 = intrinsicFpBinop == WasmOpcode.nop; + append_ldloc(builder, getArgU16(ip, 2), isF64 ? WasmOpcode.f64_load : WasmOpcode.f32_load); + if (!isF64) + builder.appendU8(intrinsicFpBinop); + append_ldloc(builder, getArgU16(ip, 3), isF64 ? WasmOpcode.f64_load : WasmOpcode.f32_load); + if (!isF64) + builder.appendU8(intrinsicFpBinop); + builder.i32_const(opcode); + builder.callImport("relop_fp"); + append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store); + return true; + } + switch (opcode) { case MintOpcode.MINT_REM_R4: case MintOpcode.MINT_REM_R8: From c12d76c2468320214dffaa53587d7a8b42e50bd7 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Wed, 23 Nov 2022 12:33:39 -0800 Subject: [PATCH 04/12] Revert config changes --- src/mono/mono/utils/options-def.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mono/mono/utils/options-def.h b/src/mono/mono/utils/options-def.h index 4e7c239ac365e1..a0f6c3027c56ad 100644 --- a/src/mono/mono/utils/options-def.h +++ b/src/mono/mono/utils/options-def.h @@ -60,7 +60,7 @@ DEFINE_BOOL_READONLY(readonly_flag, "readonly-flag", FALSE, "Example") DEFINE_BOOL(wasm_exceptions, "wasm-exceptions", FALSE, "Enable codegen for WASM exceptions") DEFINE_BOOL(wasm_gc_safepoints, "wasm-gc-safepoints", FALSE, "Use GC safepoints on WASM") DEFINE_BOOL(aot_lazy_assembly_load, "aot-lazy-assembly-load", FALSE, "Load assemblies referenced by AOT images lazily") -DEFINE_BOOL(interp_tier_instantly, "interp-tier-instantly", TRUE, "Immediately tier up and optimize interpreter methods") +DEFINE_BOOL(interp_tier_instantly, "interp-tier-instantly", FALSE, "Immediately tier up and optimize interpreter methods") #if HOST_BROWSER @@ -68,11 +68,11 @@ DEFINE_BOOL(interp_tier_instantly, "interp-tier-instantly", TRUE, "Immediately t // and wasm modules between threads. before these can be enabled we need to implement all that #ifdef DISABLE_THREADS // traces_enabled controls whether the jiterpreter will JIT individual interpreter opcode traces -DEFINE_BOOL(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", TRUE, "JIT interpreter opcode traces into WASM") +DEFINE_BOOL(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", FALSE, "JIT interpreter opcode traces into WASM") // interp_entry_enabled controls whether specialized interp_entry wrappers will be jitted -DEFINE_BOOL(jiterpreter_interp_entry_enabled, "jiterpreter-interp-entry-enabled", TRUE, "JIT specialized WASM interp_entry wrappers") +DEFINE_BOOL(jiterpreter_interp_entry_enabled, "jiterpreter-interp-entry-enabled", FALSE, "JIT specialized WASM interp_entry wrappers") // jit_call_enabled controls whether do_jit_call will use specialized trampolines for hot call sites -DEFINE_BOOL(jiterpreter_jit_call_enabled, "jiterpreter-jit-call-enabled", TRUE, "JIT specialized WASM do_jit_call trampolines") +DEFINE_BOOL(jiterpreter_jit_call_enabled, "jiterpreter-jit-call-enabled", FALSE, "JIT specialized WASM do_jit_call trampolines") #else // traces_enabled controls whether the jiterpreter will JIT individual interpreter opcode traces DEFINE_BOOL_READONLY(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", FALSE, "JIT interpreter opcode traces into WASM") From 41db2704b9633a9cb6bd4e89b1e41986b3b75427 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Wed, 23 Nov 2022 12:36:23 -0800 Subject: [PATCH 05/12] Whitespace cleanup --- src/mono/wasm/runtime/jiterpreter.ts | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/src/mono/wasm/runtime/jiterpreter.ts b/src/mono/wasm/runtime/jiterpreter.ts index 27ef0afaeef456..980e346a952c2a 100644 --- a/src/mono/wasm/runtime/jiterpreter.ts +++ b/src/mono/wasm/runtime/jiterpreter.ts @@ -1915,27 +1915,6 @@ const binopTable : { [opcode: number]: OpRec3 | OpRec4 | undefined } = { [MintOpcode.MINT_CLE_UN_I8]: [WasmOpcode.i64_le_u, WasmOpcode.i64_load, WasmOpcode.i32_store], [MintOpcode.MINT_CGE_UN_I8]: [WasmOpcode.i64_ge_u, WasmOpcode.i64_load, WasmOpcode.i32_store], - /* - [MintOpcode.MINT_CEQ_R4]: [WasmOpcode.f32_eq, WasmOpcode.f32_load, WasmOpcode.i32_store], - [MintOpcode.MINT_CNE_R4]: [WasmOpcode.f32_ne, WasmOpcode.f32_load, WasmOpcode.i32_store], - [MintOpcode.MINT_CLT_R4]: [WasmOpcode.f32_lt, WasmOpcode.f32_load, WasmOpcode.i32_store], - // FIXME: What are these, semantically? - [MintOpcode.MINT_CLT_UN_R4]: [WasmOpcode.f32_lt, WasmOpcode.f32_load, WasmOpcode.i32_store], - [MintOpcode.MINT_CGT_R4]: [WasmOpcode.f32_gt, WasmOpcode.f32_load, WasmOpcode.i32_store], - // FIXME - [MintOpcode.MINT_CGT_UN_R4]: [WasmOpcode.f32_gt, WasmOpcode.f32_load, WasmOpcode.i32_store], - [MintOpcode.MINT_CLE_R4]: [WasmOpcode.f32_le, WasmOpcode.f32_load, WasmOpcode.i32_store], - [MintOpcode.MINT_CGE_R4]: [WasmOpcode.f32_ge, WasmOpcode.f32_load, WasmOpcode.i32_store], - - [MintOpcode.MINT_CEQ_R8]: [WasmOpcode.f64_eq, WasmOpcode.f64_load, WasmOpcode.i32_store], - [MintOpcode.MINT_CNE_R8]: [WasmOpcode.f64_ne, WasmOpcode.f64_load, WasmOpcode.i32_store], - [MintOpcode.MINT_CLT_R8]: [WasmOpcode.f64_lt, WasmOpcode.f64_load, WasmOpcode.i32_store], - [MintOpcode.MINT_CGT_R8]: [WasmOpcode.f64_gt, WasmOpcode.f64_load, WasmOpcode.i32_store], - [MintOpcode.MINT_CLE_R8]: [WasmOpcode.f64_le, WasmOpcode.f64_load, WasmOpcode.i32_store], - [MintOpcode.MINT_CGE_R8]: [WasmOpcode.f64_ge, WasmOpcode.f64_load, WasmOpcode.i32_store], - */ - - // FIXME: unordered float comparisons }; const relopbranchTable : { [opcode: number]: [comparisonOpcode: MintOpcode, immediateOpcode: WasmOpcode | false, isSafepoint: boolean] | MintOpcode | undefined } = { From 8113ec4bd79270eabc1487c6985520e9999b5546 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Wed, 23 Nov 2022 12:37:29 -0800 Subject: [PATCH 06/12] Repair merge damage --- src/mono/mono/utils/options-def.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mono/mono/utils/options-def.h b/src/mono/mono/utils/options-def.h index a0f6c3027c56ad..ddd8164d80e2bb 100644 --- a/src/mono/mono/utils/options-def.h +++ b/src/mono/mono/utils/options-def.h @@ -96,7 +96,7 @@ DEFINE_BOOL(jiterpreter_call_resume_enabled, "jiterpreter-call-resume-enabled", // stats for options like estimateHeat, but raises overhead. DEFINE_BOOL(jiterpreter_disable_heuristic, "jiterpreter-disable-heuristic", FALSE, "Always insert trace entry points for more accurate statistics") // Automatically prints stats at app exit or when jiterpreter_dump_stats is called -DEFINE_BOOL(jiterpreter_stats_enabled, "jiterpreter-stats-enabled", TRUE, "Automatically print jiterpreter statistics") +DEFINE_BOOL(jiterpreter_stats_enabled, "jiterpreter-stats-enabled", FALSE, "Automatically print jiterpreter statistics") // Continue counting hits for traces that fail to compile and use it to estimate // the relative importance of the opcode that caused them to abort DEFINE_BOOL(jiterpreter_estimate_heat, "jiterpreter-estimate-heat", FALSE, "Maintain accurate hit count for all trace entry points") @@ -111,7 +111,7 @@ DEFINE_INT(jiterpreter_minimum_trace_hit_count, "jiterpreter-minimum-trace-hit-c // After a do_jit_call call site is hit this many times, we will queue it to be jitted DEFINE_INT(jiterpreter_jit_call_trampoline_hit_count, "jiterpreter-jit-call-hit-count", 3000, "Queue specialized do_jit_call trampoline for JIT after this many hits") // After a do_jit_call call site is hit this many times without being jitted, we will flush the JIT queue -DEFINE_INT(jiterpreter_jit_call_queue_flush_threshold, "jiterpreter-jit-call-queue-flush-threshold", 15000, "Flush the do_jit_call JIT queue after an unJITted call site has this many hits") +DEFINE_INT(jiterpreter_jit_call_queue_flush_threshold, "jiterpreter-jit-call-queue-flush-threshold", 10000, "Flush the do_jit_call JIT queue after an unJITted call site has this many hits") #endif // HOST_BROWSER /* Cleanup */ From d69318db3a7e184f46d4e3756069dba2c78e9b02 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Wed, 23 Nov 2022 13:19:44 -0800 Subject: [PATCH 07/12] Hoist unordered check --- src/mono/mono/mini/interp/jiterpreter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/mini/interp/jiterpreter.c b/src/mono/mono/mini/interp/jiterpreter.c index 99d2cd154e4f7f..93869cf0ddd13d 100644 --- a/src/mono/mono/mini/interp/jiterpreter.c +++ b/src/mono/mono/mini/interp/jiterpreter.c @@ -441,7 +441,7 @@ mono_jiterp_conv_ovf (void *dest, void *src, int opcode) { #define JITERP_RELOP(opcode, type, op, noorder) \ case opcode: \ { \ - if (mono_isunordered (lhs, rhs)) \ + if (is_unordered) \ return noorder; \ else \ return ((type)lhs op (type)rhs); \ @@ -449,6 +449,7 @@ mono_jiterp_conv_ovf (void *dest, void *src, int opcode) { EMSCRIPTEN_KEEPALIVE int mono_jiterp_relop_fp (double lhs, double rhs, int opcode) { + gboolean is_unordered = mono_isunordered (lhs, rhs); switch (opcode) { JITERP_RELOP(MINT_CEQ_R4, float, ==, 0); JITERP_RELOP(MINT_CEQ_R8, double, ==, 0); From d4dc09361fdf17dae7cbb10ee17d357b0324d387 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Thu, 24 Nov 2022 14:30:25 -0800 Subject: [PATCH 08/12] Make interp_exec_method static again, because that appears to improve code generation --- src/mono/mono/mini/interp/interp-internals.h | 3 - src/mono/mono/mini/interp/interp.c | 60 +++++++++++++- src/mono/mono/mini/interp/jiterpreter.c | 82 -------------------- src/mono/mono/mini/interp/jiterpreter.h | 35 +++++++++ 4 files changed, 94 insertions(+), 86 deletions(-) diff --git a/src/mono/mono/mini/interp/interp-internals.h b/src/mono/mono/mini/interp/interp-internals.h index 4a61ef9591bacf..6d9bbb5586ff5c 100644 --- a/src/mono/mono/mini/interp/interp-internals.h +++ b/src/mono/mono/mini/interp/interp-internals.h @@ -301,9 +301,6 @@ mono_interp_error_cleanup (MonoError *error); gboolean mono_interp_is_method_multicastdelegate_invoke (MonoMethod *method); -MONO_NEVER_INLINE void -mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClauseArgs *clause_args); - #if HOST_BROWSER gboolean diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index daf7995637c5d1..908963311c3fe3 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c @@ -102,6 +102,9 @@ struct FrameClauseArgs { gboolean run_until_end; }; +static MONO_NEVER_INLINE void +mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClauseArgs *clause_args); + /* * This code synchronizes with interp_mark_stack () using compiler memory barriers. */ @@ -3698,7 +3701,7 @@ max_d (double lhs, double rhs) * to return error information. * FRAME is only valid until the next call to alloc_frame (). */ -MONO_NEVER_INLINE void +static MONO_NEVER_INLINE void mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClauseArgs *clause_args) { InterpMethod *cmethod; @@ -8611,4 +8614,59 @@ mono_interp_is_method_multicastdelegate_invoke (MonoMethod *method) { return is_method_multicastdelegate_invoke (method); } + +// after interp_entry_prologue the wrapper will set up all the argument values +// in the correct place and compute the stack offset, then it passes that in to this +// function in order to actually enter the interpreter and process the return value +EMSCRIPTEN_KEEPALIVE void +mono_jiterp_interp_entry (JiterpEntryData *_data, stackval *sp_args, void *res) +{ + JiterpEntryDataHeader header; + MonoType *type; + + // Copy the scratch buffer into a local variable. This is necessary for us to be + // reentrant-safe because mono_interp_exec_method could end up hitting the trampoline + // again + g_assert(_data); + header = _data->header; + + g_assert(header.rmethod); + g_assert(header.rmethod->method); + g_assert(sp_args); + + stackval *sp = (stackval*)header.context->stack_pointer; + + InterpFrame frame = {0}; + frame.imethod = header.rmethod; + frame.stack = sp; + frame.retval = sp; + + header.context->stack_pointer = (guchar*)sp_args; + g_assert ((guchar*)sp_args < header.context->stack_end); + + MONO_ENTER_GC_UNSAFE; + mono_interp_exec_method (&frame, header.context, NULL); + MONO_EXIT_GC_UNSAFE; + + header.context->stack_pointer = (guchar*)sp; + + if (header.rmethod->needs_thread_attach) + mono_threads_detach_coop (header.orig_domain, &header.attach_cookie); + + mono_jiterp_check_pending_unwind (header.context); + + if (mono_llvm_only) { + if (header.context->has_resume_state) + /* The exception will be handled in a frame above us */ + mono_llvm_cpp_throw_exception (); + } else { + g_assert (!header.context->has_resume_state); + } + + // The return value is at the bottom of the stack, after the locals space + type = header.rmethod->rtype; + if (type->type != MONO_TYPE_VOID) + mono_jiterp_stackval_to_data (type, frame.stack, res); +} + #endif diff --git a/src/mono/mono/mini/interp/jiterpreter.c b/src/mono/mono/mini/interp/jiterpreter.c index 93869cf0ddd13d..b05161e772827e 100644 --- a/src/mono/mono/mini/interp/jiterpreter.c +++ b/src/mono/mono/mini/interp/jiterpreter.c @@ -556,34 +556,6 @@ mono_jiterp_adjust_abort_count (MintOpcode opcode, gint32 delta) { return jiterpreter_abort_counts[opcode]; } -typedef struct { - InterpMethod *rmethod; - ThreadContext *context; - gpointer orig_domain; - gpointer attach_cookie; -} JiterpEntryDataHeader; - -// we optimize delegate calls by attempting to cache the delegate invoke -// target - this will improve performance when the same delegate is invoked -// repeatedly inside a loop -typedef struct { - MonoDelegate *delegate_invoke_is_for; - MonoMethod *delegate_invoke; - InterpMethod *delegate_invoke_rmethod; -} JiterpEntryDataCache; - -// jitted interp_entry wrappers use custom tracking data structures -// that are allocated in the heap, one per wrapper -// FIXME: For thread safety we need to make these thread-local or stack-allocated -// Note that if we stack allocate these the cache will need to move somewhere else -typedef struct { - // We split the cache out from the important data so that when - // jiterp_interp_entry copies the important data it doesn't have - // to also copy the cache. This reduces overhead slightly - JiterpEntryDataHeader header; - JiterpEntryDataCache cache; -} JiterpEntryData; - // at the start of a jitted interp_entry wrapper, this is called to perform initial setup // like resolving the target for delegates and setting up the thread context // inlining this into the wrappers would make them unnecessarily big and complex @@ -642,60 +614,6 @@ mono_jiterp_interp_entry_prologue (JiterpEntryData *data, void *this_arg) return sp_args; } -// after interp_entry_prologue the wrapper will set up all the argument values -// in the correct place and compute the stack offset, then it passes that in to this -// function in order to actually enter the interpreter and process the return value -EMSCRIPTEN_KEEPALIVE void -mono_jiterp_interp_entry (JiterpEntryData *_data, stackval *sp_args, void *res) -{ - JiterpEntryDataHeader header; - MonoType *type; - - // Copy the scratch buffer into a local variable. This is necessary for us to be - // reentrant-safe because mono_interp_exec_method could end up hitting the trampoline - // again - jiterp_assert(_data); - header = _data->header; - - jiterp_assert(header.rmethod); - jiterp_assert(header.rmethod->method); - jiterp_assert(sp_args); - - stackval *sp = (stackval*)header.context->stack_pointer; - - InterpFrame frame = {0}; - frame.imethod = header.rmethod; - frame.stack = sp; - frame.retval = sp; - - header.context->stack_pointer = (guchar*)sp_args; - g_assert ((guchar*)sp_args < header.context->stack_end); - - MONO_ENTER_GC_UNSAFE; - mono_interp_exec_method (&frame, header.context, NULL); - MONO_EXIT_GC_UNSAFE; - - header.context->stack_pointer = (guchar*)sp; - - if (header.rmethod->needs_thread_attach) - mono_threads_detach_coop (header.orig_domain, &header.attach_cookie); - - mono_jiterp_check_pending_unwind (header.context); - - if (mono_llvm_only) { - if (header.context->has_resume_state) - /* The exception will be handled in a frame above us */ - mono_llvm_cpp_throw_exception (); - } else { - g_assert (!header.context->has_resume_state); - } - - // The return value is at the bottom of the stack, after the locals space - type = header.rmethod->rtype; - if (type->type != MONO_TYPE_VOID) - mono_jiterp_stackval_to_data (type, frame.stack, res); -} - // should_abort_trace returns one of these codes depending on the opcode and current state #define TRACE_IGNORE -1 #define TRACE_CONTINUE 0 diff --git a/src/mono/mono/mini/interp/jiterpreter.h b/src/mono/mono/mini/interp/jiterpreter.h index acb7cc8002552f..6856b6d58a4be0 100644 --- a/src/mono/mono/mini/interp/jiterpreter.h +++ b/src/mono/mono/mini/interp/jiterpreter.h @@ -83,6 +83,41 @@ mono_jiterp_do_jit_call_indirect ( gpointer cb, gpointer arg, gboolean *out_thrown ); +#ifdef __MONO_MINI_INTERPRETER_INTERNALS_H__ + +typedef struct { + InterpMethod *rmethod; + ThreadContext *context; + gpointer orig_domain; + gpointer attach_cookie; +} JiterpEntryDataHeader; + +// we optimize delegate calls by attempting to cache the delegate invoke +// target - this will improve performance when the same delegate is invoked +// repeatedly inside a loop +typedef struct { + MonoDelegate *delegate_invoke_is_for; + MonoMethod *delegate_invoke; + InterpMethod *delegate_invoke_rmethod; +} JiterpEntryDataCache; + +// jitted interp_entry wrappers use custom tracking data structures +// that are allocated in the heap, one per wrapper +// FIXME: For thread safety we need to make these thread-local or stack-allocated +// Note that if we stack allocate these the cache will need to move somewhere else +typedef struct { + // We split the cache out from the important data so that when + // jiterp_interp_entry copies the important data it doesn't have + // to also copy the cache. This reduces overhead slightly + JiterpEntryDataHeader header; + JiterpEntryDataCache cache; +} JiterpEntryData; + +void +mono_jiterp_interp_entry (JiterpEntryData *_data, stackval *sp_args, void *res); + +#endif // __MONO_MINI_INTERPRETER_INTERNALS_H__ + extern WasmDoJitCall jiterpreter_do_jit_call; #endif // HOST_BROWSER From 138070d7a9c8deadd4e7ed02e7a61ea3bbadea46 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Tue, 29 Nov 2022 15:08:15 -0800 Subject: [PATCH 09/12] Remove tier_instantly (not necessary) and address PR feedback --- src/mono/mono/mini/interp/interp.c | 27 +++++++++---------------- src/mono/mono/mini/interp/jiterpreter.c | 2 +- src/mono/mono/utils/options-def.h | 1 - 3 files changed, 10 insertions(+), 20 deletions(-) diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index 908963311c3fe3..1c93cac1dc6b25 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c @@ -2675,7 +2675,7 @@ do_jit_call (ThreadContext *context, stackval *ret_sp, stackval *sp, InterpFrame goto epilogue; } else { int count = cinfo->hit_count; - if (count == (mono_opt_interp_tier_instantly ? 1 : mono_opt_jiterpreter_jit_call_trampoline_hit_count)) { + if (count == mono_opt_jiterpreter_jit_call_trampoline_hit_count) { void *fn = cinfo->no_wrapper ? cinfo->addr : cinfo->wrapper; mono_interp_jit_wasm_jit_call_trampoline ( rmethod, cinfo, fn, rmethod->hasthis, rmethod->param_count, @@ -3800,6 +3800,11 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause MINT_IN_CASE(MINT_DEF) MINT_IN_CASE(MINT_DUMMY_USE) MINT_IN_CASE(MINT_TIER_PATCHPOINT_DATA) +#ifndef HOST_BROWSER + MINT_IN_CASE(MINT_TIER_NOP_JITERPRETER) + MINT_IN_CASE(MINT_TIER_PREPARE_JITERPRETER) + MINT_IN_CASE(MINT_TIER_ENTER_JITERPRETER) +#endif g_assert_not_reached (); MINT_IN_BREAK; MINT_IN_CASE(MINT_BREAK) @@ -7253,7 +7258,7 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; MINT_IN_CASE(MINT_TIER_ENTER_METHOD) { frame->imethod->entry_count++; - if (frame->imethod->entry_count > (mono_opt_interp_tier_instantly ? INTERP_TIER_ENTRY_LIMIT_LOW : INTERP_TIER_ENTRY_LIMIT) && !clause_args) + if (frame->imethod->entry_count > INTERP_TIER_ENTRY_LIMIT && !clause_args) ip = mono_interp_tier_up_frame_enter (frame, context); else ip++; @@ -7261,7 +7266,7 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; } MINT_IN_CASE(MINT_TIER_PATCHPOINT) { frame->imethod->entry_count++; - if (frame->imethod->entry_count > (mono_opt_interp_tier_instantly ? INTERP_TIER_ENTRY_LIMIT_LOW : INTERP_TIER_ENTRY_LIMIT) && !clause_args) + if (frame->imethod->entry_count > INTERP_TIER_ENTRY_LIMIT && !clause_args) ip = mono_interp_tier_up_frame_patchpoint (frame, context, ip [1]); else ip += 2; @@ -7521,6 +7526,7 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; ip += 5; MINT_IN_BREAK; } + #ifdef HOST_BROWSER MINT_IN_CASE(MINT_TIER_NOP_JITERPRETER) { ip += 3; @@ -7611,21 +7617,6 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; ip = (guint16*) (((guint8*)ip) + offset); MINT_IN_BREAK; } -#else - MINT_IN_CASE(MINT_TIER_NOP_JITERPRETER) { - g_assert_not_reached (); - MINT_IN_BREAK; - } - - MINT_IN_CASE(MINT_TIER_PREPARE_JITERPRETER) { - g_assert_not_reached (); - MINT_IN_BREAK; - } - - MINT_IN_CASE(MINT_TIER_ENTER_JITERPRETER) { - g_assert_not_reached (); - MINT_IN_BREAK; - } #endif #if !USE_COMPUTED_GOTO diff --git a/src/mono/mono/mini/interp/jiterpreter.c b/src/mono/mono/mini/interp/jiterpreter.c index b05161e772827e..4c6105548a170a 100644 --- a/src/mono/mono/mini/interp/jiterpreter.c +++ b/src/mono/mono/mini/interp/jiterpreter.c @@ -141,7 +141,7 @@ mono_jiterp_increase_entry_count (void *_imethod) { InterpMethod *imethod = (InterpMethod*)_imethod; imethod->entry_count++; // Return whether the trace should bail out because the method needs to be tiered - return imethod->entry_count >= (mono_opt_interp_tier_instantly ? INTERP_TIER_ENTRY_LIMIT_LOW : INTERP_TIER_ENTRY_LIMIT); + return imethod->entry_count >= INTERP_TIER_ENTRY_LIMIT; } EMSCRIPTEN_KEEPALIVE void* diff --git a/src/mono/mono/utils/options-def.h b/src/mono/mono/utils/options-def.h index ddd8164d80e2bb..bf9cc2c890a5fb 100644 --- a/src/mono/mono/utils/options-def.h +++ b/src/mono/mono/utils/options-def.h @@ -60,7 +60,6 @@ DEFINE_BOOL_READONLY(readonly_flag, "readonly-flag", FALSE, "Example") DEFINE_BOOL(wasm_exceptions, "wasm-exceptions", FALSE, "Enable codegen for WASM exceptions") DEFINE_BOOL(wasm_gc_safepoints, "wasm-gc-safepoints", FALSE, "Use GC safepoints on WASM") DEFINE_BOOL(aot_lazy_assembly_load, "aot-lazy-assembly-load", FALSE, "Load assemblies referenced by AOT images lazily") -DEFINE_BOOL(interp_tier_instantly, "interp-tier-instantly", FALSE, "Immediately tier up and optimize interpreter methods") #if HOST_BROWSER From 05d90cb28fa23533d3524aa73f2dc9ff5014e6a1 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Tue, 29 Nov 2022 15:28:01 -0800 Subject: [PATCH 10/12] Remove header change --- src/mono/mono/mini/interp/tiering.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mono/mono/mini/interp/tiering.h b/src/mono/mono/mini/interp/tiering.h index d056b95d3d0a2b..dbd7da87ecd4d9 100644 --- a/src/mono/mono/mini/interp/tiering.h +++ b/src/mono/mono/mini/interp/tiering.h @@ -3,7 +3,6 @@ #include "interp-internals.h" -#define INTERP_TIER_ENTRY_LIMIT_LOW 2 #define INTERP_TIER_ENTRY_LIMIT 1000 void From d6ff4e5dc498dfa3f59950e0860ed0a968a7f69c Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Tue, 29 Nov 2022 15:30:01 -0800 Subject: [PATCH 11/12] remove ts side tier instantly --- src/mono/wasm/runtime/jiterpreter-support.ts | 5 +---- src/mono/wasm/runtime/jiterpreter.ts | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/mono/wasm/runtime/jiterpreter-support.ts b/src/mono/wasm/runtime/jiterpreter-support.ts index 2589d36d59a54b..322378efc9ccf6 100644 --- a/src/mono/wasm/runtime/jiterpreter-support.ts +++ b/src/mono/wasm/runtime/jiterpreter-support.ts @@ -857,8 +857,6 @@ export type JiterpreterOptions = { countBailouts: boolean; // Dump the wasm blob for all compiled traces dumpTraces: boolean; - // Instantly tiers up methods and traces - tierInstantly: boolean; minimumTraceLength: number; minimumTraceHitCount: number; } @@ -876,8 +874,7 @@ const optionNames : { [jsName: string] : string } = { "countBailouts": "jiterpreter-count-bailouts", "dumpTraces": "jiterpreter-dump-traces", "minimumTraceLength": "jiterpreter-minimum-trace-length", - "minimumTraceHitCount": "jiterpreter-minimum-trace-hit-count", - "tierInstantly": "interp-tier-instantly", + "minimumTraceHitCount": "jiterpreter-minimum-trace-hit-count" }; let optionsVersion = -1; diff --git a/src/mono/wasm/runtime/jiterpreter.ts b/src/mono/wasm/runtime/jiterpreter.ts index 980e346a952c2a..aa13d7fc179f05 100644 --- a/src/mono/wasm/runtime/jiterpreter.ts +++ b/src/mono/wasm/runtime/jiterpreter.ts @@ -2939,7 +2939,7 @@ export function mono_interp_tier_prepare_jiterpreter ( else info.hitCount++; - const minHitCount = mostRecentOptions.tierInstantly ? 2 : mostRecentOptions.minimumTraceHitCount; + const minHitCount = mostRecentOptions.minimumTraceHitCount; if (info.hitCount < minHitCount) return JITERPRETER_TRAINING; From d29e7ceb7069a145ad2691ffc6e39504be5b46ab Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Wed, 30 Nov 2022 18:09:07 -0800 Subject: [PATCH 12/12] Enable jiterpreter if tiering is disabled, update stats --- src/mono/mono/mini/interp/transform.c | 3 +- src/mono/wasm/runtime/jiterpreter.ts | 51 +++++++++++++++++---------- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index bd572d7d70bd18..d7d71ac3b6ea27 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -10071,8 +10071,7 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG interp_optimize_code (td); interp_alloc_offsets (td); #if HOST_BROWSER - if (mono_interp_tiering_enabled ()) - jiterp_insert_entry_points (td); + jiterp_insert_entry_points (td); #endif } diff --git a/src/mono/wasm/runtime/jiterpreter.ts b/src/mono/wasm/runtime/jiterpreter.ts index aa13d7fc179f05..6d9516f0255b59 100644 --- a/src/mono/wasm/runtime/jiterpreter.ts +++ b/src/mono/wasm/runtime/jiterpreter.ts @@ -3029,32 +3029,45 @@ export function jiterpreter_dump_stats (b?: boolean) { // Filter out noisy methods that we don't care about optimizing if (traces[i].name!.indexOf("Xunit.") >= 0) continue; + // FIXME: A single hot method can contain many failed traces. This creates a lot of noise // here and also likely indicates the jiterpreter would add a lot of overhead to it // Filter out aborts that aren't meaningful since it is unlikely to ever make sense // to fix them, either because they are rarely used or because putting them in // traces would not meaningfully improve performance - if (traces[i].abortReason && traces[i].abortReason!.startsWith("mono_icall_")) - continue; - switch (traces[i].abortReason) { - case "trace-too-small": - case "call": - case "callvirt.fast": - case "calli.nat.fast": - case "calli.nat": - case "call.delegate": - case "newobj": - case "newobj_vt": - case "intrins_ordinal_ignore_case_ascii": - case "intrins_marvin_block": - case "intrins_ascii_chars_to_uppercase": - case "switch": - case "call_handler.s": - case "rethrow": - case "endfinally": - case "end-of-body": + if (traces[i].abortReason) { + if (traces[i].abortReason!.startsWith("mono_icall_") || + traces[i].abortReason!.startsWith("ret.")) continue; + + switch (traces[i].abortReason) { + // not feasible to fix + case "trace-too-small": + case "call": + case "callvirt.fast": + case "calli.nat.fast": + case "calli.nat": + case "call.delegate": + case "newobj": + case "newobj_vt": + case "newobj_slow": + case "switch": + case "call_handler.s": + case "rethrow": + case "endfinally": + case "end-of-body": + case "ret": + continue; + + // not worth implementing / too difficult + case "intrins_ordinal_ignore_case_ascii": + case "intrins_marvin_block": + case "intrins_ascii_chars_to_uppercase": + case "newarr": + continue; + } } + c++; console.log(`${traces[i].name} @${traces[i].ip} (${traces[i].hitCount} hits) ${traces[i].abortReason}`); }