diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c
index 5fb08cc4f23d82..2b92e7f160e22e 100644
--- a/src/mono/mono/mini/interp/interp.c
+++ b/src/mono/mono/mini/interp/interp.c
@@ -6576,6 +6576,26 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 			MINT_IN_BREAK;
 		}
 
+		// Squashed moves: the operands are (dst, src) var offset pairs, each pair copies one 64 bit slot.
+		MINT_IN_CASE(MINT_MOV_8_2)
+			LOCAL_VAR (ip [1], guint64) = LOCAL_VAR (ip [2], guint64);
+			LOCAL_VAR (ip [3], guint64) = LOCAL_VAR (ip [4], guint64);
+			ip += 5;
+			MINT_IN_BREAK;
+		MINT_IN_CASE(MINT_MOV_8_3)
+			LOCAL_VAR (ip [1], guint64) = LOCAL_VAR (ip [2], guint64);
+			LOCAL_VAR (ip [3], guint64) = LOCAL_VAR (ip [4], guint64);
+			LOCAL_VAR (ip [5], guint64) = LOCAL_VAR (ip [6], guint64);
+			ip += 7;
+			MINT_IN_BREAK;
+		MINT_IN_CASE(MINT_MOV_8_4)
+			LOCAL_VAR (ip [1], guint64) = LOCAL_VAR (ip [2], guint64);
+			LOCAL_VAR (ip [3], guint64) = LOCAL_VAR (ip [4], guint64);
+			LOCAL_VAR (ip [5], guint64) = LOCAL_VAR (ip [6], guint64);
+			LOCAL_VAR (ip [7], guint64) = LOCAL_VAR (ip [8], guint64);
+			ip += 9;
+			MINT_IN_BREAK;
+
 		MINT_IN_CASE(MINT_LOCALLOC) {
 			int len = LOCAL_VAR (ip [2], gint32);
 			gpointer mem = frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, MINT_VT_ALIGNMENT));
diff --git a/src/mono/mono/mini/interp/mintops.def b/src/mono/mono/mini/interp/mintops.def
index a82cdb8623ecd8..1f55c92ec59000 100644
--- a/src/mono/mono/mini/interp/mintops.def
+++ b/src/mono/mono/mini/interp/mintops.def
@@ -108,6 +108,12 @@ OPDEF(MINT_MOV_4, "mov.4", 3, 1, 1, MintOpNoArgs)
 OPDEF(MINT_MOV_8, "mov.8", 3, 1, 1, MintOpNoArgs)
 OPDEF(MINT_MOV_VT, "mov.vt", 4, 1, 1, MintOpShortInt)
 
+// These opcodes represent multiple moves stacked together. They have multiple src and dst
+// but they are not represented here. They are generated by the var offset allocator.
+OPDEF(MINT_MOV_8_2, "mov.8.2", 5, 0, 0, MintOpPair2)
+OPDEF(MINT_MOV_8_3, "mov.8.3", 7, 0, 0, MintOpPair3)
+OPDEF(MINT_MOV_8_4, "mov.8.4", 9, 0, 0, MintOpPair4)
+
 OPDEF(MINT_LDLOCA_S, "ldloca.s", 3, 1, 0, MintOpUShortInt)
 
 OPDEF(MINT_LDIND_I1, "ldind.i1", 3, 1, 1, MintOpNoArgs)
diff --git a/src/mono/mono/mini/interp/mintops.h b/src/mono/mono/mini/interp/mintops.h
index ec5c95298c0467..82c78ac9243bfa 100644
--- a/src/mono/mono/mini/interp/mintops.h
+++ b/src/mono/mono/mini/interp/mintops.h
@@ -24,7 +24,10 @@ typedef enum
 	MintOpClassToken,
 	MintOpTwoShorts,
 	MintOpShortAndInt,
-	MintOpShortAndShortBranch
+	MintOpShortAndShortBranch,
+	MintOpPair2,
+	MintOpPair3,
+	MintOpPair4
 } MintOpArgType;
 
 #define OPDEF(a,b,c,d,e,f) a,
@@ -74,6 +77,8 @@ typedef enum {
 #define MINT_CALL_ARGS 2
 #define MINT_CALL_ARGS_SREG -2
 
+#define MINT_MOV_PAIRS_MAX 4
+
 extern unsigned char const mono_interp_oplen[];
 extern int const mono_interp_op_dregs [];
 extern int const mono_interp_op_sregs [];
diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c
index 7b7a3afa82c2e0..284004fbcc4155 100644
--- a/src/mono/mono/mini/interp/transform.c
+++ b/src/mono/mono/mini/interp/transform.c
@@ -1411,6 +1411,15 @@ dump_interp_ins_data (InterpInst *ins, gint32 ins_offset, const guint16 *data, g
 			target = ins_offset + *(gint16*)(data + 1);
 			g_string_append_printf (str, " %u, IR_%04x", *(guint16*)data, target);
 		}
+		break;
+	case MintOpPair2:
+		g_string_append_printf (str, " %u <- %u, %u <- %u", data [0], data [1], data [2], data [3]);
+		break;
+	case MintOpPair3:
+		g_string_append_printf (str, " %u <- %u, %u <- %u, %u <- %u", data [0], data [1], data [2], data [3], data [4], data [5]);
+		break;
+	case MintOpPair4:
+		g_string_append_printf (str, " %u <- %u, %u <- %u, %u <- %u, %u <- %u", data [0], data [1], data [2], data [3], data [4], data [5], data [6], data [7]);
 		break;
 	default:
 		g_string_append_printf (str, "unknown arg type\n");
@@ -7549,6 +7558,9 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
 		if (ins->info.target_bb->native_offset >= 0) {
 			// Backwards branch. We can already patch it.
 			*ip++ = ins->info.target_bb->native_offset - br_offset;
+		} else if (opcode == MINT_BR_S && ins->info.target_bb == td->cbb->next_bb) {
+			// Ignore branch to the next basic block. Revert the added MINT_BR_S.
+			ip--;
 		} else {
 			// We don't know the in_offset of the target, add a reloc
 			Reloc *reloc = (Reloc*)mono_mempool_alloc0 (td->mempool, sizeof (Reloc));
@@ -7647,6 +7659,12 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
 		for (int i = size - 1; i < (jit_call2_size - 1); i++)
 			*ip++ = MINT_NIY;
 #endif
+	} else if (opcode >= MINT_MOV_8_2 && opcode <= MINT_MOV_8_4) {
+		// This instruction is not marked as operating on any vars, all instruction slots are
+		// actually vars. Resolve their offsets
+		int num_vars = mono_interp_oplen [opcode] - 1;
+		for (int i = 0; i < num_vars; i++)
+			*ip++ = td->locals [ins->data [i]].offset;
 	} else {
 		if (mono_interp_op_dregs [opcode])
 			*ip++ = td->locals [ins->dreg].offset;
@@ -7696,6 +7714,7 @@ generate_compacted_code (TransformData *td)
 	for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
 		InterpInst *ins = bb->first_ins;
 		bb->native_offset = ip - td->new_code;
+		td->cbb = bb;
 		while (ins) {
 			ip = emit_compacted_instruction (td, ip, ins);
 			ins = ins->next;
@@ -7984,7 +8003,7 @@ interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, LocalValue *
 
 
 static InterpInst*
-interp_fold_binop (TransformData *td, LocalValue *local_defs, InterpInst *ins)
+interp_fold_binop (TransformData *td, LocalValue *local_defs, InterpInst *ins, gboolean *folded)
 {
 	int *local_ref_count = td->local_ref_count;
 	// ins should be a binop, therefore it should have a single dreg and two sregs
@@ -7995,6 +8014,8 @@ interp_fold_binop (TransformData *td, LocalValue *local_defs, InterpInst *ins)
 	LocalValue *val2 = &local_defs [sreg2];
 	LocalValue result;
 
+	*folded = FALSE;
+
 	if (val1->type != LOCAL_VALUE_I4 && val1->type != LOCAL_VALUE_I8)
 		return ins;
 	if (val2->type != LOCAL_VALUE_I4 && val2->type != LOCAL_VALUE_I8)
@@ -8066,7 +8087,7 @@ interp_fold_binop (TransformData *td, LocalValue *local_defs, InterpInst *ins)
 	// with a LDC of the constant. We leave alone the sregs of this instruction, for
 	// deadce to kill the instructions initializing them.
 	mono_interp_stats.constant_folds++;
-
+	*folded = TRUE;
 	if (result.type == LOCAL_VALUE_I4)
 		ins = interp_get_ldc_i4_from_const (td, ins, result.i, dreg);
 	else if (result.type == LOCAL_VALUE_I8)
@@ -8341,7 +8362,43 @@ interp_cprop (TransformData *td)
 			} else if (MINT_IS_UNOP_CONDITIONAL_BRANCH (opcode)) {
 				ins = interp_fold_unop_cond_br (td, bb, local_defs, ins);
 			} else if (MINT_IS_BINOP (opcode)) {
-				ins = interp_fold_binop (td, local_defs, ins);
+				gboolean folded;
+				ins = interp_fold_binop (td, local_defs, ins, &folded);
+				if (!folded) {
+					// x * 1, 1 * x and x / 1 evaluate to x, so the binop is replaced with a mov of x
+					int sreg = -1;
+					int mov_op;
+					if ((opcode == MINT_MUL_I4 || opcode == MINT_DIV_I4) &&
+							local_defs [ins->sregs [1]].type == LOCAL_VALUE_I4 &&
+							local_defs [ins->sregs [1]].i == 1) {
+						sreg = ins->sregs [0];
+						mov_op = MINT_MOV_4;
+					} else if ((opcode == MINT_MUL_I8 || opcode == MINT_DIV_I8) &&
+							local_defs [ins->sregs [1]].type == LOCAL_VALUE_I8 &&
+							local_defs [ins->sregs [1]].l == 1) {
+						sreg = ins->sregs [0];
+						mov_op = MINT_MOV_8;
+					} else if (opcode == MINT_MUL_I4 &&
+							local_defs [ins->sregs [0]].type == LOCAL_VALUE_I4 &&
+							local_defs [ins->sregs [0]].i == 1) {
+						sreg = ins->sregs [1];
+						mov_op = MINT_MOV_4;
+					} else if (opcode == MINT_MUL_I8 &&
+							local_defs [ins->sregs [0]].type == LOCAL_VALUE_I8 &&
+							local_defs [ins->sregs [0]].l == 1) {
+						sreg = ins->sregs [1];
+						mov_op = MINT_MOV_8;
+					}
+					if (sreg != -1) {
+						ins->opcode = mov_op;
+						ins->sregs [0] = sreg;
+						if (td->verbose_level) {
+							g_print ("Replace idempotent binop :\n\t");
+							dump_interp_inst (ins);
+						}
+						needs_retry = TRUE;
+					}
+				}
 			} else if (MINT_IS_BINOP_CONDITIONAL_BRANCH (opcode)) {
 				ins = interp_fold_binop_cond_br (td, bb, local_defs, ins);
 			} else if (MINT_IS_LDFLD (opcode) && ins->data [0] == 0) {
@@ -9105,7 +9162,11 @@ interp_alloc_offsets (TransformData *td)
 			if (ins->flags & INTERP_INST_FLAG_CALL) {
 				int *call_args = ins->info.call_args;
 				if (call_args) {
+					int pair_sregs [MINT_MOV_PAIRS_MAX];
+					int pair_dregs [MINT_MOV_PAIRS_MAX];
+					int num_pairs = 0;
 					int var = *call_args;
+
 					while (var != -1) {
 						if (td->locals [var].flags & INTERP_LOCAL_FLAG_GLOBAL ||
 								td->locals [var].flags & INTERP_LOCAL_FLAG_NO_CALL_ARGS) {
@@ -9114,17 +9175,27 @@ interp_alloc_offsets (TransformData *td)
 							int new_var = create_interp_local (td, td->locals [var].type);
 							td->locals [new_var].call = ins;
 							td->locals [new_var].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
-							int opcode = get_mov_for_type (mint_type (td->locals [var].type), FALSE);
-							InterpInst *new_inst = interp_insert_ins_bb (td, bb, ins->prev, opcode);
-							interp_ins_set_dreg (new_inst, new_var);
-							interp_ins_set_sreg (new_inst, var);
-							if (opcode == MINT_MOV_VT)
-								new_inst->data [0] = td->locals [var].size;
-							// The arg of the call is no longer global
-							*call_args = new_var;
-							// Also update liveness for this instruction
-							foreach_local_var (td, new_inst, ins_index, set_var_live_range);
-							ins_index++;
+
+							int mt = mint_type (td->locals [var].type);
+							if (mt != MINT_TYPE_VT && num_pairs < MINT_MOV_PAIRS_MAX) {
+								pair_sregs [num_pairs] = var;
+								pair_dregs [num_pairs] = new_var;
+								num_pairs++;
+								// The arg of the call is no longer global
+								*call_args = new_var;
+							} else {
+								int opcode = get_mov_for_type (mt, FALSE);
+								InterpInst *new_inst = interp_insert_ins_bb (td, bb, ins->prev, opcode);
+								interp_ins_set_dreg (new_inst, new_var);
+								interp_ins_set_sreg (new_inst, var);
+								if (opcode == MINT_MOV_VT)
+									new_inst->data [0] = td->locals [var].size;
+								// The arg of the call is no longer global
+								*call_args = new_var;
+								// Also update liveness for this instruction
+								foreach_local_var (td, new_inst, ins_index, set_var_live_range);
+								ins_index++;
+							}
 						} else {
 							// Flag this var as it has special storage on the call args stack
 							td->locals [var].call = ins;
@@ -9133,6 +9204,30 @@ interp_alloc_offsets (TransformData *td)
 						call_args++;
 						var = *call_args;
 					}
+					if (num_pairs > 0) {
+						int i;
+						for (i = 0; i < num_pairs; i++) {
+							set_var_live_range (td, pair_sregs [i], ins_index);
+							set_var_live_range (td, pair_dregs [i], ins_index);
+						}
+						if (num_pairs == 1) {
+							int mt = mint_type (td->locals [pair_sregs [0]].type);
+							int opcode = get_mov_for_type (mt, FALSE);
+							InterpInst *new_inst = interp_insert_ins_bb (td, bb, ins->prev, opcode);
+							interp_ins_set_dreg (new_inst, pair_dregs [0]);
+							interp_ins_set_sreg (new_inst, pair_sregs [0]);
+						} else {
+							// Squash together multiple moves to the param area into a single opcode
+							int opcode = MINT_MOV_8_2 + num_pairs - 2;
+							InterpInst *new_inst = interp_insert_ins_bb (td, bb, ins->prev, opcode);
+							int k = 0;
+							for (i = 0; i < num_pairs; i++) {
+								new_inst->data [k++] = pair_dregs [i];
+								new_inst->data [k++] = pair_sregs [i];
+							}
+						}
+						ins_index++;
+					}
 				}
 			}
 			// Set live_start and live_end for every referenced local that is not global