[webkit-reviews] review granted: [Bug 21943] Avoid needless reads of temporary values in CTI code : [Attachment 25120] Proposed patch
bugzilla-daemon at webkit.org
Thu Nov 13 03:02:43 PST 2008
Maciej Stachowiak <mjs at apple.com> has granted Cameron Zwarich (cpst)
<cwzwarich at uwaterloo.ca>'s request for review:
Bug 21943: Avoid needless reads of temporary values in CTI code
https://bugs.webkit.org/show_bug.cgi?id=21943
Attachment 25120: Proposed patch
https://bugs.webkit.org/attachment.cgi?id=25120&action=review
------- Additional Comments from Maciej Stachowiak <mjs at apple.com>
r=me
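
For anyone skimming the patch, the caching rule the ChangeLog describes boils
down to very little code. The following is only a simplified, self-contained
model of that rule, not the CTI code itself (ResultCache and its method names
are invented for the illustration): emitPutResult() remembers which bytecode
register was last written from eax, and emitGetArg() may skip the reload when
that same register is read back into eax, unless the current instruction is a
jump target.

#include <cstddef>
#include <cstdio>
#include <limits>
#include <vector>

// Simplified model of the result-caching peephole (illustration only).
struct ResultCache {
    // Bytecode register whose value is known to still be in eax, or INT_MAX
    // when nothing is cached (mirrors m_lastResultBytecodeRegister).
    int lastResultRegister = std::numeric_limits<int>::max();
    std::vector<unsigned> jumpTargets; // sorted opcode indices, as in CodeBlock::jumpTargets
    std::size_t jumpTargetsPosition = 0; // mirrors m_jumpTargetsPosition

    void invalidate() { lastResultRegister = std::numeric_limits<int>::max(); }

    // Called when a result is stored from eax into register-file slot dst.
    void recordStoreFromEax(int dst) { lastResultRegister = dst; }

    // Returns true if loading `src` into eax at instruction `index` can be
    // skipped because eax still holds that value.
    bool canSkipLoad(int src, unsigned index) {
        if (src != lastResultRegister)
            return false;
        // Walk the sorted jump target list up to the current instruction; at
        // a jump target the value in eax may have arrived from another path.
        bool atJumpTarget = false;
        while (jumpTargetsPosition < jumpTargets.size()
               && jumpTargets[jumpTargetsPosition] <= index) {
            if (jumpTargets[jumpTargetsPosition] == index)
                atJumpTarget = true;
            ++jumpTargetsPosition;
        }
        return !atJumpTarget;
    }
};

int main() {
    ResultCache cache;
    cache.jumpTargets = { 5 }; // pretend instruction 5 is a jump target

    cache.recordStoreFromEax(10); // temporary r10 was just written from eax
    std::printf("reuse eax at 3: %d\n", cache.canSkipLoad(10, 3)); // 1
    cache.recordStoreFromEax(10);
    std::printf("reuse eax at 5: %d\n", cache.canSkipLoad(10, 5)); // 0 (jump target)
    return 0;
}

The real emitGetArg() additionally requires that the destination is eax and
that the source is a temporary register, and it invalidates the cached value
on every path; those details are in the patch below.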
> Index: ChangeLog
> ===================================================================
> --- ChangeLog (revision 38367)
> +++ ChangeLog (working copy)
> @@ -1,3 +1,64 @@
> +2008-11-13 Cameron Zwarich <zwarich at apple.com>
> +
> + Reviewed by NOBODY (OOPS!).
> +
> + Bug 21943: Avoid needless reads of temporary values in CTI code
> + <https://bugs.webkit.org/show_bug.cgi?id=21943>
> +
> + If we are writing a value to a temporary in the register file, we should not
> + immediately read it back into the same machine register. This patch
> + implements this optimization.
> +
> + In order to perform this optimization, we need to know the possible
> + jump targets in the CodeBlock. For temporaries, the only problematic
> + jump targets are binary logical operators and the ternary conditional
> + operator. However, if this optimization were to be extended to local
> + variable registers as well, other jump targets would need to be
> + included, like switch statement cases and the beginnings of catch
> + blocks.
> +
> + This optimization also requires that the fast case and the slow case
> + of an opcode use emitPutResult() on the same register, which we have
> + chosen to be eax, as that is the register into which we read the first
> + operand of opcodes. In order to make this the case, we needed to add
> + some mov instructions to some of the slow cases.
> +
> + The function compileBinaryArithOp() uses distinct machine registers for
> + its final result. While it seems possible to modify this code so that
> + the same machine register is always used, we have disabled the optimization
> + there for the moment. Also, this optimization is disabled when generating slow
> + cases, because some fast cases overwrite the value of eax before jumping
> + to the slow case. In the future, it may be possible to perform this
> + optimization in slow cases as well, but it did not seem to be a speedup
> + when testing an early version of this patch.
> +
> + * VM/CTI.cpp:
> + (JSC::CTI::invalidatePeepholeOptimizations):
> + (JSC::CTI::emitGetArg):
> + (JSC::CTI::emitGetPutArg):
> + (JSC::CTI::emitPutArg):
> + (JSC::CTI::emitPutArgConstant):
> + (JSC::CTI::emitPutCTIParam):
> + (JSC::CTI::emitGetCTIParam):
> + (JSC::CTI::emitPutToCallFrameHeader):
> + (JSC::CTI::emitGetFromCallFrameHeader):
> + (JSC::CTI::emitPutResult):
> + (JSC::CTI::emitCTICall):
> + (JSC::CTI::CTI):
> + (JSC::CTI::compileOpCall):
> + (JSC::CTI::compileOpStrictEq):
> + (JSC::CTI::emitSlowScriptCheck):
> + (JSC::CTI::compileBinaryArithOp):
> + (JSC::CTI::privateCompileMainPass):
> + (JSC::CTI::privateCompileSlowCases):
> + (JSC::CTI::privateCompileGetByIdProto):
> + (JSC::CTI::privateCompilePatchGetArrayLength):
> + * VM/CTI.h:
> + * VM/CodeBlock.h:
> + (JSC::CodeBlock::isTemporaryRegisterIndex):
> + * bytecompiler/CodeGenerator.cpp:
> + (JSC::CodeGenerator::emitLabel):
> +
> 2008-11-12 Alp Toker <alp at nuanti.com>
>
> autotools build system fix-up only. Add FloatQuad.h to the source
> Index: VM/CTI.cpp
> ===================================================================
> --- VM/CTI.cpp (revision 38367)
> +++ VM/CTI.cpp (working copy)
> @@ -173,15 +173,38 @@ static ALWAYS_INLINE uintptr_t asInteger
> return reinterpret_cast<uintptr_t>(value);
> }
>
> +ALWAYS_INLINE void CTI::invalidatePeepholeOptimizations()
> +{
> + m_lastResultBytecodeRegister = std::numeric_limits<int>::max();
> +}
> +
> // get arg puts an arg from the SF register array into a h/w register
> -ALWAYS_INLINE void CTI::emitGetArg(int src, X86Assembler::RegisterID dst)
> +ALWAYS_INLINE void CTI::emitGetArg(int src, X86Assembler::RegisterID dst, unsigned currentInstructionIndex)
> {
> // TODO: we want to reuse values that are already in registers if we can - add a register allocator!
> if (m_codeBlock->isConstantRegisterIndex(src)) {
> JSValue* value = m_codeBlock->getConstant(src);
> m_jit.movl_i32r(asInteger(value), dst);
> - } else
> - m_jit.movl_mr(src * sizeof(Register), X86::edi, dst);
> + invalidatePeepholeOptimizations();
> + return;
> + }
> +
> + if (src == m_lastResultBytecodeRegister && dst == X86::eax && m_codeBlock->isTemporaryRegisterIndex(src)) {
> + bool atJumpTarget = false;
> + while (m_jumpTargetsPosition < m_codeBlock->jumpTargets.size() && m_codeBlock->jumpTargets[m_jumpTargetsPosition] <= currentInstructionIndex) {
> + if (m_codeBlock->jumpTargets[m_jumpTargetsPosition] == currentInstructionIndex)
> + atJumpTarget = true;
> + m_jumpTargetsPosition++;
> + }
> +
> + if (!atJumpTarget) {
> + invalidatePeepholeOptimizations();
> + return;
> + }
> + }
> +
> + m_jit.movl_mr(src * sizeof(Register), X86::edi, dst);
> + invalidatePeepholeOptimizations();
> }
>
> // get arg puts an arg from the SF register array onto the stack, as an arg to a context threaded function.
> @@ -194,17 +217,21 @@ ALWAYS_INLINE void CTI::emitGetPutArg(un
> m_jit.movl_mr(src * sizeof(Register), X86::edi, scratch);
> m_jit.movl_rm(scratch, offset + sizeof(void*), X86::esp);
> }
> +
> + invalidatePeepholeOptimizations();
> }
>
> // puts an arg onto the stack, as an arg to a context threaded function.
> ALWAYS_INLINE void CTI::emitPutArg(X86Assembler::RegisterID src, unsigned offset)
> {
> m_jit.movl_rm(src, offset + sizeof(void*), X86::esp);
> + invalidatePeepholeOptimizations();
> }
>
> ALWAYS_INLINE void CTI::emitPutArgConstant(unsigned value, unsigned offset)
> {
> m_jit.movl_i32m(value, offset + sizeof(void*), X86::esp);
> + invalidatePeepholeOptimizations();
> }
>
> ALWAYS_INLINE JSValue* CTI::getConstantImmediateNumericArg(unsigned src)
> @@ -219,31 +246,37 @@ ALWAYS_INLINE JSValue* CTI::getConstantI
> ALWAYS_INLINE void CTI::emitPutCTIParam(void* value, unsigned name)
> {
> m_jit.movl_i32m(reinterpret_cast<intptr_t>(value), name * sizeof(void*), X86::esp);
> + invalidatePeepholeOptimizations();
> }
>
> ALWAYS_INLINE void CTI::emitPutCTIParam(X86Assembler::RegisterID from, unsigned name)
> {
> m_jit.movl_rm(from, name * sizeof(void*), X86::esp);
> + invalidatePeepholeOptimizations();
> }
>
> ALWAYS_INLINE void CTI::emitGetCTIParam(unsigned name, X86Assembler::RegisterID to)
> {
> m_jit.movl_mr(name * sizeof(void*), X86::esp, to);
> + invalidatePeepholeOptimizations();
> }
>
> ALWAYS_INLINE void CTI::emitPutToCallFrameHeader(X86Assembler::RegisterID from, RegisterFile::CallFrameHeaderEntry entry)
> {
> m_jit.movl_rm(from, entry * sizeof(Register), X86::edi);
> + invalidatePeepholeOptimizations();
> }
>
> ALWAYS_INLINE void CTI::emitGetFromCallFrameHeader(RegisterFile::CallFrameHeaderEntry entry, X86Assembler::RegisterID to)
> {
> m_jit.movl_mr(entry * sizeof(Register), X86::edi, to);
> + invalidatePeepholeOptimizations();
> }
>
> ALWAYS_INLINE void CTI::emitPutResult(unsigned dst, X86Assembler::RegisterID from)
> {
> m_jit.movl_rm(from, dst * sizeof(Register), X86::edi);
> + m_lastResultBytecodeRegister = (from == X86::eax) ? dst : std::numeric_limits<int>::max();
> // FIXME: #ifndef NDEBUG, Write the correct m_type to the register.
> }
>
> @@ -351,6 +384,7 @@ ALWAYS_INLINE X86Assembler::JmpSrc CTI::
> #if ENABLE(OPCODE_SAMPLING)
> m_jit.movl_i32m(m_machine->sampler()->encodeSample(vPC, false), m_machine->sampler()->sampleSlot());
> #endif
> + invalidatePeepholeOptimizations();
>
> return call;
> }
> @@ -369,6 +403,7 @@ ALWAYS_INLINE X86Assembler::JmpSrc CTI::
> #if ENABLE(OPCODE_SAMPLING)
> m_jit.movl_i32m(m_machine->sampler()->encodeSample(vPC, false), m_machine->sampler()->sampleSlot());
> #endif
> + invalidatePeepholeOptimizations();
>
> return call;
> }
> @@ -387,6 +422,7 @@ ALWAYS_INLINE X86Assembler::JmpSrc CTI::
> #if ENABLE(OPCODE_SAMPLING)
> m_jit.movl_i32m(m_machine->sampler()->encodeSample(vPC, false), m_machine->sampler()->sampleSlot());
> #endif
> + invalidatePeepholeOptimizations();
>
> return call;
> }
> @@ -405,6 +441,7 @@ ALWAYS_INLINE X86Assembler::JmpSrc CTI::
> #if ENABLE(OPCODE_SAMPLING)
> m_jit.movl_i32m(m_machine->sampler()->encodeSample(vPC, false), m_machine->sampler()->sampleSlot());
> #endif
> + invalidatePeepholeOptimizations();
>
> return call;
> }
> @@ -423,6 +460,7 @@ ALWAYS_INLINE X86Assembler::JmpSrc CTI::
> #if ENABLE(OPCODE_SAMPLING)
> m_jit.movl_i32m(m_machine->sampler()->encodeSample(vPC, false), m_machine->sampler()->sampleSlot());
> #endif
> + invalidatePeepholeOptimizations();
>
> return call;
> }
> @@ -441,6 +479,7 @@ ALWAYS_INLINE X86Assembler::JmpSrc CTI::
> #if ENABLE(OPCODE_SAMPLING)
> m_jit.movl_i32m(m_machine->sampler()->encodeSample(vPC, false), m_machine->sampler()->sampleSlot());
> #endif
> + invalidatePeepholeOptimizations();
>
> return call;
> }
> @@ -459,6 +498,7 @@ ALWAYS_INLINE X86Assembler::JmpSrc CTI::
> #if ENABLE(OPCODE_SAMPLING)
> m_jit.movl_i32m(m_machine->sampler()->encodeSample(vPC, false), m_machine->sampler()->sampleSlot());
> #endif
> + invalidatePeepholeOptimizations();
>
> return call;
> }
> @@ -549,6 +589,8 @@ CTI::CTI(JSGlobalData* globalData, CodeB
> , m_labels(codeBlock ? codeBlock->instructions.size() : 0)
> , m_propertyAccessCompilationInfo(codeBlock ? codeBlock->propertyAccessInstructions.size() : 0)
> , m_callStructureStubCompilationInfo(codeBlock ? codeBlock->callLinkInfos.size() : 0)
> + , m_lastResultBytecodeRegister(std::numeric_limits<int>::max())
> + , m_jumpTargetsPosition(0)
> {
> }
>
> @@ -637,7 +679,7 @@ void CTI::compileOpCall(OpcodeID opcodeI
> // Handle eval
> X86Assembler::JmpSrc wasEval;
> if (opcodeID == op_call_eval) {
> - emitGetArg(callee, X86::ecx);
> + emitGetArg(callee, X86::ecx, i);
> compileOpCallEvalSetupArgs(instruction);
>
> emitCTICall(instruction, i, Machine::cti_op_call_eval);
> @@ -647,7 +689,7 @@ void CTI::compileOpCall(OpcodeID opcodeI
>
> // This plants a check for a cached JSFunction value, so we can plant a fast link to the callee.
> // This deliberately leaves the callee in ecx, used when setting up the stack frame below
> - emitGetArg(callee, X86::ecx);
> + emitGetArg(callee, X86::ecx, i);
> m_jit.cmpl_i32r(asInteger(JSImmediate::impossibleValue()), X86::ecx);
> X86Assembler::JmpDst addressOfLinkedFunctionCheck = m_jit.label();
> m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
> @@ -665,7 +707,7 @@ void CTI::compileOpCall(OpcodeID opcodeI
> emitGetPutArg(proto, 12, X86::eax);
> emitCTICall(instruction, i, Machine::cti_op_construct_JSConstruct);
> emitPutResult(thisRegister);
> - emitGetArg(callee, X86::ecx);
> + emitGetArg(callee, X86::ecx, i);
> }
>
> // Fast version of stack frame initialization, directly relative to edi.
> @@ -700,8 +742,8 @@ void CTI::compileOpStrictEq(Instruction*
> unsigned src1 = instruction[2].u.operand;
> unsigned src2 = instruction[3].u.operand;
>
> - emitGetArg(src1, X86::eax);
> - emitGetArg(src2, X86::edx);
> + emitGetArg(src1, X86::eax, i);
> + emitGetArg(src2, X86::edx, i);
>
> m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
> X86Assembler::JmpSrc firstNotImmediate = m_jit.emitUnlinkedJe();
> @@ -758,6 +800,8 @@ void CTI::emitSlowScriptCheck(Instructio
> m_jit.movl_mr(OBJECT_OFFSET(JSGlobalData, machine), X86::ecx, X86::ecx);
> m_jit.movl_mr(OBJECT_OFFSET(Machine, m_ticksUntilNextTimeoutCheck), X86::ecx, X86::esi);
> m_jit.link(skipTimeout, m_jit.label());
> +
> + invalidatePeepholeOptimizations();
> }
>
> /*
> @@ -806,8 +850,8 @@ void CTI::compileBinaryArithOp(OpcodeID
> X86Assembler::JmpSrc wasJSNumberCell2;
> X86Assembler::JmpSrc wasJSNumberCell2b;
>
> - emitGetArg(src1, X86::eax);
> - emitGetArg(src2, X86::edx);
> + emitGetArg(src1, X86::eax, i);
> + emitGetArg(src2, X86::edx, i);
>
> if (types.second().isReusable() && isSSE2Present()) {
> ASSERT(types.second().mightBeNumber());
> @@ -946,6 +990,10 @@ void CTI::compileBinaryArithOp(OpcodeID
> m_jit.link(wasJSNumberCell1, m_jit.label());
> m_jit.link(wasJSNumberCell1b, m_jit.label());
> }
> +
> + // FIXME: make the different cases of this function all use eax as the
> + // destination register and enable the register caching optimization.
> + invalidatePeepholeOptimizations();
> }
>
> void CTI::compileBinaryArithOpSlowCase(Instruction* vPC, OpcodeID opcodeID, Vector<SlowCaseEntry>::iterator& iter, unsigned dst, unsigned src1, unsigned src2, OperandTypes types, unsigned i)
> @@ -1016,7 +1064,7 @@ void CTI::privateCompileMainPass()
> if (m_codeBlock->isConstantRegisterIndex(src))
> m_jit.movl_i32r(asInteger(m_codeBlock->getConstant(src)),
X86::eax);
> else
> - emitGetArg(src, X86::eax);
> + emitGetArg(src, X86::eax, i);
> emitPutResult(instruction[i + 1].u.operand);
> i += 3;
> break;
> @@ -1027,13 +1075,13 @@ void CTI::privateCompileMainPass()
> unsigned src2 = instruction[i + 3].u.operand;
>
> if (JSValue* value = getConstantImmediateNumericArg(src1)) {
> - emitGetArg(src2, X86::edx);
> + emitGetArg(src2, X86::edx, i);
> emitJumpSlowCaseIfNotImmNum(X86::edx, i);
> m_jit.addl_i32r(getDeTaggedConstantImmediate(value),
X86::edx);
> m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJo(),
i));
> emitPutResult(dst, X86::edx);
> } else if (JSValue* value =
getConstantImmediateNumericArg(src2)) {
> - emitGetArg(src1, X86::eax);
> + emitGetArg(src1, X86::eax, i);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
> m_jit.addl_i32r(getDeTaggedConstantImmediate(value),
X86::eax);
> m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJo(),
i));
> @@ -1056,7 +1104,7 @@ void CTI::privateCompileMainPass()
> case op_end: {
> if (m_codeBlock->needsFullScopeChain)
> emitCTICall(instruction + i, i, Machine::cti_op_end);
> - emitGetArg(instruction[i + 1].u.operand, X86::eax);
> + emitGetArg(instruction[i + 1].u.operand, X86::eax, i);
> m_jit.pushl_m(RegisterFile::ReturnPC *
static_cast<int>(sizeof(Register)), X86::edi);
> m_jit.ret();
> i += 2;
> @@ -1070,7 +1118,7 @@ void CTI::privateCompileMainPass()
> }
> case op_pre_inc: {
> int srcDst = instruction[i + 1].u.operand;
> - emitGetArg(srcDst, X86::eax);
> + emitGetArg(srcDst, X86::eax, i);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
>
m_jit.addl_i8r(getDeTaggedConstantImmediate(JSImmediate::oneImmediate()),
X86::eax);
> m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJo(), i));
> @@ -1092,13 +1140,13 @@ void CTI::privateCompileMainPass()
> unsigned target = instruction[i + 3].u.operand;
> JSValue* src2imm = getConstantImmediateNumericArg(instruction[i
+ 2].u.operand);
> if (src2imm) {
> - emitGetArg(instruction[i + 1].u.operand, X86::edx);
> + emitGetArg(instruction[i + 1].u.operand, X86::edx, i);
> emitJumpSlowCaseIfNotImmNum(X86::edx, i);
> m_jit.cmpl_i32r(asInteger(src2imm), X86::edx);
> m_jmpTable.append(JmpTable(m_jit.emitUnlinkedJl(), i + 3 +
target));
> } else {
> - emitGetArg(instruction[i + 1].u.operand, X86::eax);
> - emitGetArg(instruction[i + 2].u.operand, X86::edx);
> + emitGetArg(instruction[i + 1].u.operand, X86::eax, i);
> + emitGetArg(instruction[i + 2].u.operand, X86::edx, i);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
> emitJumpSlowCaseIfNotImmNum(X86::edx, i);
> m_jit.cmpl_rr(X86::edx, X86::eax);
> @@ -1113,13 +1161,13 @@ void CTI::privateCompileMainPass()
> unsigned target = instruction[i + 3].u.operand;
> JSValue* src2imm = getConstantImmediateNumericArg(instruction[i
+ 2].u.operand);
> if (src2imm) {
> - emitGetArg(instruction[i + 1].u.operand, X86::edx);
> + emitGetArg(instruction[i + 1].u.operand, X86::edx, i);
> emitJumpSlowCaseIfNotImmNum(X86::edx, i);
> m_jit.cmpl_i32r(asInteger(src2imm), X86::edx);
> m_jmpTable.append(JmpTable(m_jit.emitUnlinkedJle(), i + 3 +
target));
> } else {
> - emitGetArg(instruction[i + 1].u.operand, X86::eax);
> - emitGetArg(instruction[i + 2].u.operand, X86::edx);
> + emitGetArg(instruction[i + 1].u.operand, X86::eax, i);
> + emitGetArg(instruction[i + 2].u.operand, X86::edx, i);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
> emitJumpSlowCaseIfNotImmNum(X86::edx, i);
> m_jit.cmpl_rr(X86::edx, X86::eax);
> @@ -1139,8 +1187,8 @@ void CTI::privateCompileMainPass()
> // to just after the arguments have been loaded into registers 'hotPathBegin', and we generate code
> // such that the StructureID & offset are always at the same distance from this.
>
> - emitGetArg(instruction[i + 1].u.operand, X86::eax);
> - emitGetArg(instruction[i + 3].u.operand, X86::edx);
> + emitGetArg(instruction[i + 1].u.operand, X86::eax, i);
> + emitGetArg(instruction[i + 3].u.operand, X86::edx, i);
>
>
ASSERT(m_codeBlock->propertyAccessInstructions[propertyAccessInstructionIndex].
opcodeIndex == i);
> X86Assembler::JmpDst hotPathBegin = m_jit.label();
> @@ -1168,7 +1216,7 @@ void CTI::privateCompileMainPass()
> // to array-length / prototype access trampolines, and finally we also use the property-map access offset as a label
> // to jump back to if one of these trampolines finds a match.
>
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
>
>
ASSERT(m_codeBlock->propertyAccessInstructions[propertyAccessInstructionIndex].
opcodeIndex == i);
>
> @@ -1183,17 +1231,17 @@ void CTI::privateCompileMainPass()
> ASSERT(X86Assembler::getDifferenceBetweenLabels(hotPathBegin,
m_jit.label()) == repatchOffsetGetByIdBranchToSlowCase);
>
> m_jit.movl_mr(OBJECT_OFFSET(JSObject, m_propertyStorage),
X86::eax, X86::eax);
> - m_jit.movl_mr(repatchGetByIdDefaultOffset, X86::eax, X86::ecx);
> + m_jit.movl_mr(repatchGetByIdDefaultOffset, X86::eax, X86::eax);
> ASSERT(X86Assembler::getDifferenceBetweenLabels(hotPathBegin,
m_jit.label()) == repatchOffsetGetByIdPropertyMapOffset);
> - emitPutResult(instruction[i + 1].u.operand, X86::ecx);
> + emitPutResult(instruction[i + 1].u.operand);
>
> i += 8;
> break;
> }
> case op_instanceof: {
> - emitGetArg(instruction[i + 2].u.operand, X86::eax); // value
> - emitGetArg(instruction[i + 3].u.operand, X86::ecx); // baseVal
> - emitGetArg(instruction[i + 4].u.operand, X86::edx); // proto
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i); // value
> + emitGetArg(instruction[i + 3].u.operand, X86::ecx, i); //
baseVal
> + emitGetArg(instruction[i + 4].u.operand, X86::edx, i); // proto
>
> // check if any are immediates
> m_jit.orl_rr(X86::eax, X86::ecx);
> @@ -1210,7 +1258,7 @@ void CTI::privateCompileMainPass()
> m_jit.movl_mr(OBJECT_OFFSET(JSCell, m_structureID), X86::edx,
X86::edx);
> m_jit.subl_mr(OBJECT_OFFSET(StructureID, m_typeInfo.m_type),
X86::eax, X86::ecx);
> m_jit.subl_mr(OBJECT_OFFSET(StructureID, m_typeInfo.m_type),
X86::edx, X86::ecx);
> - emitGetArg(instruction[i + 3].u.operand, X86::edx); // reload
baseVal
> + emitGetArg(instruction[i + 3].u.operand, X86::edx, i); // reload
baseVal
> m_jit.movl_mr(OBJECT_OFFSET(JSCell, m_structureID), X86::edx,
X86::edx);
> m_jit.cmpl_rm(X86::ecx, OBJECT_OFFSET(StructureID,
m_typeInfo.m_type), X86::edx);
>
> @@ -1223,8 +1271,8 @@ void CTI::privateCompileMainPass()
>
> m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
>
> - emitGetArg(instruction[i + 2].u.operand, X86::ecx); // reload
value
> - emitGetArg(instruction[i + 4].u.operand, X86::edx); // reload
proto
> + emitGetArg(instruction[i + 2].u.operand, X86::ecx, i); // reload
value
> + emitGetArg(instruction[i + 4].u.operand, X86::edx, i); // reload
proto
>
> // optimistically load true result
> m_jit.movl_i32r(asInteger(jsBoolean(true)), X86::eax);
> @@ -1270,7 +1318,7 @@ void CTI::privateCompileMainPass()
> JSValue* src2Value = getConstantImmediateNumericArg(src2);
> int32_t value;
> if (src1Value && ((value = JSImmediate::intValue(src1Value)) >
0)) {
> - emitGetArg(src2, X86::eax);
> + emitGetArg(src2, X86::eax, i);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
> emitFastArithDeTagImmediate(X86::eax);
> m_jit.imull_i32r(X86::eax, value, X86::eax);
> @@ -1278,7 +1326,7 @@ void CTI::privateCompileMainPass()
> emitFastArithReTagImmediate(X86::eax);
> emitPutResult(dst);
> } else if (src2Value && ((value =
JSImmediate::intValue(src2Value)) > 0)) {
> - emitGetArg(src1, X86::eax);
> + emitGetArg(src1, X86::eax, i);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
> emitFastArithDeTagImmediate(X86::eax);
> m_jit.imull_i32r(X86::eax, value, X86::eax);
> @@ -1317,7 +1365,7 @@ void CTI::privateCompileMainPass()
> case op_put_global_var: {
> JSVariableObject* globalObject =
static_cast<JSVariableObject*>(instruction[i + 1].u.jsCell);
> m_jit.movl_i32r(asInteger(globalObject), X86::eax);
> - emitGetArg(instruction[i + 3].u.operand, X86::edx);
> + emitGetArg(instruction[i + 3].u.operand, X86::edx, i);
> emitPutVariableObjectRegister(X86::edx, X86::eax, instruction[i
+ 2].u.operand);
> i += 4;
> break;
> @@ -1325,7 +1373,7 @@ void CTI::privateCompileMainPass()
> case op_get_scoped_var: {
> int skip = instruction[i + 3].u.operand +
m_codeBlock->needsFullScopeChain;
>
> - emitGetArg(RegisterFile::ScopeChain, X86::eax);
> + emitGetArg(RegisterFile::ScopeChain, X86::eax, i);
> while (skip--)
> m_jit.movl_mr(OBJECT_OFFSET(ScopeChainNode, next), X86::eax,
X86::eax);
>
> @@ -1338,8 +1386,8 @@ void CTI::privateCompileMainPass()
> case op_put_scoped_var: {
> int skip = instruction[i + 2].u.operand +
m_codeBlock->needsFullScopeChain;
>
> - emitGetArg(RegisterFile::ScopeChain, X86::edx);
> - emitGetArg(instruction[i + 3].u.operand, X86::eax);
> + emitGetArg(RegisterFile::ScopeChain, X86::edx, i);
> + emitGetArg(instruction[i + 3].u.operand, X86::eax, i);
> while (skip--)
> m_jit.movl_mr(OBJECT_OFFSET(ScopeChainNode, next), X86::edx,
X86::edx);
>
> @@ -1365,13 +1413,13 @@ void CTI::privateCompileMainPass()
> emitCTICall(instruction + i, i,
Machine::cti_op_ret_scopeChain);
>
> // Return the result in %eax.
> - emitGetArg(instruction[i + 1].u.operand, X86::eax);
> + emitGetArg(instruction[i + 1].u.operand, X86::eax, i);
>
> // Grab the return address.
> - emitGetArg(RegisterFile::ReturnPC, X86::edx);
> + emitGetArg(RegisterFile::ReturnPC, X86::edx, i);
>
> // Restore our caller's "r".
> - emitGetArg(RegisterFile::CallerFrame, X86::edi);
> + emitGetArg(RegisterFile::CallerFrame, X86::edi, i);
>
> // Return.
> m_jit.pushl_r(X86::edx);
> @@ -1398,8 +1446,8 @@ void CTI::privateCompileMainPass()
> break;
> }
> case op_construct_verify: {
> - emitGetArg(instruction[i + 1].u.operand, X86::eax);
> -
> + emitGetArg(instruction[i + 1].u.operand, X86::eax, i);
> +
> m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
> X86Assembler::JmpSrc isImmediate = m_jit.emitUnlinkedJne();
> m_jit.movl_mr(OBJECT_OFFSET(JSCell, m_structureID), X86::eax,
X86::ecx);
> @@ -1407,7 +1455,7 @@ void CTI::privateCompileMainPass()
> X86Assembler::JmpSrc isObject = m_jit.emitUnlinkedJe();
>
> m_jit.link(isImmediate, m_jit.label());
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
> emitPutResult(instruction[i + 1].u.operand);
> m_jit.link(isObject, m_jit.label());
>
> @@ -1415,8 +1463,8 @@ void CTI::privateCompileMainPass()
> break;
> }
> case op_get_by_val: {
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> - emitGetArg(instruction[i + 3].u.operand, X86::edx);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
> + emitGetArg(instruction[i + 3].u.operand, X86::edx, i);
> emitJumpSlowCaseIfNotImmNum(X86::edx, i);
> emitFastArithImmToInt(X86::edx);
> m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
> @@ -1450,8 +1498,8 @@ void CTI::privateCompileMainPass()
> break;
> }
> case op_put_by_val: {
> - emitGetArg(instruction[i + 1].u.operand, X86::eax);
> - emitGetArg(instruction[i + 2].u.operand, X86::edx);
> + emitGetArg(instruction[i + 1].u.operand, X86::eax, i);
> + emitGetArg(instruction[i + 2].u.operand, X86::edx, i);
> emitJumpSlowCaseIfNotImmNum(X86::edx, i);
> emitFastArithImmToInt(X86::edx);
> m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
> @@ -1474,7 +1522,7 @@ void CTI::privateCompileMainPass()
>
> // All good - put the value into the array.
> m_jit.link(inFastVector, m_jit.label());
> - emitGetArg(instruction[i + 3].u.operand, X86::eax);
> + emitGetArg(instruction[i + 3].u.operand, X86::eax, i);
> m_jit.movl_rm(X86::eax, OBJECT_OFFSET(ArrayStorage,
m_vector[0]), X86::ecx, X86::edx, sizeof(JSValue*));
> i += 4;
> break;
> @@ -1484,7 +1532,7 @@ void CTI::privateCompileMainPass()
> emitSlowScriptCheck(instruction + i, i);
>
> unsigned target = instruction[i + 2].u.operand;
> - emitGetArg(instruction[i + 1].u.operand, X86::eax);
> + emitGetArg(instruction[i + 1].u.operand, X86::eax, i);
>
> m_jit.cmpl_i32r(asInteger(JSImmediate::zeroImmediate()),
X86::eax);
> X86Assembler::JmpSrc isZero = m_jit.emitUnlinkedJe();
> @@ -1509,7 +1557,7 @@ void CTI::privateCompileMainPass()
> break;
> }
> case op_negate: {
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
> m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::eax);
> X86Assembler::JmpSrc notImmediate = m_jit.emitUnlinkedJe();
>
> @@ -1599,7 +1647,7 @@ void CTI::privateCompileMainPass()
> CTI_COMPILE_BINARY_OP(op_div)
> case op_pre_dec: {
> int srcDst = instruction[i + 1].u.operand;
> - emitGetArg(srcDst, X86::eax);
> + emitGetArg(srcDst, X86::eax, i);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
>
m_jit.subl_i8r(getDeTaggedConstantImmediate(JSImmediate::oneImmediate()),
X86::eax);
> m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJo(), i));
> @@ -1611,13 +1659,13 @@ void CTI::privateCompileMainPass()
> unsigned target = instruction[i + 3].u.operand;
> JSValue* src2imm = getConstantImmediateNumericArg(instruction[i
+ 2].u.operand);
> if (src2imm) {
> - emitGetArg(instruction[i + 1].u.operand, X86::edx);
> + emitGetArg(instruction[i + 1].u.operand, X86::edx, i);
> emitJumpSlowCaseIfNotImmNum(X86::edx, i);
> m_jit.cmpl_i32r(asInteger(src2imm), X86::edx);
> m_jmpTable.append(JmpTable(m_jit.emitUnlinkedJge(), i + 3 +
target));
> } else {
> - emitGetArg(instruction[i + 1].u.operand, X86::eax);
> - emitGetArg(instruction[i + 2].u.operand, X86::edx);
> + emitGetArg(instruction[i + 1].u.operand, X86::eax, i);
> + emitGetArg(instruction[i + 2].u.operand, X86::edx, i);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
> emitJumpSlowCaseIfNotImmNum(X86::edx, i);
> m_jit.cmpl_rr(X86::edx, X86::eax);
> @@ -1627,7 +1675,7 @@ void CTI::privateCompileMainPass()
> break;
> }
> case op_not: {
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
> m_jit.xorl_i8r(JSImmediate::FullTagTypeBool, X86::eax);
> m_jit.testl_i32r(JSImmediate::FullTagTypeMask, X86::eax); // i8?
> m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
> @@ -1638,7 +1686,7 @@ void CTI::privateCompileMainPass()
> }
> case op_jfalse: {
> unsigned target = instruction[i + 2].u.operand;
> - emitGetArg(instruction[i + 1].u.operand, X86::eax);
> + emitGetArg(instruction[i + 1].u.operand, X86::eax, i);
>
> m_jit.cmpl_i32r(asInteger(JSImmediate::zeroImmediate()),
X86::eax);
> m_jmpTable.append(JmpTable(m_jit.emitUnlinkedJe(), i + 2 +
target));
> @@ -1658,7 +1706,7 @@ void CTI::privateCompileMainPass()
> unsigned src = instruction[i + 1].u.operand;
> unsigned target = instruction[i + 2].u.operand;
>
> - emitGetArg(src, X86::eax);
> + emitGetArg(src, X86::eax, i);
> m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
> X86Assembler::JmpSrc isImmediate = m_jit.emitUnlinkedJnz();
>
> @@ -1688,7 +1736,7 @@ void CTI::privateCompileMainPass()
> unsigned src = instruction[i + 1].u.operand;
> unsigned target = instruction[i + 2].u.operand;
>
> - emitGetArg(src, X86::eax);
> + emitGetArg(src, X86::eax, i);
> m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
> X86Assembler::JmpSrc isImmediate = m_jit.emitUnlinkedJnz();
>
> @@ -1716,7 +1764,7 @@ void CTI::privateCompileMainPass()
> }
> case op_post_inc: {
> int srcDst = instruction[i + 2].u.operand;
> - emitGetArg(srcDst, X86::eax);
> + emitGetArg(srcDst, X86::eax, i);
> m_jit.movl_rr(X86::eax, X86::edx);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
>
m_jit.addl_i8r(getDeTaggedConstantImmediate(JSImmediate::oneImmediate()),
X86::edx);
> @@ -1750,8 +1798,8 @@ void CTI::privateCompileMainPass()
> break;
> }
> case op_eq: {
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> - emitGetArg(instruction[i + 3].u.operand, X86::edx);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
> + emitGetArg(instruction[i + 3].u.operand, X86::edx, i);
> emitJumpSlowCaseIfNotImmNums(X86::eax, X86::edx, i);
> m_jit.cmpl_rr(X86::edx, X86::eax);
> m_jit.sete_r(X86::eax);
> @@ -1762,8 +1810,8 @@ void CTI::privateCompileMainPass()
> break;
> }
> case op_lshift: {
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> - emitGetArg(instruction[i + 3].u.operand, X86::ecx);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
> + emitGetArg(instruction[i + 3].u.operand, X86::ecx, i);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
> emitJumpSlowCaseIfNotImmNum(X86::ecx, i);
> emitFastArithImmToInt(X86::eax);
> @@ -1779,18 +1827,18 @@ void CTI::privateCompileMainPass()
> unsigned src2 = instruction[i + 3].u.operand;
> unsigned dst = instruction[i + 1].u.operand;
> if (JSValue* value = getConstantImmediateNumericArg(src1)) {
> - emitGetArg(src2, X86::eax);
> + emitGetArg(src2, X86::eax, i);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
> m_jit.andl_i32r(asInteger(value), X86::eax); // FIXME: make
it more obvious this is relying on the format of JSImmediate
> emitPutResult(dst);
> } else if (JSValue* value =
getConstantImmediateNumericArg(src2)) {
> - emitGetArg(src1, X86::eax);
> + emitGetArg(src1, X86::eax, i);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
> m_jit.andl_i32r(asInteger(value), X86::eax);
> emitPutResult(dst);
> } else {
> - emitGetArg(src1, X86::eax);
> - emitGetArg(src2, X86::edx);
> + emitGetArg(src1, X86::eax, i);
> + emitGetArg(src2, X86::edx, i);
> m_jit.andl_rr(X86::edx, X86::eax);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
> emitPutResult(dst);
> @@ -1799,8 +1847,8 @@ void CTI::privateCompileMainPass()
> break;
> }
> case op_rshift: {
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> - emitGetArg(instruction[i + 3].u.operand, X86::ecx);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
> + emitGetArg(instruction[i + 3].u.operand, X86::ecx, i);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
> emitJumpSlowCaseIfNotImmNum(X86::ecx, i);
> emitFastArithImmToInt(X86::ecx);
> @@ -1811,7 +1859,7 @@ void CTI::privateCompileMainPass()
> break;
> }
> case op_bitnot: {
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
> m_jit.xorl_i8r(~JSImmediate::TagBitTypeInteger, X86::eax);
> emitPutResult(instruction[i + 1].u.operand);
> @@ -1836,8 +1884,8 @@ void CTI::privateCompileMainPass()
> break;
> }
> case op_mod: {
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> - emitGetArg(instruction[i + 3].u.operand, X86::ecx);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
> + emitGetArg(instruction[i + 3].u.operand, X86::ecx, i);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
> emitJumpSlowCaseIfNotImmNum(X86::ecx, i);
> emitFastArithDeTagImmediate(X86::eax);
> @@ -1852,7 +1900,7 @@ void CTI::privateCompileMainPass()
> }
> case op_jtrue: {
> unsigned target = instruction[i + 2].u.operand;
> - emitGetArg(instruction[i + 1].u.operand, X86::eax);
> + emitGetArg(instruction[i + 1].u.operand, X86::eax, i);
>
> m_jit.cmpl_i32r(asInteger(JSImmediate::zeroImmediate()),
X86::eax);
> X86Assembler::JmpSrc isZero = m_jit.emitUnlinkedJe();
> @@ -1870,8 +1918,8 @@ void CTI::privateCompileMainPass()
> }
> CTI_COMPILE_BINARY_OP(op_less)
> case op_neq: {
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> - emitGetArg(instruction[i + 3].u.operand, X86::edx);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
> + emitGetArg(instruction[i + 3].u.operand, X86::edx, i);
> emitJumpSlowCaseIfNotImmNums(X86::eax, X86::edx, i);
> m_jit.cmpl_rr(X86::eax, X86::edx);
>
> @@ -1886,7 +1934,7 @@ void CTI::privateCompileMainPass()
> }
> case op_post_dec: {
> int srcDst = instruction[i + 2].u.operand;
> - emitGetArg(srcDst, X86::eax);
> + emitGetArg(srcDst, X86::eax, i);
> m_jit.movl_rr(X86::eax, X86::edx);
> emitJumpSlowCaseIfNotImmNum(X86::eax, i);
>
m_jit.subl_i8r(getDeTaggedConstantImmediate(JSImmediate::oneImmediate()),
X86::edx);
> @@ -1898,8 +1946,8 @@ void CTI::privateCompileMainPass()
> }
> CTI_COMPILE_BINARY_OP(op_urshift)
> case op_bitxor: {
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> - emitGetArg(instruction[i + 3].u.operand, X86::edx);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
> + emitGetArg(instruction[i + 3].u.operand, X86::edx, i);
> emitJumpSlowCaseIfNotImmNums(X86::eax, X86::edx, i);
> m_jit.xorl_rr(X86::edx, X86::eax);
> emitFastArithReTagImmediate(X86::eax);
> @@ -1916,8 +1964,8 @@ void CTI::privateCompileMainPass()
> break;
> }
> case op_bitor: {
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> - emitGetArg(instruction[i + 3].u.operand, X86::edx);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
> + emitGetArg(instruction[i + 3].u.operand, X86::edx, i);
> emitJumpSlowCaseIfNotImmNums(X86::eax, X86::edx, i);
> m_jit.orl_rr(X86::edx, X86::eax);
> emitPutResult(instruction[i + 1].u.operand);
> @@ -1983,7 +2031,7 @@ void CTI::privateCompileMainPass()
> break;
> }
> case op_to_jsnumber: {
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
>
> m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::eax);
> X86Assembler::JmpSrc wasImmediate = m_jit.emitUnlinkedJnz();
> @@ -2139,7 +2187,7 @@ void CTI::privateCompileMainPass()
> unsigned dst = instruction[i + 1].u.operand;
> unsigned src1 = instruction[i + 2].u.operand;
>
> - emitGetArg(src1, X86::eax);
> + emitGetArg(src1, X86::eax, i);
> m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
> X86Assembler::JmpSrc isImmediate = m_jit.emitUnlinkedJnz();
>
> @@ -2169,7 +2217,7 @@ void CTI::privateCompileMainPass()
> unsigned dst = instruction[i + 1].u.operand;
> unsigned src1 = instruction[i + 2].u.operand;
>
> - emitGetArg(src1, X86::eax);
> + emitGetArg(src1, X86::eax, i);
> m_jit.testl_i32r(JSImmediate::TagMask, X86::eax);
> X86Assembler::JmpSrc isImmediate = m_jit.emitUnlinkedJnz();
>
> @@ -2226,7 +2274,7 @@ void CTI::privateCompileMainPass()
> break;
> }
> case op_convert_this: {
> - emitGetArg(instruction[i + 1].u.operand, X86::eax);
> + emitGetArg(instruction[i + 1].u.operand, X86::eax, i);
>
> emitJumpSlowCaseIfNotJSCell(X86::eax, i);
> m_jit.movl_mr(OBJECT_OFFSET(JSCell, m_structureID), X86::eax,
X86::edx);
> @@ -2302,6 +2350,9 @@ void CTI::privateCompileSlowCases()
>
> Instruction* instruction = m_codeBlock->instructions.begin();
> for (Vector<SlowCaseEntry>::iterator iter = m_slowCases.begin(); iter !=
m_slowCases.end(); ++iter) {
> + // FIXME: enable peephole optimizations for slow cases when applicable
> + invalidatePeepholeOptimizations();
> +
> unsigned i = iter->to;
> switch (OpcodeID opcodeID =
m_machine->getOpcodeID(instruction[i].u.opcode)) {
> case op_convert_this: {
> @@ -2372,8 +2423,9 @@ void CTI::privateCompileSlowCases()
> // Check whether the value loaded is zero; if so we need to return undefined.
> m_jit.testl_rr(X86::ecx, X86::ecx);
> m_jit.link(m_jit.emitUnlinkedJe(), beginGetByValSlow);
> - emitPutResult(instruction[i + 1].u.operand, X86::ecx);
> -
> + m_jit.movl_rr(X86::ecx, X86::eax);
> + emitPutResult(instruction[i + 1].u.operand, X86::eax);
> +
> i += 4;
> break;
> }
> @@ -2404,8 +2456,8 @@ void CTI::privateCompileSlowCases()
> X86Assembler::JmpSrc notImm1 = iter->from;
> X86Assembler::JmpSrc notImm2 = (++iter)->from;
> m_jit.link((++iter)->from, m_jit.label());
> - emitGetArg(instruction[i + 2].u.operand, X86::eax);
> - emitGetArg(instruction[i + 3].u.operand, X86::ecx);
> + emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
> + emitGetArg(instruction[i + 3].u.operand, X86::ecx, i);
> m_jit.link(notImm1, m_jit.label());
> m_jit.link(notImm2, m_jit.label());
> emitPutArg(X86::eax, 0);
> @@ -2528,7 +2580,7 @@ void CTI::privateCompileSlowCases()
> m_jit.link((++iter)->from, m_jit.label());
> emitFastArithIntToImmNoCheck(X86::edx);
> m_jit.link(notImm, m_jit.label());
> - emitGetArg(instruction[i + 3].u.operand, X86::ecx);
> + emitGetArg(instruction[i + 3].u.operand, X86::ecx, i);
> emitPutArg(X86::eax, 0);
> emitPutArg(X86::edx, 4);
> emitPutArg(X86::ecx, 8);
> @@ -2538,7 +2590,7 @@ void CTI::privateCompileSlowCases()
> // slow cases for immediate int accesses to arrays
> m_jit.link((++iter)->from, m_jit.label());
> m_jit.link((++iter)->from, m_jit.label());
> - emitGetArg(instruction[i + 3].u.operand, X86::ecx);
> + emitGetArg(instruction[i + 3].u.operand, X86::ecx, i);
> emitPutArg(X86::eax, 0);
> emitPutArg(X86::edx, 4);
> emitPutArg(X86::ecx, 8);
> @@ -2618,8 +2670,8 @@ void CTI::privateCompileSlowCases()
> m_jit.link((++iter)->from, m_jit.label());
> emitPutArg(X86::eax, 0);
> emitCTICall(instruction + i, i, Machine::cti_op_post_inc);
> - emitPutResult(instruction[i + 1].u.operand);
> emitPutResult(srcDst, X86::edx);
> + emitPutResult(instruction[i + 1].u.operand);
> i += 3;
> break;
> }
> @@ -2673,8 +2725,8 @@ void CTI::privateCompileSlowCases()
> m_jit.link((++iter)->from, m_jit.label());
> emitPutArg(X86::eax, 0);
> emitCTICall(instruction + i, i, Machine::cti_op_post_dec);
> - emitPutResult(instruction[i + 1].u.operand);
> emitPutResult(srcDst, X86::edx);
> + emitPutResult(instruction[i + 1].u.operand);
> i += 3;
> break;
> }
> @@ -2794,7 +2846,7 @@ void CTI::privateCompileSlowCases()
> if (opcodeID == op_construct) {
> emitCTICall(instruction, i,
Machine::cti_op_construct_JSConstruct);
> emitPutResult(registerOffset -
RegisterFile::CallFrameHeaderSize - argCount);
> - emitGetArg(callee, X86::ecx);
> + emitGetArg(callee, X86::ecx, i);
> }
>
> // Load the callee CodeBlock* into eax
> @@ -2803,7 +2855,7 @@ void CTI::privateCompileSlowCases()
> m_jit.testl_rr(X86::eax, X86::eax);
> X86Assembler::JmpSrc hasCodeBlockForLink =
m_jit.emitUnlinkedJne();
> emitCTICall(instruction + i, i,
Machine::cti_op_call_JSFunction);
> - emitGetArg(callee, X86::ecx);
> + emitGetArg(callee, X86::ecx, i);
> m_jit.link(hasCodeBlockForLink, m_jit.label());
>
> // Speculatively roll the callframe, assuming argCount will
match the arity.
> @@ -2815,7 +2867,7 @@ void CTI::privateCompileSlowCases()
> X86Assembler::JmpSrc arityCheckOkayForLink =
m_jit.emitUnlinkedJe();
> emitPutArg(X86::eax, 12);
> emitCTICall(instruction + i, i,
Machine::cti_op_call_arityCheck);
> - emitGetArg(callee - registerOffset, X86::ecx);
> + emitGetArg(callee - registerOffset, X86::ecx, i);
> m_jit.movl_rr(X86::edx, X86::edi);
> m_jit.link(arityCheckOkayForLink, m_jit.label());
>
> @@ -2860,7 +2912,7 @@ void CTI::privateCompileSlowCases()
> if (opcodeID == op_construct) {
> emitCTICall(instruction, i,
Machine::cti_op_construct_JSConstruct);
> emitPutResult(registerOffset -
RegisterFile::CallFrameHeaderSize - argCount);
> - emitGetArg(callee, X86::ecx);
> + emitGetArg(callee, X86::ecx, i);
> }
>
> // Load the callee CodeBlock* into eax
> @@ -2869,7 +2921,7 @@ void CTI::privateCompileSlowCases()
> m_jit.testl_rr(X86::eax, X86::eax);
> X86Assembler::JmpSrc hasCodeBlock = m_jit.emitUnlinkedJne();
> emitCTICall(instruction + i, i,
Machine::cti_op_call_JSFunction);
> - emitGetArg(callee, X86::ecx);
> + emitGetArg(callee, X86::ecx, i);
> m_jit.link(hasCodeBlock, m_jit.label());
>
> // Speculatively roll the callframe, assuming argCount will
match the arity.
> @@ -2881,7 +2933,7 @@ void CTI::privateCompileSlowCases()
> X86Assembler::JmpSrc arityCheckOkay = m_jit.emitUnlinkedJe();
> emitPutArg(X86::eax, 12);
> emitCTICall(instruction + i, i,
Machine::cti_op_call_arityCheck);
> - emitGetArg(callee - registerOffset, X86::ecx);
> + emitGetArg(callee - registerOffset, X86::ecx, i);
> m_jit.movl_rr(X86::edx, X86::edi);
> m_jit.link(arityCheckOkay, m_jit.label());
>
> @@ -3080,7 +3132,7 @@ void CTI::privateCompileGetByIdProto(Str
> X86Assembler::JmpSrc failureCases3 = m_jit.emitUnlinkedJne();
>
> // Checks out okay! - getDirectOffset
> - m_jit.movl_mr(cachedOffset * sizeof(JSValue*), X86::edx, X86::ecx);
> + m_jit.movl_mr(cachedOffset * sizeof(JSValue*), X86::edx, X86::eax);
>
> X86Assembler::JmpSrc success = m_jit.emitUnlinkedJmp();
>
> @@ -3441,6 +3493,7 @@ void CTI::privateCompilePatchGetArrayLen
>
> m_jit.addl_rr(X86::ecx, X86::ecx);
> m_jit.addl_i8r(1, X86::ecx);
> + m_jit.movl_rr(X86::ecx, X86::eax);
> X86Assembler::JmpSrc success = m_jit.emitUnlinkedJmp();
>
> void* code = m_jit.copy();
> Index: VM/CTI.h
> ===================================================================
> --- VM/CTI.h (revision 38367)
> +++ VM/CTI.h (working copy)
> @@ -374,7 +374,7 @@ namespace JSC {
> void compileBinaryArithOp(OpcodeID, unsigned dst, unsigned src1, unsigned src2, OperandTypes opi, unsigned i);
> void compileBinaryArithOpSlowCase(Instruction*, OpcodeID, Vector<SlowCaseEntry>::iterator& iter, unsigned dst, unsigned src1, unsigned src2, OperandTypes opi, unsigned i);
>
> - void emitGetArg(int src, X86Assembler::RegisterID dst);
> + void emitGetArg(int src, X86Assembler::RegisterID dst, unsigned i);
> void emitGetPutArg(unsigned src, unsigned offset, X86Assembler::RegisterID scratch);
> void emitPutArg(X86Assembler::RegisterID src, unsigned offset);
> void emitPutArgConstant(unsigned value, unsigned offset);
> @@ -430,6 +430,8 @@ namespace JSC {
> void printOpcodeOperandTypes(unsigned src1, unsigned src2);
> #endif
>
> + void invalidatePeepholeOptimizations();
> +
> X86Assembler m_jit;
> Machine* m_machine;
> JSGlobalData* m_globalData;
> @@ -456,9 +458,11 @@ namespace JSC {
> Vector<SlowCaseEntry> m_slowCases;
> Vector<SwitchRecord> m_switches;
>
> + int m_lastResultBytecodeRegister;
> + unsigned m_jumpTargetsPosition;
> +
> // This limit comes from the limit set in PCRE
> static const int MaxPatternSize = (1 << 16);
> -
> };
> }
>
> Index: VM/CodeBlock.h
> ===================================================================
> --- VM/CodeBlock.h (revision 38367)
> +++ VM/CodeBlock.h (working copy)
> @@ -265,6 +265,11 @@ namespace JSC {
> return constantRegisters[index - numVars].getJSValue();
> }
>
> + ALWAYS_INLINE bool isTemporaryRegisterIndex(int index)
> + {
> + return index >= numVars + numConstants;
> + }
> +
> #if !defined(NDEBUG) || ENABLE_OPCODE_SAMPLING
> void dump(ExecState*) const;
> void printStructureIDs(const Instruction*) const;
> @@ -332,6 +337,8 @@ namespace JSC {
> HashMap<void*, unsigned> ctiReturnAddressVPCMap;
> #endif
>
> + Vector<unsigned> jumpTargets;
> +
> EvalCodeCache evalCodeCache;
>
> private:
> Index: bytecompiler/CodeGenerator.cpp
> ===================================================================
> --- bytecompiler/CodeGenerator.cpp (revision 38367)
> +++ bytecompiler/CodeGenerator.cpp (working copy)
> @@ -497,11 +497,22 @@ PassRefPtr<LabelID> CodeGenerator::newLa
>
> PassRefPtr<LabelID> CodeGenerator::emitLabel(LabelID* l0)
> {
> - l0->setLocation(instructions().size());
> -
> + unsigned newLabelIndex = instructions().size();
> + l0->setLocation(newLabelIndex);
> +
> + if (m_codeBlock->jumpTargets.size() != 0) {
> + unsigned lastLabelIndex = m_codeBlock->jumpTargets.last();
> + ASSERT(lastLabelIndex <= newLabelIndex);
> + if (newLabelIndex == lastLabelIndex) {
> + // Peephole optimization has already been disabled by emitting the last label
> + return l0;
> + }
> + }
> +
> + m_codeBlock->jumpTargets.append(newLabelIndex);
> +
> // This disables peephole optimizations when an instruction is a jump target
> m_lastOpcodeID = op_end;
> -
> return l0;
> }
>
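
A brief note on the new CodeBlock::isTemporaryRegisterIndex() helper: it relies
on the register numbering already implied elsewhere in CodeBlock.h, where
indices below numVars are declared variables, the next numConstants indices are
constant registers, and everything at or above numVars + numConstants is a
temporary. Below is a small self-contained sketch of that classification; the
MiniCodeBlock struct and the example counts are made up for illustration, and
only the comparison mirrors the test the patch adds.

#include <cstdio>

// Illustrative register-index layout implied by CodeBlock.h:
// [0, numVars)                        declared variables
// [numVars, numVars + numConstants)   constant registers
// [numVars + numConstants, ...)       temporaries
struct MiniCodeBlock {
    int numVars;
    int numConstants;

    bool isConstantRegisterIndex(int index) const {
        return index >= numVars && index < numVars + numConstants;
    }
    bool isTemporaryRegisterIndex(int index) const {
        return index >= numVars + numConstants; // same test the patch adds
    }
};

int main() {
    MiniCodeBlock block = { /* numVars */ 4, /* numConstants */ 2 };
    for (int index = 0; index < 8; ++index) {
        const char* kind = block.isTemporaryRegisterIndex(index) ? "temporary"
                         : block.isConstantRegisterIndex(index)  ? "constant"
                                                                 : "variable";
        std::printf("r%d: %s\n", index, kind);
    }
    return 0;
}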