<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>[214901] trunk/Source/JavaScriptCore</title>
</head>
<body>
<style type="text/css"><!--
/* Stylesheet for the commit notification. Rules are grouped by the container
   they target: #msg (revision metadata, log message, path list), #logmsg
   (rules for a formatted log message container), #header / #footer, and
   #patch (the colourised diff). */

/* Revision metadata box: definition list with floated labels. */
#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
/* Links inside the metadata box need light colours to stay readable on the dark background. */
#msg dl a { font-weight: bold}
#msg dl a:link { color:#fc3; }
#msg dl a:active { color:#ff0; }
#msg dl a:visited { color:#cc6; }
/* Section headings ("Log Message", "Modified Paths", "Diff"). */
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
/* Plain-text log message. */
#msg pre { overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }

/* Formatted log message container and the elements it may hold. */
#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }
#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }
#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }
#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }
#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }
#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }
/* Negative text-indent plus matching padding gives list items a hanging indent. */
#logmsg ul { text-indent: -1em; padding-left: 1em; }
#logmsg ol { text-indent: -1.5em; padding-left: 1.5em; }
#logmsg > ul, #logmsg > ol { margin: 0 0 1em 0; }
#logmsg pre { background: #eee; padding: 1em; }
#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}
#logmsg dl { margin: 0; }
#logmsg dt { font-weight: bold; }
#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }
/* \00bb is the right-pointing double angle quotation mark. */
#logmsg dd:before { content:'\00bb';}
#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }
#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }
#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }
#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }
#logmsg table th.Corner { text-align: left; }
#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }

/* Optional banner blocks. */
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }

/* Diff section: one bordered box per file, with a title bar (h4) and a
   scrollable pre.diff whose child span / ins / del elements are rendered as
   full-width rows. */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff {padding:10px 0;}
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
/* Added lines are green, removed lines are red. */
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* Hunk headers (.lines) and file headers (.info). Each selector in a
   comma-separated list needs its own #patch prefix; a bare .info would match
   any element with that class anywhere on the page. */
#patch .lines, #patch .info {color:#888;background:#fff;}
--></style>
<div id="msg">
<dl class="meta">
<dt>Revision</dt> <dd><a href="http://trac.webkit.org/projects/webkit/changeset/214901">214901</a></dd>
<dt>Author</dt> <dd>fpizlo@apple.com</dd>
<dt>Date</dt> <dd>2017-04-04 14:48:41 -0700 (Tue, 04 Apr 2017)</dd>
</dl>
<h3>Log Message</h3>
<pre>Air::lowerAfterRegAlloc should bail early if it finds no Shuffles or ColdCCalls
https://bugs.webkit.org/show_bug.cgi?id=170305
Reviewed by Saam Barati.
This reduces and sometimes completely eliminates the need to run lowerAfterRegAlloc().
This lowers the Shuffle for the arguments of a CCall before register allocation unless
the CCall arguments require a real shuffle (like if the CCall arguments were argument
registers). This lowers a ColdCCall like a CCall for optLevel&lt;2.
Finally, lowerAfterRegAlloc() now checks if there are any Shuffles or CCalls before it
does anything else. For wasm at -O1, this means that the phase doesn't run at all. This
is a ~3% wasm -O1 compile time progression.
To make this easy, I changed optLevel into a property of Procedure and Code rather than
an argument we thread through everything. I like how Procedure and Code are dumping
ground classes. This does not bother me. Note that I cloned optLevel into Procedure and
Code so that it's cheap to query inside Air phases.
* b3/B3Compile.cpp:
(JSC::B3::compile):
* b3/B3Compile.h:
* b3/B3Generate.cpp:
(JSC::B3::prepareForGeneration):
(JSC::B3::generateToAir):
* b3/B3Generate.h:
* b3/B3Procedure.cpp:
(JSC::B3::Procedure::setOptLevel):
* b3/B3Procedure.h:
(JSC::B3::Procedure::optLevel):
* b3/air/AirCode.h:
(JSC::B3::Air::Code::isPinned):
(JSC::B3::Air::Code::setOptLevel):
(JSC::B3::Air::Code::optLevel):
* b3/air/AirEmitShuffle.cpp:
(JSC::B3::Air::ShufflePair::bank):
(JSC::B3::Air::ShufflePair::opcode):
(JSC::B3::Air::ShufflePair::inst):
(JSC::B3::Air::emitShuffle):
* b3/air/AirEmitShuffle.h:
(JSC::B3::Air::moveFor):
* b3/air/AirGenerate.cpp:
(JSC::B3::Air::prepareForGeneration):
* b3/air/AirGenerate.h:
* b3/air/AirLowerAfterRegAlloc.cpp:
(JSC::B3::Air::lowerAfterRegAlloc):
* b3/air/AirLowerMacros.cpp:
(JSC::B3::Air::lowerMacros):
* b3/testb3.cpp:
(JSC::B3::compileProc):
* wasm/WasmB3IRGenerator.cpp:
(JSC::Wasm::parseAndCompile):</pre>
<h3>Modified Paths</h3>
<ul>
<li><a href="#trunkSourceJavaScriptCoreChangeLog">trunk/Source/JavaScriptCore/ChangeLog</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3B3Compilecpp">trunk/Source/JavaScriptCore/b3/B3Compile.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3B3Compileh">trunk/Source/JavaScriptCore/b3/B3Compile.h</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3B3Generatecpp">trunk/Source/JavaScriptCore/b3/B3Generate.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3B3Generateh">trunk/Source/JavaScriptCore/b3/B3Generate.h</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3B3Procedurecpp">trunk/Source/JavaScriptCore/b3/B3Procedure.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3B3Procedureh">trunk/Source/JavaScriptCore/b3/B3Procedure.h</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3airAirCodeh">trunk/Source/JavaScriptCore/b3/air/AirCode.h</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3airAirEmitShufflecpp">trunk/Source/JavaScriptCore/b3/air/AirEmitShuffle.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3airAirEmitShuffleh">trunk/Source/JavaScriptCore/b3/air/AirEmitShuffle.h</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3airAirGeneratecpp">trunk/Source/JavaScriptCore/b3/air/AirGenerate.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3airAirGenerateh">trunk/Source/JavaScriptCore/b3/air/AirGenerate.h</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3airAirLowerAfterRegAlloccpp">trunk/Source/JavaScriptCore/b3/air/AirLowerAfterRegAlloc.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3airAirLowerMacroscpp">trunk/Source/JavaScriptCore/b3/air/AirLowerMacros.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCoreb3testb3cpp">trunk/Source/JavaScriptCore/b3/testb3.cpp</a></li>
<li><a href="#trunkSourceJavaScriptCorewasmWasmB3IRGeneratorcpp">trunk/Source/JavaScriptCore/wasm/WasmB3IRGenerator.cpp</a></li>
</ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkSourceJavaScriptCoreChangeLog"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/ChangeLog (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/ChangeLog        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/ChangeLog        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -1,5 +1,61 @@
</span><span class="cx"> 2017-04-04 Filip Pizlo &lt;fpizlo@apple.com&gt;
</span><span class="cx">
</span><ins>+ Air::lowerAfterRegAlloc should bail early if it finds no Shuffles or ColdCCalls
+ https://bugs.webkit.org/show_bug.cgi?id=170305
+
+ Reviewed by Saam Barati.
+
+ This reduces and sometimes completely eliminates the need to run lowerAfterRegAlloc().
+
+ This lowers the Shuffle for the arguments of a CCall before register allocation unless
+ the CCall arguments require a real shuffle (like if the CCall arguments were argument
+ registers). This lowers a ColdCCall like a CCall for optLevel&lt;2.
+
+ Finally, lowerAfterRegAlloc() now checks if there are any Shuffles or CCalls before it
+ does anything else. For wasm at -O1, this means that the phase doesn't run at all. This
+ is a ~3% wasm -O1 compile time progression.
+
+ To make this easy, I changed optLevel into a property of Procedure and Code rather than
+ an argument we thread through everything. I like how Procedure and Code are dumping
+ ground classes. This does not bother me. Note that I cloned optLevel into Procedure and
+ Code so that it's cheap to query inside Air phases.
+
+ * b3/B3Compile.cpp:
+ (JSC::B3::compile):
+ * b3/B3Compile.h:
+ * b3/B3Generate.cpp:
+ (JSC::B3::prepareForGeneration):
+ (JSC::B3::generateToAir):
+ * b3/B3Generate.h:
+ * b3/B3Procedure.cpp:
+ (JSC::B3::Procedure::setOptLevel):
+ * b3/B3Procedure.h:
+ (JSC::B3::Procedure::optLevel):
+ * b3/air/AirCode.h:
+ (JSC::B3::Air::Code::isPinned):
+ (JSC::B3::Air::Code::setOptLevel):
+ (JSC::B3::Air::Code::optLevel):
+ * b3/air/AirEmitShuffle.cpp:
+ (JSC::B3::Air::ShufflePair::bank):
+ (JSC::B3::Air::ShufflePair::opcode):
+ (JSC::B3::Air::ShufflePair::inst):
+ (JSC::B3::Air::emitShuffle):
+ * b3/air/AirEmitShuffle.h:
+ (JSC::B3::Air::moveFor):
+ * b3/air/AirGenerate.cpp:
+ (JSC::B3::Air::prepareForGeneration):
+ * b3/air/AirGenerate.h:
+ * b3/air/AirLowerAfterRegAlloc.cpp:
+ (JSC::B3::Air::lowerAfterRegAlloc):
+ * b3/air/AirLowerMacros.cpp:
+ (JSC::B3::Air::lowerMacros):
+ * b3/testb3.cpp:
+ (JSC::B3::compileProc):
+ * wasm/WasmB3IRGenerator.cpp:
+ (JSC::Wasm::parseAndCompile):
+
+2017-04-04 Filip Pizlo &lt;fpizlo@apple.com&gt;
+
</ins><span class="cx"> Don't need to Air::reportUsedRegisters for wasm at -O1
</span><span class="cx"> https://bugs.webkit.org/show_bug.cgi?id=170459
</span><span class="cx">
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3B3Compilecpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/B3Compile.cpp (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/B3Compile.cpp        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/b3/B3Compile.cpp        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -38,11 +38,11 @@
</span><span class="cx">
</span><span class="cx"> namespace JSC { namespace B3 {
</span><span class="cx">
</span><del>-Compilation compile(Procedure& proc, unsigned optLevel)
</del><ins>+Compilation compile(Procedure& proc)
</ins><span class="cx"> {
</span><span class="cx"> TimingScope timingScope("Compilation");
</span><span class="cx">
</span><del>- prepareForGeneration(proc, optLevel);
</del><ins>+ prepareForGeneration(proc);
</ins><span class="cx">
</span><span class="cx"> CCallHelpers jit;
</span><span class="cx"> generate(proc, jit);
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3B3Compileh"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/B3Compile.h (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/B3Compile.h        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/b3/B3Compile.h        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -46,7 +46,7 @@
</span><span class="cx"> // Then you keep the Compilation object alive for as long as you want to be able to run the code.
</span><span class="cx"> // If this API feels too high-level, you can use B3::generate() directly.
</span><span class="cx">
</span><del>-JS_EXPORT_PRIVATE Compilation compile(Procedure&, unsigned optLevel = defaultOptLevel());
</del><ins>+JS_EXPORT_PRIVATE Compilation compile(Procedure&);
</ins><span class="cx">
</span><span class="cx"> } } // namespace JSC::B3
</span><span class="cx">
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3B3Generatecpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/B3Generate.cpp (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/B3Generate.cpp        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/b3/B3Generate.cpp        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -52,12 +52,12 @@
</span><span class="cx">
</span><span class="cx"> namespace JSC { namespace B3 {
</span><span class="cx">
</span><del>-void prepareForGeneration(Procedure& procedure, unsigned optLevel)
</del><ins>+void prepareForGeneration(Procedure& procedure)
</ins><span class="cx"> {
</span><span class="cx"> TimingScope timingScope("prepareForGeneration");
</span><span class="cx">
</span><del>- generateToAir(procedure, optLevel);
- Air::prepareForGeneration(procedure.code(), optLevel);
</del><ins>+ generateToAir(procedure);
+ Air::prepareForGeneration(procedure.code());
</ins><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> void generate(Procedure& procedure, CCallHelpers& jit)
</span><span class="lines">@@ -65,7 +65,7 @@
</span><span class="cx"> Air::generate(procedure.code(), jit);
</span><span class="cx"> }
</span><span class="cx">
</span><del>-void generateToAir(Procedure& procedure, unsigned optLevel)
</del><ins>+void generateToAir(Procedure& procedure)
</ins><span class="cx"> {
</span><span class="cx"> TimingScope timingScope("generateToAir");
</span><span class="cx">
</span><span class="lines">@@ -80,7 +80,7 @@
</span><span class="cx"> if (shouldValidateIR())
</span><span class="cx"> validate(procedure);
</span><span class="cx">
</span><del>- if (optLevel >= 2) {
</del><ins>+ if (procedure.optLevel() >= 2) {
</ins><span class="cx"> reduceDoubleToFloat(procedure);
</span><span class="cx"> reduceStrength(procedure);
</span><span class="cx"> eliminateCommonSubexpressions(procedure);
</span><span class="lines">@@ -91,7 +91,7 @@
</span><span class="cx">
</span><span class="cx"> // FIXME: Add more optimizations here.
</span><span class="cx"> // https://bugs.webkit.org/show_bug.cgi?id=150507
</span><del>- } else if (optLevel >= 1) {
</del><ins>+ } else if (procedure.optLevel() >= 1) {
</ins><span class="cx"> // FIXME: Explore better "quick mode" optimizations.
</span><span class="cx"> reduceStrength(procedure);
</span><span class="cx"> }
</span><span class="lines">@@ -99,7 +99,7 @@
</span><span class="cx"> // This puts the IR in quirks mode.
</span><span class="cx"> lowerMacros(procedure);
</span><span class="cx">
</span><del>- if (optLevel >= 2) {
</del><ins>+ if (procedure.optLevel() >= 2) {
</ins><span class="cx"> reduceStrength(procedure);
</span><span class="cx">
</span><span class="cx"> // FIXME: Add more optimizations here.
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3B3Generateh"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/B3Generate.h (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/B3Generate.h        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/b3/B3Generate.h        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -40,7 +40,7 @@
</span><span class="cx">
</span><span class="cx"> // This takes a B3::Procedure, optimizes it in-place, lowers it to Air, and prepares the Air for
</span><span class="cx"> // generation.
</span><del>-JS_EXPORT_PRIVATE void prepareForGeneration(Procedure&, unsigned optLevel = defaultOptLevel());
</del><ins>+JS_EXPORT_PRIVATE void prepareForGeneration(Procedure&);
</ins><span class="cx">
</span><span class="cx"> // This takes a B3::Procedure that has been prepared for generation (i.e. it has been lowered to Air and
</span><span class="cx"> // the Air has been prepared for generation) and generates it. This is the equivalent of calling
</span><span class="lines">@@ -50,7 +50,7 @@
</span><span class="cx"> // This takes a B3::Procedure, optimizes it in-place, and lowers it to Air. You can then generate
</span><span class="cx"> // the Air to machine code using Air::prepareForGeneration() and Air::generate() on the Procedure's
</span><span class="cx"> // code().
</span><del>-void generateToAir(Procedure&, unsigned optLevel = defaultOptLevel());
</del><ins>+void generateToAir(Procedure&);
</ins><span class="cx">
</span><span class="cx"> } } // namespace JSC::B3
</span><span class="cx">
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3B3Procedurecpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/B3Procedure.cpp (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/B3Procedure.cpp        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/b3/B3Procedure.cpp        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -334,6 +334,12 @@
</span><span class="cx"> code().pinRegister(reg);
</span><span class="cx"> }
</span><span class="cx">
</span><ins>+void Procedure::setOptLevel(unsigned optLevel)
+{
+ m_optLevel = optLevel;
+ code().setOptLevel(optLevel);
+}
+
</ins><span class="cx"> unsigned Procedure::frameSize() const
</span><span class="cx"> {
</span><span class="cx"> return code().frameSize();
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3B3Procedureh"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/B3Procedure.h (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/B3Procedure.h        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/b3/B3Procedure.h        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -233,6 +233,9 @@
</span><span class="cx"> // This tells the register allocators to stay away from this register.
</span><span class="cx"> JS_EXPORT_PRIVATE void pinRegister(Reg);
</span><span class="cx">
</span><ins>+ JS_EXPORT_PRIVATE void setOptLevel(unsigned value);
+ unsigned optLevel() const { return m_optLevel; }
+
</ins><span class="cx"> // You can turn off used registers calculation. This may speed up compilation a bit. But if
</span><span class="cx"> // you turn it off then you cannot use StackmapGenerationParams::usedRegisters() or
</span><span class="cx"> // StackmapGenerationParams::unavailableRegisters().
</span><span class="lines">@@ -273,6 +276,7 @@
</span><span class="cx"> RefPtr&lt;SharedTask&lt;void(PrintStream&amp;, Origin)&gt;&gt; m_originPrinter;
</span><span class="cx"> const void* m_frontendData;
</span><span class="cx"> PCToOriginMap m_pcToOriginMap;
</span><ins>+ unsigned m_optLevel { defaultOptLevel() };
</ins><span class="cx"> bool m_needsUsedRegisters { true };
</span><span class="cx"> bool m_hasQuirks { false };
</span><span class="cx"> };
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3airAirCodeh"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/air/AirCode.h (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/air/AirCode.h        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/b3/air/AirCode.h        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -88,9 +88,11 @@
</span><span class="cx"> const RegisterSet& mutableRegs() const { return m_mutableRegs; }
</span><span class="cx">
</span><span class="cx"> bool isPinned(Reg reg) const { return !mutableRegs().get(reg); }
</span><del>-
</del><span class="cx"> void pinRegister(Reg);
</span><span class="cx">
</span><ins>+ void setOptLevel(unsigned optLevel) { m_optLevel = optLevel; }
+ unsigned optLevel() const { return m_optLevel; }
+
</ins><span class="cx"> bool needsUsedRegisters() const;
</span><span class="cx">
</span><span class="cx"> JS_EXPORT_PRIVATE BasicBlock* addBlock(double frequency = 1);
</span><span class="lines">@@ -322,6 +324,7 @@
</span><span class="cx"> RefPtr&lt;WasmBoundsCheckGenerator&gt; m_wasmBoundsCheckGenerator;
</span><span class="cx"> const char* m_lastPhaseName;
</span><span class="cx"> std::unique_ptr&lt;Disassembler&gt; m_disassembler;
</span><ins>+ unsigned m_optLevel { defaultOptLevel() };
</ins><span class="cx"> };
</span><span class="cx">
</span><span class="cx"> } } } // namespace JSC::B3::Air
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3airAirEmitShufflecpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/air/AirEmitShuffle.cpp (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/air/AirEmitShuffle.cpp        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/b3/air/AirEmitShuffle.cpp        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -65,6 +65,36 @@
</span><span class="cx">
</span><span class="cx"> } // anonymous namespace
</span><span class="cx">
</span><ins>+Bank ShufflePair::bank() const
+{
+ if (src().isMemory() && dst().isMemory() && width() > pointerWidth()) {
+ // 8-byte memory-to-memory moves on a 32-bit platform are best handled as float moves.
+ return FP;
+ }
+
+ if (src().isGP() && dst().isGP()) {
+ // This means that gpPairs gets memory-to-memory shuffles. The assumption is that we
+ // can do that more efficiently using GPRs, except in the special case above.
+ return GP;
+ }
+
+ return FP;
+}
+
+Opcode ShufflePair::opcode() const
+{
+ return moveFor(bank(), width());
+}
+
+Inst ShufflePair::inst(Code* code, Value* origin) const
+{
+ if (UNLIKELY(src().isMemory() && dst().isMemory())) {
+ RELEASE_ASSERT(code);
+ return Inst(opcode(), origin, src(), dst(), code->newTmp(bank()));
+ }
+ return Inst(opcode(), origin, src(), dst());
+}
+
</ins><span class="cx"> void ShufflePair::dump(PrintStream& out) const
</span><span class="cx"> {
</span><span class="cx"> out.print(width(), ":", src(), "=>", dst());
</span><span class="lines">@@ -261,14 +291,7 @@
</span><span class="cx"> // ends with a register. We search for such a register right now.
</span><span class="cx">
</span><span class="cx"> auto moveForWidth = [&] (Width width) -> Opcode {
</span><del>- switch (width) {
- case Width32:
- return bank == GP ? Move32 : MoveFloat;
- case Width64:
- return bank == GP ? Move : MoveDouble;
- default:
- RELEASE_ASSERT_NOT_REACHED();
- }
</del><ins>+ return moveFor(bank, width);
</ins><span class="cx"> };
</span><span class="cx">
</span><span class="cx"> Opcode conservativeMove = moveForWidth(conservativeWidth(bank));
</span><span class="lines">@@ -520,15 +543,14 @@
</span><span class="cx"> Vector&lt;ShufflePair&gt; gpPairs;
</span><span class="cx"> Vector&lt;ShufflePair&gt; fpPairs;
</span><span class="cx"> for (const ShufflePair& pair : pairs) {
</span><del>- if (pair.src().isMemory() && pair.dst().isMemory() && pair.width() > pointerWidth()) {
- // 8-byte memory-to-memory moves on a 32-bit platform are best handled as float moves.
- fpPairs.append(pair);
- } else if (pair.src().isGP() && pair.dst().isGP()) {
- // This means that gpPairs gets memory-to-memory shuffles. The assumption is that we
- // can do that more efficiently using GPRs, except in the special case above.
</del><ins>+ switch (pair.bank()) {
+ case GP:
</ins><span class="cx"> gpPairs.append(pair);
</span><del>- } else
</del><ins>+ break;
+ case FP:
</ins><span class="cx"> fpPairs.append(pair);
</span><ins>+ break;
+ }
</ins><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> Vector&lt;Inst&gt; result;
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3airAirEmitShuffleh"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/air/AirEmitShuffle.h (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/air/AirEmitShuffle.h        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/b3/air/AirEmitShuffle.h        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -39,6 +39,19 @@
</span><span class="cx">
</span><span class="cx"> class Code;
</span><span class="cx">
</span><ins>+inline Opcode moveFor(Bank bank, Width width)
+{
+ switch (width) {
+ case Width32:
+ return bank == GP ? Move32 : MoveFloat;
+ case Width64:
+ return bank == GP ? Move : MoveDouble;
+ default:
+ RELEASE_ASSERT_NOT_REACHED();
+ return Oops;
+ }
+}
+
</ins><span class="cx"> class ShufflePair {
</span><span class="cx"> public:
</span><span class="cx"> ShufflePair()
</span><span class="lines">@@ -58,6 +71,14 @@
</span><span class="cx"> // The width determines the kind of move we do. You can only choose Width32 or Width64 right now.
</span><span class="cx"> // For GP, it picks between Move32 and Move. For FP, it picks between MoveFloat and MoveDouble.
</span><span class="cx"> Width width() const { return m_width; }
</span><ins>+
+ Bank bank() const;
+ Opcode opcode() const;
+
+ // Creates an instruction for the move represented by this shuffle pair. You need to pass
+ // Code if this is a memory->memory pair. You can pass null if you know that it's not. In
+ // fact, passing null is a good way to assert that this is not a memory->memory pair.
+ Inst inst(Code*, Value* origin) const;
</ins><span class="cx">
</span><span class="cx"> void dump(PrintStream&) const;
</span><span class="cx">
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3airAirGeneratecpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/air/AirGenerate.cpp (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/air/AirGenerate.cpp        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/b3/air/AirGenerate.cpp        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -58,7 +58,7 @@
</span><span class="cx">
</span><span class="cx"> namespace JSC { namespace B3 { namespace Air {
</span><span class="cx">
</span><del>-void prepareForGeneration(Code& code, unsigned optLevel)
</del><ins>+void prepareForGeneration(Code& code)
</ins><span class="cx"> {
</span><span class="cx"> TimingScope timingScope("Air::prepareForGeneration");
</span><span class="cx">
</span><span class="lines">@@ -90,7 +90,7 @@
</span><span class="cx"> // For debugging, you can use spillEverything() to put everything to the stack between each Inst.
</span><span class="cx"> if (Options::airSpillsEverything())
</span><span class="cx"> spillEverything(code);
</span><del>- else if (optLevel >= 2)
</del><ins>+ else if (code.optLevel() >= 2)
</ins><span class="cx"> allocateRegistersByGraphColoring(code);
</span><span class="cx"> else
</span><span class="cx"> allocateRegistersByLinearScan(code);
</span><span class="lines">@@ -100,7 +100,7 @@
</span><span class="cx"> logRegisterPressure(code);
</span><span class="cx"> }
</span><span class="cx">
</span><del>- if (optLevel >= 2) {
</del><ins>+ if (code.optLevel() >= 2) {
</ins><span class="cx"> // This replaces uses of spill slots with registers or constants if possible. It does this by
</span><span class="cx"> // minimizing the amount that we perturb the already-chosen register allocation. It may extend
</span><span class="cx"> // the live ranges of registers though.
</span><span class="lines">@@ -124,7 +124,7 @@
</span><span class="cx">
</span><span class="cx"> // This is needed to satisfy a requirement of B3::StackmapValue. This also removes dead
</span><span class="cx"> // code. We can avoid running this when certain optimizations are disabled.
</span><del>- if (optLevel >= 2 || code.needsUsedRegisters())
</del><ins>+ if (code.optLevel() >= 2 || code.needsUsedRegisters())
</ins><span class="cx"> reportUsedRegisters(code);
</span><span class="cx">
</span><span class="cx"> // Attempt to remove false dependencies between instructions created by partial register changes.
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3airAirGenerateh"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/air/AirGenerate.h (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/air/AirGenerate.h        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/b3/air/AirGenerate.h        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -39,7 +39,7 @@
</span><span class="cx">
</span><span class="cx"> // This takes an Air::Code that hasn't had any stack allocation and optionally hasn't had any
</span><span class="cx"> // register allocation and does both of those things.
</span><del>-JS_EXPORT_PRIVATE void prepareForGeneration(Code&, unsigned optLevel = defaultOptLevel());
</del><ins>+JS_EXPORT_PRIVATE void prepareForGeneration(Code&);
</ins><span class="cx">
</span><span class="cx"> // This generates the code using the given CCallHelpers instance. Note that this may call callbacks
</span><span class="cx"> // in the supplied code as it is generating.
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3airAirLowerAfterRegAlloccpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/air/AirLowerAfterRegAlloc.cpp (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/air/AirLowerAfterRegAlloc.cpp        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/b3/air/AirLowerAfterRegAlloc.cpp        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -56,13 +56,23 @@
</span><span class="cx"> if (verbose)
</span><span class="cx"> dataLog("Code before lowerAfterRegAlloc:\n", code);
</span><span class="cx">
</span><del>- // FIXME:
- // 1) This should bail early if there are no Shuffles or ColdCCalls.
- // https://bugs.webkit.org/show_bug.cgi?id=170305
- // 2) We should not introduce Shuffles for normal calls.
- // https://bugs.webkit.org/show_bug.cgi?id=170306
- // 3) We should emit ColdCCall only at optLevel==1.
- // https://bugs.webkit.org/show_bug.cgi?id=170307
</del><ins>+ auto isRelevant = [] (Inst& inst) -> bool {
+ return inst.kind.opcode == Shuffle || inst.kind.opcode == ColdCCall;
+ };
+
+ bool haveAnyRelevant = false;
+ for (BasicBlock* block : code) {
+ for (Inst& inst : *block) {
+ if (isRelevant(inst)) {
+ haveAnyRelevant = true;
+ break;
+ }
+ }
+ if (haveAnyRelevant)
+ break;
+ }
+ if (!haveAnyRelevant)
+ return;
</ins><span class="cx">
</span><span class="cx"> HashMap&lt;Inst*, RegisterSet&gt; usedRegisters;
</span><span class="cx">
</span><span class="lines">@@ -75,9 +85,7 @@
</span><span class="cx">
</span><span class="cx"> RegisterSet set;
</span><span class="cx">
</span><del>- bool isRelevant = inst.kind.opcode == Shuffle || inst.kind.opcode == ColdCCall;
-
- if (isRelevant) {
</del><ins>+ if (isRelevant(inst)) {
</ins><span class="cx"> for (Reg reg : localCalc.live())
</span><span class="cx"> set.set(reg);
</span><span class="cx"> }
</span><span class="lines">@@ -84,7 +92,7 @@
</span><span class="cx">
</span><span class="cx"> localCalc.execute(instIndex);
</span><span class="cx">
</span><del>- if (isRelevant)
</del><ins>+ if (isRelevant(inst))
</ins><span class="cx"> usedRegisters.add(&inst, set);
</span><span class="cx"> }
</span><span class="cx"> }
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3airAirLowerMacroscpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/air/AirLowerMacros.cpp (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/air/AirLowerMacros.cpp        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/b3/air/AirLowerMacros.cpp        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -30,6 +30,7 @@
</span><span class="cx">
</span><span class="cx"> #include "AirCCallingConvention.h"
</span><span class="cx"> #include "AirCode.h"
</span><ins>+#include "AirEmitShuffle.h"
</ins><span class="cx"> #include "AirInsertionSet.h"
</span><span class="cx"> #include "AirInstInlines.h"
</span><span class="cx"> #include "AirPhaseScope.h"
</span><span class="lines">@@ -46,23 +47,46 @@
</span><span class="cx"> for (BasicBlock* block : code) {
</span><span class="cx"> for (unsigned instIndex = 0; instIndex &lt; block->size(); ++instIndex) {
</span><span class="cx"> Inst&amp; inst = block->at(instIndex);
</span><del>-
- switch (inst.kind.opcode) {
- case CCall: {
</del><ins>+
+ auto handleCall = [&amp;] () {
</ins><span class="cx"> CCallValue* value = inst.origin->as&lt;CCallValue&gt;();
</span><span class="cx"> Kind oldKind = inst.kind;
</span><span class="cx">
</span><span class="cx"> Vector&lt;Arg&gt; destinations = computeCCallingConvention(code, value);
</span><del>-
- Inst shuffleArguments(Shuffle, value);
</del><ins>+
</ins><span class="cx"> unsigned offset = value->type() == Void ? 0 : 1;
</span><ins>+ Vector&lt;ShufflePair, 16&gt; shufflePairs;
+ bool hasRegisterSource = false;
</ins><span class="cx"> for (unsigned i = 1; i &lt; destinations.size(); ++i) {
</span><span class="cx"> Value* child = value->child(i);
</span><del>- shuffleArguments.args.append(inst.args[offset + i]);
- shuffleArguments.args.append(destinations[i]);
- shuffleArguments.args.append(Arg::widthArg(widthForType(child->type())));
</del><ins>+ ShufflePair pair(inst.args[offset + i], destinations[i], widthForType(child->type()));
+ shufflePairs.append(pair);
+ hasRegisterSource |= pair.src().isReg();
</ins><span class="cx"> }
</span><del>- insertionSet.insertInst(instIndex, WTFMove(shuffleArguments));
</del><ins>+
+ if (UNLIKELY(hasRegisterSource))
+ insertionSet.insertInst(instIndex, createShuffle(inst.origin, Vector&lt;ShufflePair&gt;(shufflePairs)));
+ else {
+ // If none of the inputs are registers, then we can efficiently lower this
+ // shuffle before register allocation. First we lower all of the moves to
+ // memory, in the hopes that this is the last use of the operands. This
+ // avoids creating interference between argument registers and arguments
+ // that don't go into argument registers.
+ for (ShufflePair&amp; pair : shufflePairs) {
+ if (pair.dst().isMemory())
+ insertionSet.insertInst(instIndex, pair.inst(&amp;code, inst.origin));
+ }
+
+ // Fill the argument registers by starting with the first one. This avoids
+ // creating interference between things passed to low-numbered argument
+ // registers and high-numbered argument registers. The assumption here is
+ // that lower-numbered argument registers are more likely to be
+ // incidentally clobbered.
+ for (ShufflePair&amp; pair : shufflePairs) {
+ if (!pair.dst().isMemory())
+ insertionSet.insertInst(instIndex, pair.inst(nullptr, inst.origin));
+ }
+ }
</ins><span class="cx">
</span><span class="cx"> // Indicate that we're using our original callee argument.
</span><span class="cx"> destinations[0] = inst.args[0];
</span><span class="lines">@@ -91,8 +115,17 @@
</span><span class="cx"> insertionSet.insert(instIndex + 1, Move, value, result, resultDst);
</span><span class="cx"> break;
</span><span class="cx"> }
</span><ins>+ };
+
+ switch (inst.kind.opcode) {
+ case ColdCCall:
+ if (code.optLevel() &lt; 2)
+ handleCall();
</ins><span class="cx"> break;
</span><del>- }
</del><ins>+
+ case CCall:
+ handleCall();
+ break;
</ins><span class="cx">
</span><span class="cx"> default:
</span><span class="cx"> break;
</span></span></pre></div>
<a id="trunkSourceJavaScriptCoreb3testb3cpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/b3/testb3.cpp (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/b3/testb3.cpp        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/b3/testb3.cpp        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -119,7 +119,8 @@
</span><span class="cx">
</span><span class="cx"> std::unique_ptr&lt;Compilation&gt; compileProc(Procedure&amp; procedure, unsigned optLevel = defaultOptLevel())
</span><span class="cx"> {
</span><del>- return std::make_unique&lt;Compilation&gt;(B3::compile(procedure, optLevel));
</del><ins>+ procedure.setOptLevel(optLevel);
+ return std::make_unique&lt;Compilation&gt;(B3::compile(procedure));
</ins><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> template&lt;typename T, typename... Arguments&gt;
</span></span></pre></div>
<a id="trunkSourceJavaScriptCorewasmWasmB3IRGeneratorcpp"></a>
<div class="modfile"><h4>Modified: trunk/Source/JavaScriptCore/wasm/WasmB3IRGenerator.cpp (214900 => 214901)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/Source/JavaScriptCore/wasm/WasmB3IRGenerator.cpp        2017-04-04 21:32:15 UTC (rev 214900)
+++ trunk/Source/JavaScriptCore/wasm/WasmB3IRGenerator.cpp        2017-04-04 21:48:41 UTC (rev 214901)
</span><span class="lines">@@ -1288,6 +1288,8 @@
</span><span class="cx"> // don't strictly need to run Air::reportUsedRegisters(), which saves a bit of CPU time at
</span><span class="cx"> // optLevel=1.
</span><span class="cx"> procedure.setNeedsUsedRegisters(false);
</span><ins>+
+ procedure.setOptLevel(optLevel);
</ins><span class="cx">
</span><span class="cx"> B3IRGenerator context(info, procedure, result.get(), unlinkedWasmToWasmCalls, mode);
</span><span class="cx"> FunctionParser&lt;B3IRGenerator&gt; parser(context, functionStart, functionLength, signature, info, moduleSignatureIndicesToUniquedSignatureIndices);
</span><span class="lines">@@ -1304,7 +1306,7 @@
</span><span class="cx"> dataLogIf(verbose, "Post SSA: ", procedure);
</span><span class="cx">
</span><span class="cx"> {
</span><del>- B3::prepareForGeneration(procedure, optLevel);
</del><ins>+ B3::prepareForGeneration(procedure);
</ins><span class="cx"> B3::generate(procedure, *compilationContext.wasmEntrypointJIT);
</span><span class="cx"> compilationContext.wasmEntrypointByproducts = procedure.releaseByproducts();
</span><span class="cx"> result->wasmEntrypoint.calleeSaveRegisters = procedure.calleeSaveRegisters();
</span></span></pre>
</div>
</div>
</body>
</html>